# Patch summary: in both docker/zisk/Dockerfile.cluster and
# docker/zisk/Dockerfile.server, prefix the `cargo build` command with
# CUDA_ARCHS="${CUDA_ARCH#sm_}", i.e. the CUDA_ARCH build arg (default sm_120)
# with a leading "sm_" removed by shell parameter expansion. The assignment is
# an inline command prefix, so it is set for that `cargo build` invocation and
# not for the commands chained after it with `&&`.
# NOTE(review): this diff was recovered from a copy whose line breaks were
# collapsed. Blank context lines and continuation-line indentation below are
# reconstructed from the hunk line counts -- confirm against the target files
# before applying.
diff --git a/docker/zisk/Dockerfile.cluster b/docker/zisk/Dockerfile.cluster
index db57f0a2..9fb20f6b 100644
--- a/docker/zisk/Dockerfile.cluster
+++ b/docker/zisk/Dockerfile.cluster
@@ -57,8 +57,11 @@ ARG CUDA
 # Default to build for RTX 50 series
 ARG CUDA_ARCH=sm_120
 
-# Build binaries
-RUN cargo build --release ${CUDA:+--features gpu}
+# Strip the sm_ prefix and export as CUDA_ARCHS (plural, numeric) which is the
+# env var proofman-starks-lib-c/build.rs reads to generate nvcc -gencode flags.
+# See note in docker/zisk/Dockerfile.server for full rationale.
+RUN CUDA_ARCHS="${CUDA_ARCH#sm_}" \
+    cargo build --release ${CUDA:+--features gpu}
 
 FROM $RUNTIME_IMAGE AS runtime
 FROM $RUNTIME_CUDA_IMAGE AS runtime_cuda
diff --git a/docker/zisk/Dockerfile.server b/docker/zisk/Dockerfile.server
index aceff040..d314431f 100644
--- a/docker/zisk/Dockerfile.server
+++ b/docker/zisk/Dockerfile.server
@@ -17,7 +17,14 @@ ARG RUSTFLAGS
 # Default to build for RTX 50 series
 ARG CUDA_ARCH=sm_120
 
-RUN cargo build --release --package ere-server --bin ere-server --features zisk${CUDA:+,cuda} \
+# Strip the sm_ prefix and export as CUDA_ARCHS (plural, numeric) which is the
+# env var proofman-starks-lib-c/build.rs reads to generate nvcc -gencode flags.
+# Without this export, CUDA_ARCH is declared-but-unused and the build silently
+# falls back to the committed sm_120 default in pil2-stark's CudaArch.mk,
+# producing an image that fails on any non-sm_120 GPU with CUDA error 209
+# (no kernel image is available for execution on the device).
+RUN CUDA_ARCHS="${CUDA_ARCH#sm_}" \
+    cargo build --release --package ere-server --bin ere-server --features zisk${CUDA:+,cuda} \
     && mkdir bin && mv target/release/ere-server bin/ere-server \
     && cargo clean && rm -rf $CARGO_HOME/registry/
 