updates the mempool fallback option to be clearer and have testing ho… #23
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: CI

# GPU jobs run on a local workstation self-hosted runner.
# To avoid burning allocation on every PR — and to protect against
# untrusted fork PRs executing arbitrary code on the runner — GPU CI
# only fires on:
#   1. A push to main (i.e., after a PR is merged)
#   2. A manual trigger from the Actions tab
#
# All PRs still get the free compile-only check from build-check.yml.
# External contributor PRs also require explicit approval before any
# workflow runs (configured in Settings → Actions → General).
on:
  push:
    branches: [main]
  workflow_dispatch:

# Least privilege for the job token: the jobs in this workflow only check out
# the repository and upload artifacts, so read access to contents is enough.
# Widen this per job if a job ever needs to write back to the repository.
permissions:
  contents: read

# At most one active run per ref: when a newer run starts for the same ref,
# any queued or in-progress run in that group is cancelled. Runs for
# different refs (e.g. a manual trigger on another branch) use separate groups.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # ── Build + full test suite ──────────────────────────────────────────────────
  build-and-test:
    name: Build & Test (GPU)
    # Requires a self-hosted runner with CUDA, GPU, and the label 'gpu'.
    # Register one at: Settings → Actions → Runners → New self-hosted runner
    runs-on: [self-hosted, gpu, linux]
    # Several steps use bash-only constructs (`source`, pipelines), so pin the
    # shell once for the whole job instead of declaring it on some steps only.
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Toolchain sanity check. Every `run` step starts its own shell, so the
      # PATH edit and the module loaded here do NOT carry over to later steps;
      # this step only proves that the module loads and that nvcc and the
      # driver are reachable once Spack entries are removed from PATH.
      - name: Load CUDA module (no Spack)
        run: |
          export PATH=$(echo "$PATH" | tr ':' '\n' | grep -v '/spack/' | paste -sd:)
          source /etc/profile.d/lmod.sh
          module purge
          module load nvhpc/23.11/nvhpc-hpcx-cuda12
          echo "PATH=$PATH"
          which nvcc
          nvcc --version
          nvidia-smi

      # Same diagnostics in a fresh shell, i.e. without the module loaded.
      # `|| true` keeps a missing tool from failing the job at this point.
      - name: Show CUDA + driver info
        run: |
          echo "=== PATH ==="
          echo "$PATH"
          echo "=== nvcc ==="
          which nvcc || true
          nvcc --version || true
          echo "=== nvidia-smi ==="
          nvidia-smi || true

      # The module has to be loaded again here because the environment from
      # the earlier step is gone.
      # NOTE(review): the Spack PATH filter from "Load CUDA module" is not
      # repeated in this step — confirm that is intended.
      - name: Configure (Release)
        run: |
          source /etc/profile.d/lmod.sh
          module purge
          module load nvhpc/23.11/nvhpc-hpcx-cuda12
          cmake -S . -B build/ci \
            -DCMAKE_BUILD_TYPE=Release \
            -DBUILD_TESTING=ON \
            -DBUILD_EXAMPLES=ON \
            -DCMAKE_CUDA_ARCHITECTURES=native \
            -DCMAKE_INSTALL_PREFIX=./build/ci/install

      - name: Build
        run: cmake --build build/ci --parallel

      # Diagnostic only: print which CUDA libraries the test binary resolves to.
      - name: Show linked CUDA runtime
        run: |
          ldd build/ci/tests/test_cli | grep -E 'cudart|cuda' || true

      - name: Run tests
        working-directory: build/ci
        run: ctest --output-on-failure

      # Keep the CTest output directory around when anything above failed.
      - name: Upload test results
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: test-results
          path: build/ci/Testing/

  # ── ASan + UBSan ────────────────────────────────────────────────────────────
  sanitizers:
    name: Sanitizers (ASan + UBSan)
    runs-on: [self-hosted, gpu, linux]
    # Only run after a clean build+test passes — no point spending runner time
    # on sanitizers when the regular build is already broken.
    needs: build-and-test
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Configure (ASan preset)
        run: cmake --preset asan

      - name: Build
        run: cmake --build --preset asan --parallel

      - name: Run tests (ASan)
        run: |
          # ASan is baked into the shared libraries. The dynamic linker does not
          # guarantee it initializes before main(), so we must preload it first.
          # Without this, every test fails with:
          #   "ASan runtime does not come first in initial library list"
          ASAN_LIB=$(gcc -print-file-name=libasan.so)
          # gcc prints the bare file name back when it cannot locate the
          # library; stop here with a clear message instead of letting every
          # test fail on a useless LD_PRELOAD.
          if [ ! -f "$ASAN_LIB" ]; then
            echo "::error::libasan.so not found by gcc (got '$ASAN_LIB')"
            exit 1
          fi
          echo "Preloading: $ASAN_LIB"

          # CUDA memory pools conflict with ASan's huge shadow memory mapping (OOM error).
          # protect_shadow_gap=0 fixes this on Linux.
          export ASAN_OPTIONS="protect_shadow_gap=0"

          # Ignore known memory leaks in the NVIDIA driver
          printf 'leak:libcuda\nleak:libnv\n' > lsan.supp
          export LSAN_OPTIONS="suppressions=$(pwd)/lsan.supp"

          LD_PRELOAD="$ASAN_LIB" ctest --preset asan --output-on-failure