diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 93a1fd3..75aabae 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -12,9 +12,11 @@ name: bench # nothing the normal CI suite doesn't already cover for correctness. # # How to use it: trigger on demand (Actions tab → "bench" → Run workflow, or -# `gh workflow run bench.yml`). For trustworthy numbers, run it on real, -# single-tenant deployment hardware rather than a shared CI runner. It is NOT -# a required status check on `main`. +# `gh workflow run bench.yml`). It runs on a Depot single-tenant runner +# (depot-ubuntu-24.04-4: 4 vCPU / 16 GB, no noisy neighbours), so the numbers +# are comparable run-to-run — unlike GitHub's shared pool, where per-VM noise +# of ±30–56% produced false regressions. It is still NOT a required status +# check on `main`. on: workflow_dispatch: @@ -24,11 +26,14 @@ permissions: jobs: bench: name: criterion + regression gate (manual) - runs-on: ubuntu-24.04 + runs-on: depot-ubuntu-24.04-4 timeout-minutes: 30 env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 + # Bench temp DBs on tmpfs so disk variance can't pollute timings + # (TempDir::new() honours TMPDIR). + TMPDIR: /dev/shm # Portable x86-64-v2 baseline (SSE3/SSSE3/SSE4.1/SSE4.2/POPCNT) so a # cached binary can't SIGILL across the shared pool's CPU generations. RUSTFLAGS: "-C target-cpu=x86-64-v2" diff --git a/CLAUDE.md b/CLAUDE.md index f1699d4..d4334b9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -96,4 +96,4 @@ If the planner emits a different shape for the same logical operation, the fast Two workflow files: - `.github/workflows/ci.yml` — clippy + fmt + test (+ ASan, miri, fuzz, cargo audit, MSRV, examples). **Required status checks on `main`.** -- `.github/workflows/bench.yml` — criterion microbenchmark suite. 
**Manual-only (`workflow_dispatch`), NOT a required gate.** It's slow and noise-dominated on shared runners, and `powdb-bench` only depends on `powdb-storage`+`powdb-query`, so it gates nothing the normal suite doesn't already cover. Run it on demand (`gh workflow run bench.yml`), ideally on real single-tenant deployment hardware, when you want a perf check. +- `.github/workflows/bench.yml` — criterion microbenchmark suite. **Manual-only (`workflow_dispatch`), NOT a required gate.** It runs on a Depot single-tenant runner (`depot-ubuntu-24.04-4`, temp DBs on tmpfs), so numbers are comparable run-to-run; `baseline/main.json` must only ever be rebaselined from a Depot run of this workflow, never from a laptop (numbers from other hardware are not comparable with the runner's). `powdb-bench` depends only on `powdb-storage`+`powdb-query`, so it gates nothing the normal suite doesn't already cover. Run it on demand: `gh workflow run bench.yml`.