From e66db6ec7e39d4a0e0cca40dae08e25d9ce14540 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Mon, 18 May 2026 10:43:34 -0700
Subject: [PATCH 01/14] run-sweep: gate full-sweep PRs behind a sequential
 canary

When a PR carries `full-sweep-enabled` (and not `evals-only`), pick the
lowest-conc single-node benchmark entry as a canary and run it before
fanning out the full sweep. If the canary fails, the eight fan-out jobs
are skipped to save cluster time on shared failures (bad image tag,
removed CLI flag, etc.).

Design choices:
- Canary candidacy is restricted to single_node['1k1k' | '8k1k'] and
  excludes entries with run-eval: true, so the canary is always a pure
  benchmark smoke test using the existing single-node template.
- The canary entry is removed from the regular fan-out's matrix (via
  remaining-search-space-config) only when the canary actually succeeded.
  On canary skip / cancel / canary-select failure, the regular fan-out
  falls back to the full search-space-config so coverage is preserved.
- The fan-out gate blocks only on `canary-sweep.result == 'failure'` --
  every other state (success, skipped, cancelled) proceeds, so a bug in
  the canary mechanism never blocks the rest of the sweep.
- Non-full-sweep PRs, draft PRs, pushes to main, and the reuse path all
  behave identically to before via existing gates.

The aggregated results_bmk artifact picks up both the canary's row and
the regular fan-out's rows via the existing bmk_* glob -- each entry
appears exactly once.
---
 .github/workflows/run-sweep.yml | 170 ++++++++++++++++++++++++++++----
 1 file changed, 152 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index c5ece9804..2b08a4a60 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -135,9 +135,88 @@ jobs:
                     --ref "${{ github.ref }}" \
                     --workflow-id "run-sweep.yml"
 
-    sweep-multi-node-1k1k:
+    canary-select:
         needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }}
+        if: >-
+            needs.setup.outputs.reuse-enabled != 'true' &&
+            github.event_name == 'pull_request' &&
+            contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') &&
+            !contains(github.event.pull_request.labels.*.name, 'evals-only')
+        runs-on: ubuntu-latest
+        outputs:
+            canary-config: ${{ steps.pick.outputs.canary-config }}
+            remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }}
+        steps:
+            - id: pick
+              env:
+                  SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }}
+              run: |
+                  selection=$(jq -c '
+                    def remove_one($needle):
+                      if $needle == null then .
+                      else
+                        (index($needle)) as $idx
+                        | if $idx == null then . else del(.[$idx]) end
+                      end;
+
+                    # Canary is a benchmark-only smoke test — exclude entries
+                    # whose primary purpose is eval (run-eval == true) so the
+                    # picked canary never runs an eval pass.
+                    (((.single_node["1k1k"] // []) + (.single_node["8k1k"] // []))
+                      | map(select(.["run-eval"] != true))) as $candidates
+                    | (if ($candidates | length) == 0 then null else ($candidates | min_by(.conc)) end) as $canary
+                    | {
+                        canary: (if $canary == null then [] else [$canary] end),
+                        remaining: (
+                          .
+                          | .single_node = (.single_node // {})
+                          | .single_node["1k1k"] = ((.single_node["1k1k"] // []) | remove_one($canary))
+                          | .single_node["8k1k"] = ((.single_node["8k1k"] // []) | remove_one($canary))
+                        )
+                      }
+                  ' <<<"$SEARCH_SPACE")
+                  echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT"
+                  echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT"
+
+    canary-sweep:
+        needs: canary-select
+        if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: canary /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.canary-select.outputs.canary-config) }}
+        secrets: inherit
+        with:
+            exp-name: ${{ matrix.config.exp-name }}
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            tp: ${{ matrix.config.tp }}
+            ep: ${{ matrix.config.ep }}
+            dp-attn: ${{ matrix.config.dp-attn }}
+            conc: ${{ matrix.config.conc }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+            run-eval: false
+
+    sweep-multi-node-1k1k:
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null'
+            }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k1k /
         strategy:
@@ -174,8 +253,15 @@ jobs:
             run-eval: false
 
     sweep-multi-node-8k1k:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null'
+            }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 8k1k /
         strategy:
@@ -186,14 +272,22 @@ jobs:
         with: *multi-node-inputs
 
     sweep-single-node-1k1k:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' &&
+              toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]'
+            }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k1k /
         strategy:
             fail-fast: false
             matrix:
-                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+                config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k'] }}
         secrets: inherit
         with: &single-node-inputs
             exp-name: ${{ matrix.config.exp-name }}
@@ -215,20 +309,35 @@ jobs:
             run-eval: ${{ matrix.config.run-eval }}
 
     sweep-single-node-8k1k:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' &&
+              toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]'
+            }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 8k1k /
         strategy:
             fail-fast: false
             matrix:
-                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
+                config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k'] }}
         secrets: inherit
         with: *single-node-inputs
 
     sweep-agentic:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null'
+            }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: agentic /
         strategy:
@@ -259,8 +368,15 @@ jobs:
             scenario-type: agentic-coding
 
     sweep-multi-node-agentic:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null'
+            }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node agentic /
         strategy:
@@ -298,8 +414,16 @@ jobs:
             scenario-type: agentic-coding
 
     sweep-evals:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null'
+            }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: eval /
         strategy:
@@ -328,8 +452,16 @@ jobs:
             eval-only: true
 
     sweep-multi-node-evals:
-        needs: setup
-        if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }}
+        needs: [setup, canary-select, canary-sweep]
+        if: >-
+            ${{
+              always() &&
+              needs.setup.result == 'success' &&
+              needs.setup.outputs.reuse-enabled != 'true' &&
+              needs.canary-sweep.result != 'failure' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' &&
+              toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null'
+            }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node eval /
         strategy:
@@ -368,6 +500,7 @@ jobs:
     collect-results:
         needs:
             [
+                canary-sweep,
                 sweep-single-node-1k1k,
                 sweep-single-node-8k1k,
                 sweep-agentic,
@@ -381,6 +514,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               (
+                needs.canary-sweep.result == 'success' ||
                 needs.sweep-single-node-1k1k.result != 'skipped' ||
                 needs.sweep-single-node-8k1k.result != 'skipped' ||
                 needs.sweep-multi-node-1k1k.result != 'skipped' ||

From 81bfc26a34d427f53b2ff34364eb489163899f79 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Tue, 26 May 2026 11:20:20 -0700
Subject: [PATCH 02/14] run-sweep: tighten canary gate to success/skipped only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the fan-out gate was `needs.canary-sweep.result != 'failure'`,
which let `cancelled` (and any unknown future result) fall through. A
cancelled canary then ran the FULL fan-out matrix — including the canary's
own entry — without canary validation, the worst-case outcome.

Replace with an explicit allowlist:
  (result == 'success' || result == 'skipped')

- success: canary passed → fan-out runs with deduped (remaining) matrix
- skipped: no canary ran (canary-select was skipped, or found no candidate:
  multi-node-only / evals-only) → fan-out runs with full matrix (no canary
  to dedup against)
- failure / cancelled / anything else: fan-out blocked

The matrix-ternary `result == 'success' && remaining-search-space-config
|| full-search-space-config` already had the right shape and is untouched.

Applies to all 8 fan-out jobs (single/multi-node 1k1k+8k1k, agentic,
multi-node-agentic, evals, multi-node-evals).
---
 .github/workflows/run-sweep.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 2b08a4a60..774cff4d3 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -214,7 +214,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null'
             }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
@@ -259,7 +259,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null'
             }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
@@ -278,7 +278,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' &&
               toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]'
             }}
@@ -315,7 +315,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' &&
               toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]'
             }}
@@ -335,7 +335,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null'
             }}
         uses: ./.github/workflows/benchmark-tmpl.yml
@@ -374,7 +374,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null'
             }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
@@ -420,7 +420,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' &&
               toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null'
             }}
@@ -458,7 +458,7 @@ jobs:
               always() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
-              needs.canary-sweep.result != 'failure' &&
+              (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
               toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' &&
               toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null'
             }}

From afb408bc249a7bf6decd33bce997d6cb76c7cb39 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Tue, 26 May 2026 11:30:05 -0700
Subject: [PATCH 03/14] run-sweep: add non-canary-full-sweep-enabled label
 (full sweep, no canary)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New label that triggers the full sweep without running the canary gate.
Acts as an escape hatch when:
- The canary is known to be flaky / unreliable for this config
- The user wants the full sweep without the canary delay
- The canary's chosen entry is not a representative smoke test

Behavior matrix:
  sweep-enabled                  - trims to max(conc) per parallelism, NO canary
  full-sweep-enabled             - full intermediate conc sweep, with canary
  non-canary-full-sweep-enabled  - full intermediate conc sweep, NO canary

How it works (no changes to existing label semantics):
- Setup gate triggers on any of the 3 labels (was: 2)
- canary-select gate still requires `full-sweep-enabled` (exact array match,
  so `non-canary-full-sweep-enabled` does NOT match) → canary skips → all
  fan-out jobs run on full search space via the `== 'skipped'` allowlist
- TRIM_CONC env is unchanged — only `sweep-enabled` enables trim, so the
  new label correctly behaves as "full sweep"
- The reject-conflicting-labels step is now a 3-way exclusion: at most
  one of {sweep-enabled, full-sweep-enabled, non-canary-full-sweep-enabled}
- The same gate updates apply to the comment-visualizer job
- Concurrency-group filter excludes the new label too so toggling it
  uses the same `'active'` group key as the other sweep labels
---
 .github/workflows/run-sweep.yml | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 774cff4d3..9d5233779 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -8,6 +8,7 @@ concurrency:
           (github.event.action == 'labeled' || github.event.action == 'unlabeled') &&
           github.event.label.name != 'sweep-enabled' &&
           github.event.label.name != 'full-sweep-enabled' &&
+          github.event.label.name != 'non-canary-full-sweep-enabled' &&
           github.run_id ||
           'active'
         }}
@@ -39,7 +40,8 @@ jobs:
             (
               (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
               github.event.label.name == 'sweep-enabled' ||
-              github.event.label.name == 'full-sweep-enabled'
+              github.event.label.name == 'full-sweep-enabled' ||
+              github.event.label.name == 'non-canary-full-sweep-enabled'
             )
         steps:
             - name: Checkout code
@@ -61,12 +63,14 @@ jobs:
               !github.event.pull_request.draft &&
               (
                 contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
-                contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+                contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
+                contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
               ) &&
               (
                 (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
                 github.event.label.name == 'sweep-enabled' ||
-                github.event.label.name == 'full-sweep-enabled'
+                github.event.label.name == 'full-sweep-enabled' ||
+                github.event.label.name == 'non-canary-full-sweep-enabled'
               )
             ) ||
             (
@@ -85,10 +89,13 @@ jobs:
             - name: Reject conflicting sweep labels
               if: >-
                   github.event_name == 'pull_request' &&
-                  contains(github.event.pull_request.labels.*.name, 'sweep-enabled') &&
-                  contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+                  (
+                    (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')) ||
+                    (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) ||
+                    (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled'))
+                  )
               run: |
-                  echo "::error::PR has both 'sweep-enabled' and 'full-sweep-enabled' labels. Remove one — 'full-sweep-enabled' runs the full intermediate concurrency sweep; 'sweep-enabled' trims to max(conc) per parallelism config."
+                  echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to max(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), or 'non-canary-full-sweep-enabled' (full sweep, no canary gate)."
                   exit 1
 
             - name: Checkout code
@@ -726,12 +733,14 @@ jobs:
             !github.event.pull_request.draft &&
             (
               contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
-              contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+              contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
+              contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
             ) &&
             (
               (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
               github.event.label.name == 'sweep-enabled' ||
-              github.event.label.name == 'full-sweep-enabled'
+              github.event.label.name == 'full-sweep-enabled' ||
+              github.event.label.name == 'non-canary-full-sweep-enabled'
             )
         runs-on: ubuntu-latest
         permissions:

From 80f6171a57f640f02ce5ea4685a2ab29fc64737c Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 09:13:24 -0700
Subject: [PATCH 04/14] TEMP T6: trigger sweep for P1 cancellation test

---
 perf-changelog.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 614b6104e..44239501b 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3137,3 +3137,9 @@
     - "Set vLLM serving knobs in benchmarks/single_node/minimaxm2.5_fp8_h200.sh: generated benchmark max-model-len, previous eval max-model-len handling, fp8 KV cache, FlashInfer attention/autotune, Triton MoE, and MiniMax QK norm fusion"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1354
 
+- config-keys:
+    - gptoss-fp4-h100-vllm
+  description:
+    - "TEMP T6 cancellation test for PR #1503 — DO NOT MERGE — verifies always() defeats workflow cancellation"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503
+

From 7d2317403c4e9f882668c0d8276f3ae4408f5d6c Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 09:25:58 -0700
Subject: [PATCH 05/14] fix: resolve perf-changelog.yaml merge conflict markers

The merge of main into sweep-canary-gate (e49c5bf1) committed unresolved
conflict markers in perf-changelog.yaml, breaking process_changelog.py
parsing and failing the Run Sweep setup job.

Keep all three entries: the AMD/MI355X DSv4 image bump (#1568), the
power-aggregation validation entry (#1558), and the TEMP T6 entry used
to trigger this PR's own canary test.
---
 perf-changelog.yaml | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index cbf9bfdb0..62c2ea75f 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3147,13 +3147,6 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1354
 
 - config-keys:
-<<<<<<< sweep-canary-gate
-    - gptoss-fp4-h100-vllm
-  description:
-    - "TEMP T6 cancellation test for PR #1503 — DO NOT MERGE — verifies always() defeats workflow cancellation"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503
-
-=======
     - dsv4-fp4-mi355x-sglang
   description:
     - "Bump image to rocm/sgl-dev:rocm720-mi35x-f96ac98-20260526-DSv4"
@@ -3166,4 +3159,9 @@
   description:
     - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
->>>>>>> main
+
+- config-keys:
+    - gptoss-fp4-h100-vllm
+  description:
+    - "TEMP T6 cancellation test for PR #1503 — DO NOT MERGE — verifies always() defeats workflow cancellation"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503

From d51965fe9433ee41c51826f5275a76ce5319505e Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 09:26:06 -0700
Subject: [PATCH 06/14] run-sweep: drop redundant evals-only PR-label check
 from canary-select
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

evals-only is a per-entry perf-changelog field (validation.py:484:
evals_only: bool = Field(alias="evals-only", default=False)), not a PR
label. The canary-select if-clause's
!contains(github.event.pull_request.labels.*.name, 'evals-only') was a
mis-design — it treated evals-only as a PR-level gate.

The PR-label check is also redundant. process_changelog.py routes
evals-only changelog entries through --evals-only in
generate_sweep_configs.py, which leaves single_node['1k1k'] and
single_node['8k1k'] empty (covered by test_evals_only_no_benchmarks).
The canary-select jq filter walks exactly those arrays, so for a
changelog containing only evals-only entries it emits canary-config=[]
and canary-sweep skips via its existing guard. No PR-label inspection
needed.

The previous T4 test (evals-only PR label + full-sweep-enabled) is
obsolete and will be removed from the PR body.
---
 .github/workflows/run-sweep.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 9d5233779..8912ff625 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -147,8 +147,7 @@ jobs:
         if: >-
             needs.setup.outputs.reuse-enabled != 'true' &&
             github.event_name == 'pull_request' &&
-            contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') &&
-            !contains(github.event.pull_request.labels.*.name, 'evals-only')
+            contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
         runs-on: ubuntu-latest
         outputs:
             canary-config: ${{ steps.pick.outputs.canary-config }}

From 11ab3aec36dd2bcfd5419c372dbeda689e9f54f4 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 09:40:34 -0700
Subject: [PATCH 07/14] run-sweep: !cancelled() so fan-out honors workflow
 cancellation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The eight sweep-* fan-out jobs gated their if: clause on
`always() && needs.setup.result == 'success' && ...`. `always()`
returns true regardless of workflow cancellation, so any matrix entry
whose runner was already assigned would launch and run to completion
even after `gh run cancel` — and queued matrix entries kept picking up
runners after the cancel signal.

Empirically reproduced on run 26524225403: 12 queued entries all
picked up runners after cancel and finished with conclusion=success;
zero entries were marked cancelled.

Switch the eight gates to `!cancelled() && ...`. Per the GitHub Actions
expression reference, `!cancelled()` evaluates false once the workflow
is cancelled (by user, by concurrency, or by upstream), which is what
we want here — fan-out should not start (and should not continue) once
the user has cancelled.

collect-results (line 520) and the housekeeping jobs at 537/607/626/
664/697/730 retain `always()` on purpose so partial-result aggregation
and cleanup still run on cancel; only the 8 expensive fan-out matrices
are switched.
---
 .github/workflows/run-sweep.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 8912ff625..62b8ff191 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -217,7 +217,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -262,7 +262,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -281,7 +281,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -318,7 +318,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -338,7 +338,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -377,7 +377,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -423,7 +423,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
@@ -461,7 +461,7 @@ jobs:
         needs: [setup, canary-select, canary-sweep]
         if: >-
             ${{
-              always() &&
+              !cancelled() &&
               needs.setup.result == 'success' &&
               needs.setup.outputs.reuse-enabled != 'true' &&
               (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&

From e166509d4352da5329fb3b418912f0716c0b9646 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 09:40:48 -0700
Subject: [PATCH 08/14] reuse: accept non-canary-full-sweep-enabled label
 alongside full-sweep-enabled

The reuse path's two hard label checks only accepted full-sweep-enabled,
so a PR using the new non-canary-full-sweep-enabled escape hatch could
neither be merged via utils/merge_with_reuse.sh nor have its
/reuse-sweep-run comment honored on main after merge. Both labels
produce a complete full-sweep matrix in run-sweep.yml (the only
difference is whether the canary smoke test runs first), so both should
be reusable.

Changes:
- find_reusable_sweep_run.py --full-sweep-label now accepts a comma-
  separated list (mirroring --allowed-author-associations in the same
  file). Default extended to
  "full-sweep-enabled,non-canary-full-sweep-enabled"; reuse passes if
  the PR carries any one.
- merge_with_reuse.sh preflight now accepts either label and updates
  the error message accordingly.
- Two new pytest cases:
  - PR with only non-canary-full-sweep-enabled is accepted.
  - PR with neither label fails with both names in the error message.
---
 utils/find_reusable_sweep_run.py      |  18 +++-
 utils/merge_with_reuse.sh             |   9 +-
 utils/test_find_reusable_sweep_run.py | 130 ++++++++++++++++++++++++++
 3 files changed, 151 insertions(+), 6 deletions(-)

diff --git a/utils/find_reusable_sweep_run.py b/utils/find_reusable_sweep_run.py
index 8af018a8e..3f814d2e5 100644
--- a/utils/find_reusable_sweep_run.py
+++ b/utils/find_reusable_sweep_run.py
@@ -276,7 +276,11 @@ def main() -> int:
     parser.add_argument("--event-name", required=True)
     parser.add_argument("--ref", required=True)
     parser.add_argument("--workflow-id", default="run-sweep.yml")
-    parser.add_argument("--full-sweep-label", default="full-sweep-enabled")
+    parser.add_argument(
+        "--full-sweep-label",
+        default="full-sweep-enabled,non-canary-full-sweep-enabled",
+        help="Comma-separated PR labels treated as 'full sweep'; reuse requires at least one.",
+    )
     parser.add_argument("--pinned-run-command", default="/reuse-sweep-run")
     parser.add_argument(
         "--allowed-author-associations",
@@ -355,10 +359,16 @@ def main() -> int:
 
     pr = github_api(args.repo, f"/pulls/{pr_number}", token)
     labels = label_names(pr)
-    if args.full_sweep_label not in labels:
+    accepted_full_sweep_labels = {
+        value.strip()
+        for value in args.full_sweep_label.split(",")
+        if value.strip()
+    }
+    if not accepted_full_sweep_labels.intersection(labels):
+        accepted = ", ".join(sorted(accepted_full_sweep_labels))
         raise RuntimeError(
-            f"PR #{pr_number} has {args.pinned_run_command} authorization but not "
-            f"{args.full_sweep_label}."
+            f"PR #{pr_number} has {args.pinned_run_command} authorization but is "
+            f"missing any of: {accepted}."
         )
     if not pr.get("merged_at"):
         raise RuntimeError(f"PR #{pr_number} is not marked as merged.")
diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh
index 9336b81c2..a94ea0f69 100755
--- a/utils/merge_with_reuse.sh
+++ b/utils/merge_with_reuse.sh
@@ -38,8 +38,13 @@ PR_STATE="$(jq -r '.state' <<<"$PR_INFO")"
 [ "$PR_STATE" = "OPEN" ] || die "PR #${PR} is ${PR_STATE}, expected OPEN"
 
 HEAD_BRANCH="$(jq -r '.headRefName' <<<"$PR_INFO")"
-HAS_FULL_SWEEP="$(jq -r '[.labels[].name] | index("full-sweep-enabled") // ""' <<<"$PR_INFO")"
-[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing the 'full-sweep-enabled' label"
+HAS_FULL_SWEEP="$(jq -r '
+    [.labels[].name] as $names
+    | if (($names | index("full-sweep-enabled")) != null)
+         or (($names | index("non-canary-full-sweep-enabled")) != null)
+      then "1" else "" end
+' <<<"$PR_INFO")"
+[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing 'full-sweep-enabled' or 'non-canary-full-sweep-enabled' label"
 
 # Warn early if no successful run exists on any current PR commit.
 PR_SHAS="$(gh api "repos/${REPO}/pulls/${PR}/commits" --paginate --jq '.[].sha')"
diff --git a/utils/test_find_reusable_sweep_run.py b/utils/test_find_reusable_sweep_run.py
index e779bbe8b..0f8a09ac7 100644
--- a/utils/test_find_reusable_sweep_run.py
+++ b/utils/test_find_reusable_sweep_run.py
@@ -455,3 +455,133 @@ def fake_paginated_github_api(repo, path, token, item_key, params=None):
     assert outputs["reuse-enabled"] == "false"
     assert outputs["reuse-source-pr-number"] == "1321"
     assert outputs["reuse-reason"] == "PR #1321 has no /reuse-sweep-run authorization"
+
+
+def test_main_accepts_non_canary_full_sweep_label(monkeypatch, tmp_path) -> None:
+    comments = [
+        {
+            "created_at": "2026-05-13T00:00:00Z",
+            "author_association": "OWNER",
+            "body": "/reuse-sweep-run 25763404168",
+        },
+    ]
+    run = {
+        "id": 25763404168,
+        "event": "pull_request",
+        "status": "completed",
+        "conclusion": "success",
+        "path": ".github/workflows/run-sweep.yml",
+        "pull_requests": [{"number": 1321}],
+        "run_attempt": 1,
+        "html_url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/25763404168",
+        "head_sha": "abc123",
+    }
+
+    def fake_github_api(repo, path, token, params=None):
+        if path == "/commits/merge-sha/pulls":
+            return [{"number": 1321}]
+        if path == "/pulls/1321":
+            return {
+                "merged_at": "2026-05-13T00:01:00Z",
+                "labels": [{"name": "non-canary-full-sweep-enabled"}],
+                "head": {"sha": "abc123"},
+            }
+        if path == "/actions/runs/25763404168":
+            return run
+        raise AssertionError(f"unexpected GitHub API path: {path}")
+
+    def fake_paginated_github_api(repo, path, token, item_key, params=None):
+        if path == "/issues/1321/comments":
+            return comments
+        if path == "/pulls/1321/commits":
+            return [{"sha": "abc123"}]
+        if path == "/actions/runs/25763404168/artifacts":
+            return [{"name": "results_bmk"}]
+        raise AssertionError(f"unexpected paginated GitHub API path: {path}")
+
+    output_path = tmp_path / "outputs"
+    monkeypatch.setenv("GITHUB_TOKEN", "token")
+    monkeypatch.setattr(reuse, "github_api", fake_github_api)
+    monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api)
+    monkeypatch.setattr(
+        reuse.sys,
+        "argv",
+        [
+            "find_reusable_sweep_run.py",
+            "--repo",
+            "SemiAnalysisAI/InferenceX",
+            "--commit-sha",
+            "merge-sha",
+            "--event-name",
+            "push",
+            "--ref",
+            "refs/heads/main",
+            "--github-output",
+            str(output_path),
+        ],
+    )
+
+    assert reuse.main() == 0
+
+    outputs = dict(line.split("=", 1) for line in output_path.read_text().splitlines())
+    assert outputs["reuse-enabled"] == "true"
+
+
+def test_main_rejects_pr_with_neither_full_sweep_label(monkeypatch, tmp_path) -> None:
+    comments = [
+        {
+            "created_at": "2026-05-13T00:00:00Z",
+            "author_association": "OWNER",
+            "body": "/reuse-sweep-run 25763404168",
+        },
+    ]
+
+    def fake_github_api(repo, path, token, params=None):
+        if path == "/commits/merge-sha/pulls":
+            return [{"number": 1321}]
+        if path == "/pulls/1321":
+            return {
+                "merged_at": "2026-05-13T00:01:00Z",
+                "labels": [{"name": "sweep-enabled"}],
+                "head": {"sha": "abc123"},
+            }
+        raise AssertionError(f"unexpected GitHub API path: {path}")
+
+    def fake_paginated_github_api(repo, path, token, item_key, params=None):
+        if path == "/issues/1321/comments":
+            return comments
+        raise AssertionError(f"unexpected paginated GitHub API path: {path}")
+
+    output_path = tmp_path / "outputs"
+    monkeypatch.setenv("GITHUB_TOKEN", "token")
+    monkeypatch.setattr(reuse, "github_api", fake_github_api)
+    monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api)
+    monkeypatch.setattr(
+        reuse.sys,
+        "argv",
+        [
+            "find_reusable_sweep_run.py",
+            "--repo",
+            "SemiAnalysisAI/InferenceX",
+            "--commit-sha",
+            "merge-sha",
+            "--event-name",
+            "push",
+            "--ref",
+            "refs/heads/main",
+            "--github-output",
+            str(output_path),
+        ],
+    )
+
+    try:
+        reuse.main()
+    except RuntimeError as error:
+        msg = str(error)
+        assert "full-sweep-enabled" in msg
+        assert "non-canary-full-sweep-enabled" in msg
+    else:
+        raise AssertionError(
+            "expected RuntimeError when PR has neither full-sweep-enabled nor "
+            "non-canary-full-sweep-enabled label"
+        )

From 398395256ccc843094fd55cb6a4fdb309dd7f17d Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 09:55:42 -0700
Subject: [PATCH 09/14] perf-changelog: drop TEMP T6 cancellation-test entry

Used by T6/T7 (cancellation-regression repro and !cancelled() fix
verification, runs 26524225403 and 26525033406). Both tests are
complete and recorded in the PR description; the entry is no longer
needed.
---
 perf-changelog.yaml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 62c2ea75f..ad37e0c27 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3159,9 +3159,3 @@
   description:
     - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
-
-- config-keys:
-    - gptoss-fp4-h100-vllm
-  description:
-    - "TEMP T6 cancellation test for PR #1503 — DO NOT MERGE — verifies always() defeats workflow cancellation"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503

From 13272b920a3154470a60a7cc5c8147a2647d5cd0 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 11:17:03 -0700
Subject: [PATCH 10/14] TEMP T8 redo: trigger Run Sweep for skipped-canary
 fan-out test

---
 perf-changelog.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index ad37e0c27..6199b55d3 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3159,3 +3159,9 @@
   description:
     - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
+
+- config-keys:
+    - gptoss-fp4-h100-vllm
+  description:
+    - "TEMP T8 redo for PR #1503 — DO NOT MERGE — verifies skipped-canary fan-out under !cancelled() gate with non-canary-full-sweep-enabled label"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503

From d35e0b76afec5b41f3f4b5de3348922196443d0d Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 11:52:43 -0700
Subject: [PATCH 11/14] =?UTF-8?q?TEMP:=20swap=20T8=20redo=20=E2=86=92=20T3?=
 =?UTF-8?q?=20redo=20(kimik2.5-int4-h100-vllm)=20for=20PR=20#1503?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DO NOT MERGE. Verifies empty-canary-config path on HEAD's !cancelled() gate.
Agentic-only config → canary-select returns [] → canary-sweep skipped via
inner guard → agentic fan-out on full search space.
---
 perf-changelog.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 6199b55d3..2ca38c3ce 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3161,7 +3161,7 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
 
 - config-keys:
-    - gptoss-fp4-h100-vllm
+    - kimik2.5-int4-h100-vllm
   description:
-    - "TEMP T8 redo for PR #1503 — DO NOT MERGE — verifies skipped-canary fan-out under !cancelled() gate with non-canary-full-sweep-enabled label"
+    - "TEMP T3 redo for PR #1503 — DO NOT MERGE — verifies empty-canary-config skip path (agentic-only config) + agentic fan-out on full search space under !cancelled() gate"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503

From b632ca848680e997934e8ac8564e072710137409 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 14:28:21 -0700
Subject: [PATCH 12/14] =?UTF-8?q?TEMP:=20swap=20T3=20redo=20=E2=86=92=20T1?=
 =?UTF-8?q?=20redo=20(gptoss-fp4-h100-vllm)=20for=20PR=20#1503?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DO NOT MERGE. Verifies canary success → fan-out + dedup path on HEAD's
!cancelled() gate.
---
 perf-changelog.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 2ca38c3ce..890b745ac 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3161,7 +3161,7 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
 
 - config-keys:
-    - kimik2.5-int4-h100-vllm
+    - gptoss-fp4-h100-vllm
   description:
-    - "TEMP T3 redo for PR #1503 — DO NOT MERGE — verifies empty-canary-config skip path (agentic-only config) + agentic fan-out on full search space under !cancelled() gate"
+    - "TEMP T1 redo for PR #1503 — DO NOT MERGE — verifies canary success → fan-out runs on remaining-search-space-config (canary entry de-duped) under !cancelled() gate"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503

From e3f40988d0f5de1ef21878365444b7ccaab13f37 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 14:36:05 -0700
Subject: [PATCH 13/14] Document non-canary-full-sweep-enabled label

AGENTS.md: list all three sweep labels in the table; clarify multi-label rejection.
.github/workflows/README.md: reuse section accepts either full-sweep label;
fail-closed wording covers both.
KLAUD_DEBUG.md / klaud-pr-status-html.md: mention the new label alongside
full-sweep-enabled.
---
 .claude/commands/klaud-pr-status-html.md |  2 +-
 .github/workflows/README.md              | 23 ++++++++++++-----------
 AGENTS.md                                |  7 ++++---
 KLAUD_DEBUG.md                           |  2 +-
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/.claude/commands/klaud-pr-status-html.md b/.claude/commands/klaud-pr-status-html.md
index 6051357c6..47d6024f8 100644
--- a/.claude/commands/klaud-pr-status-html.md
+++ b/.claude/commands/klaud-pr-status-html.md
@@ -30,7 +30,7 @@ State buckets:
 - **RUNNING** — no failed checks; at least one is `QUEUED` / `IN_PROGRESS` / `PENDING`.
 - **READY** — no failed, no pending, and at least one `Run Sweep` check is `SUCCESS`.
 - **NO_SUCCESS** — sweep ran but never produced a `SUCCESS` (e.g. all matrix jobs got SKIPPED).
-- **NO_SWEEP** — no `Run Sweep` check exists for this head SHA at all (sweep never triggered — usually missing `full-sweep-enabled` label).
+- **NO_SWEEP** — no `Run Sweep` check exists for this head SHA at all (sweep never triggered — usually missing a sweep label such as `full-sweep-enabled` or `non-canary-full-sweep-enabled`).
 
 ```bash
 : > /tmp/klaud_pr_status.tsv
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 2252d8b7c..8517d1580 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -180,18 +180,18 @@ test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvi
 
 ## Reusing an Approved PR Full Sweep
 
-If a PR has already run the full untrimmed sweep (`full-sweep-enabled` label),
-a maintainer can avoid running the same sweep again after merge by leaving a
-PR comment before merging:
+If a PR has already run the full untrimmed sweep (`full-sweep-enabled` with a
+sequential canary, or `non-canary-full-sweep-enabled` without one), a
+maintainer can avoid running the same sweep again after merge by leaving a PR
+comment before merging:
 
 ```
 /reuse-sweep-run
 ```
 
-That reuses the latest successful `run-sweep.yml` `pull_request` run for the
-PR's current head SHA. If the PR was rebased or had to merge `main` after the
-successful sweep — so the current head no longer has a matching run — pin the
-source run explicitly:
+That reuses the latest successful `run-sweep.yml` `pull_request` run whose
+commit is still part of the PR. To select a particular eligible successful
+run, pin the source run explicitly:
 
 ```
 /reuse-sweep-run <run_id>
@@ -209,10 +209,11 @@ Only comments from `OWNER`, `MEMBER`, or `COLLABORATOR` users authorize reuse.
 The most recent matching comment wins, so a maintainer can supersede an earlier
 pin by leaving a new `/reuse-sweep-run [<run_id>]` comment.
 
-Reuse fails closed: if the comment is present but the `full-sweep-enabled`
-label, source PR run, or artifacts cannot be validated, the push-to-main
-workflow fails instead of falling back to a cluster sweep. Without the comment,
-the push-to-main workflow runs the normal full sweep.
+Reuse fails closed: if the comment is present but neither full-sweep label
+(`full-sweep-enabled` or `non-canary-full-sweep-enabled`) is present, or if
+the source PR run or artifacts cannot be validated, the push-to-main workflow
+fails instead of falling back to a cluster sweep. Without the comment, the
+push-to-main workflow runs the normal full sweep.
 
 ## Validation Architecture
 
diff --git a/AGENTS.md b/AGENTS.md
index cd057f4d3..764dc2f80 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -59,12 +59,13 @@ Git: conventional commit messages. `[skip-sweep]` in commit message skips benchm
 
 ### Pull Request Sweep Labels
 
-PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting both is rejected by the workflow's `setup` job.
+PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting multiple sweep labels is rejected by the workflow's `setup` job.
 
 - `sweep-enabled` - runs the sweep with `--trim-conc` (each parallelism config reduced to its single highest concurrency). Default for most PRs.
-- `full-sweep-enabled` - runs the full intermediate concurrency sweep, identical to push-to-main. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints).
+- `full-sweep-enabled` - runs the full intermediate concurrency sweep behind a sequential single-node canary gate. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints).
+- `non-canary-full-sweep-enabled` - runs the full intermediate concurrency sweep without the canary gate. Use when the canary is flaky or not representative of the affected configuration.
 
-**The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled` / `full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check.
+**The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled`, `full-sweep-enabled`, or `non-canary-full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check.
 
 Push-to-main always runs the full untrimmed sweep unless `[skip-sweep]` is in the commit message. Trim logic lives in `trim_conc()` in `utils/process_changelog.py`: single-node entries are grouped by every non-`conc` field and only the highest-`conc` entry per group is kept; multi-node entries have their `conc` list collapsed to `[max(conc)]`.
 
diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md
index 92eb76bfc..1f81b6a9f 100644
--- a/KLAUD_DEBUG.md
+++ b/KLAUD_DEBUG.md
@@ -193,7 +193,7 @@ Or check whether any other recipe on main uses the proposed tag — if zero uses
 ## 9. PR conventions for this repo
 
 - Image-bump / new-recipe PRs I open on behalf of the user (or that the user creates) get the **`[Klaud Cold]`** title prefix.
-- Add the `full-sweep-enabled` label so a full sweep actually runs (`gh api -X POST ... labels[]=full-sweep-enabled`). Without it, the sweep is mostly SKIPPED.
+- Add the `full-sweep-enabled` label so a canary-gated full sweep actually runs (`gh api -X POST ... labels[]=full-sweep-enabled`). Use `non-canary-full-sweep-enabled` instead only when the single-node canary is flaky or unrepresentative; it runs the full sweep without the canary gate. Without one of the sweep labels, the sweep is mostly SKIPPED.
 - After any code change that shifts a PR's scope (drops a recipe, changes an image tag), **update the PR title AND body in the same step** and **verify** with `gh pr view <N> --json title,body` — `gh pr edit` silently fails (see §8).
 - `utils/merge_with_reuse.sh <N>` is the merge entrypoint; it handles the `perf-changelog.yaml` auto-append.
 

From d87804d9dfff9b7408388a23a16e6152e5c300b5 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Wed, 27 May 2026 14:36:09 -0700
Subject: [PATCH 14/14] Drop TEMP T1-redo changelog entry for PR #1503

Final cleanup. T8 redo (run 26530059401, commit 13272b92, success) is the
reusable green sweep for /reuse-sweep-run.
---
 perf-changelog.yaml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 890b745ac..ad37e0c27 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3159,9 +3159,3 @@
   description:
     - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
-
-- config-keys:
-    - gptoss-fp4-h100-vllm
-  description:
-    - "TEMP T1 redo for PR #1503 — DO NOT MERGE — verifies canary success → fan-out runs on remaining-search-space-config (canary entry de-duped) under !cancelled() gate"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503