diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index c5ece9804..62b8ff191 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -8,6 +8,7 @@ concurrency: (github.event.action == 'labeled' || github.event.action == 'unlabeled') && github.event.label.name != 'sweep-enabled' && github.event.label.name != 'full-sweep-enabled' && + github.event.label.name != 'non-canary-full-sweep-enabled' && github.run_id || 'active' }} @@ -39,7 +40,8 @@ jobs: ( (github.event.action != 'labeled' && github.event.action != 'unlabeled') || github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' ) steps: - name: Checkout code @@ -61,12 +63,14 @@ jobs: !github.event.pull_request.draft && ( contains(github.event.pull_request.labels.*.name, 'sweep-enabled') || - contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') || + contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') ) && ( (github.event.action != 'labeled' && github.event.action != 'unlabeled') || github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' ) ) || ( @@ -85,10 +89,13 @@ jobs: - name: Reject conflicting sweep labels if: >- github.event_name == 'pull_request' && - contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && - contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + ( + (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')) || + (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) || + (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) + ) run: | - echo "::error::PR has both 'sweep-enabled' and 'full-sweep-enabled' labels. Remove one — 'full-sweep-enabled' runs the full intermediate concurrency sweep; 'sweep-enabled' trims to max(conc) per parallelism config." + echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to max(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), or 'non-canary-full-sweep-enabled' (full sweep, no canary gate)." exit 1 - name: Checkout code @@ -135,9 +142,87 @@ jobs: --ref "${{ github.ref }}" \ --workflow-id "run-sweep.yml" - sweep-multi-node-1k1k: + canary-select: needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }} + if: >- + needs.setup.outputs.reuse-enabled != 'true' && + github.event_name == 'pull_request' && + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + runs-on: ubuntu-latest + outputs: + canary-config: ${{ steps.pick.outputs.canary-config }} + remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }} + steps: + - id: pick + env: + SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }} + run: | + selection=$(jq -c ' + def remove_one($needle): + if $needle == null then . + else + (index($needle)) as $idx + | if $idx == null then . else del(.[$idx]) end + end; + + # Canary is a benchmark-only smoke test — exclude entries + # whose primary purpose is eval (run-eval == true) so the + # picked canary never runs an eval pass. + (((.single_node["1k1k"] // []) + (.single_node["8k1k"] // [])) + | map(select(.["run-eval"] != true))) as $candidates + | (if ($candidates | length) == 0 then null else ($candidates | min_by(.conc)) end) as $canary + | { + canary: (if $canary == null then [] else [$canary] end), + remaining: ( + . + | .single_node = (.single_node // {}) + | .single_node["1k1k"] = ((.single_node["1k1k"] // []) | remove_one($canary)) + | .single_node["8k1k"] = ((.single_node["8k1k"] // []) | remove_one($canary)) + ) + } + ' <<<"$SEARCH_SPACE") + echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT" + echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT" + + canary-sweep: + needs: canary-select + if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: canary / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.canary-select.outputs.canary-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + run-eval: false + + sweep-multi-node-1k1k: + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k1k / strategy: @@ -174,8 +259,15 @@ jobs: run-eval: false sweep-multi-node-8k1k: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 8k1k / strategy: @@ -186,14 +278,22 @@ jobs: with: *multi-node-inputs sweep-single-node-1k1k: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k1k / strategy: fail-fast: false matrix: - config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k'] }} secrets: inherit with: &single-node-inputs exp-name: ${{ matrix.config.exp-name }} @@ -215,20 +315,35 @@ jobs: run-eval: ${{ matrix.config.run-eval }} sweep-single-node-8k1k: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' && + toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 8k1k / strategy: fail-fast: false matrix: - config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }} + config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k'] }} secrets: inherit with: *single-node-inputs sweep-agentic: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: agentic / strategy: @@ -259,8 +374,15 @@ jobs: scenario-type: agentic-coding sweep-multi-node-agentic: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node agentic / strategy: @@ -298,8 +420,16 @@ jobs: scenario-type: agentic-coding sweep-evals: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && + toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' + }} uses: ./.github/workflows/benchmark-tmpl.yml name: eval / strategy: @@ -328,8 +458,16 @@ jobs: eval-only: true sweep-multi-node-evals: - needs: setup - if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }} + needs: [setup, canary-select, canary-sweep] + if: >- + ${{ + !cancelled() && + needs.setup.result == 'success' && + needs.setup.outputs.reuse-enabled != 'true' && + (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') && + toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && + toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' + }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node eval / strategy: @@ -368,6 +506,7 @@ jobs: collect-results: needs: [ + canary-sweep, sweep-single-node-1k1k, sweep-single-node-8k1k, sweep-agentic, @@ -381,6 +520,7 @@ jobs: always() && needs.setup.result == 'success' && ( + needs.canary-sweep.result == 'success' || needs.sweep-single-node-1k1k.result != 'skipped' || needs.sweep-single-node-8k1k.result != 'skipped' || needs.sweep-multi-node-1k1k.result != 'skipped' || @@ -592,12 +732,14 @@ jobs: !github.event.pull_request.draft && ( contains(github.event.pull_request.labels.*.name, 'sweep-enabled') || - contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') || + contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') ) && ( (github.event.action != 'labeled' && github.event.action != 'unlabeled') || github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' ) runs-on: ubuntu-latest permissions: diff --git a/perf-changelog.yaml b/perf-changelog.yaml index ad37e0c27..2ca38c3ce 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3159,3 +3159,9 @@ description: - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data." pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558 + +- config-keys: + - kimik2.5-int4-h100-vllm + description: + - "TEMP T3 redo for PR #1503 — DO NOT MERGE — verifies empty-canary-config skip path (agentic-only config) + agentic fan-out on full search space under !cancelled() gate" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1503 diff --git a/utils/find_reusable_sweep_run.py b/utils/find_reusable_sweep_run.py index 8af018a8e..3f814d2e5 100644 --- a/utils/find_reusable_sweep_run.py +++ b/utils/find_reusable_sweep_run.py @@ -276,7 +276,11 @@ def main() -> int: parser.add_argument("--event-name", required=True) parser.add_argument("--ref", required=True) parser.add_argument("--workflow-id", default="run-sweep.yml") - parser.add_argument("--full-sweep-label", default="full-sweep-enabled") + parser.add_argument( + "--full-sweep-label", + default="full-sweep-enabled,non-canary-full-sweep-enabled", + help="Comma-separated PR labels treated as 'full sweep'; reuse requires at least one.", + ) parser.add_argument("--pinned-run-command", default="/reuse-sweep-run") parser.add_argument( "--allowed-author-associations", @@ -355,10 +359,16 @@ def main() -> int: pr = github_api(args.repo, f"/pulls/{pr_number}", token) labels = label_names(pr) - if args.full_sweep_label not in labels: + accepted_full_sweep_labels = { + value.strip() + for value in args.full_sweep_label.split(",") + if value.strip() + } + if not accepted_full_sweep_labels.intersection(labels): + accepted = ", ".join(sorted(accepted_full_sweep_labels)) raise RuntimeError( - f"PR #{pr_number} has {args.pinned_run_command} authorization but not " - f"{args.full_sweep_label}." + f"PR #{pr_number} has {args.pinned_run_command} authorization but is " + f"missing any of: {accepted}." ) if not pr.get("merged_at"): raise RuntimeError(f"PR #{pr_number} is not marked as merged.") diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh index 9336b81c2..a94ea0f69 100755 --- a/utils/merge_with_reuse.sh +++ b/utils/merge_with_reuse.sh @@ -38,8 +38,13 @@ PR_STATE="$(jq -r '.state' <<<"$PR_INFO")" [ "$PR_STATE" = "OPEN" ] || die "PR #${PR} is ${PR_STATE}, expected OPEN" HEAD_BRANCH="$(jq -r '.headRefName' <<<"$PR_INFO")" -HAS_FULL_SWEEP="$(jq -r '[.labels[].name] | index("full-sweep-enabled") // ""' <<<"$PR_INFO")" -[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing the 'full-sweep-enabled' label" +HAS_FULL_SWEEP="$(jq -r ' + [.labels[].name] as $names + | if (($names | index("full-sweep-enabled")) != null) + or (($names | index("non-canary-full-sweep-enabled")) != null) + then "1" else "" end +' <<<"$PR_INFO")" +[ -n "$HAS_FULL_SWEEP" ] || die "PR #${PR} is missing 'full-sweep-enabled' or 'non-canary-full-sweep-enabled' label" # Warn early if no successful run exists on any current PR commit. PR_SHAS="$(gh api "repos/${REPO}/pulls/${PR}/commits" --paginate --jq '.[].sha')" diff --git a/utils/test_find_reusable_sweep_run.py b/utils/test_find_reusable_sweep_run.py index e779bbe8b..0f8a09ac7 100644 --- a/utils/test_find_reusable_sweep_run.py +++ b/utils/test_find_reusable_sweep_run.py @@ -455,3 +455,133 @@ def fake_paginated_github_api(repo, path, token, item_key, params=None): assert outputs["reuse-enabled"] == "false" assert outputs["reuse-source-pr-number"] == "1321" assert outputs["reuse-reason"] == "PR #1321 has no /reuse-sweep-run authorization" + + +def test_main_accepts_non_canary_full_sweep_label(monkeypatch, tmp_path) -> None: + comments = [ + { + "created_at": "2026-05-13T00:00:00Z", + "author_association": "OWNER", + "body": "/reuse-sweep-run 25763404168", + }, + ] + run = { + "id": 25763404168, + "event": "pull_request", + "status": "completed", + "conclusion": "success", + "path": ".github/workflows/run-sweep.yml", + "pull_requests": [{"number": 1321}], + "run_attempt": 1, + "html_url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/25763404168", + "head_sha": "abc123", + } + + def fake_github_api(repo, path, token, params=None): + if path == "/commits/merge-sha/pulls": + return [{"number": 1321}] + if path == "/pulls/1321": + return { + "merged_at": "2026-05-13T00:01:00Z", + "labels": [{"name": "non-canary-full-sweep-enabled"}], + "head": {"sha": "abc123"}, + } + if path == "/actions/runs/25763404168": + return run + raise AssertionError(f"unexpected GitHub API path: {path}") + + def fake_paginated_github_api(repo, path, token, item_key, params=None): + if path == "/issues/1321/comments": + return comments + if path == "/pulls/1321/commits": + return [{"sha": "abc123"}] + if path == "/actions/runs/25763404168/artifacts": + return [{"name": "results_bmk"}] + raise AssertionError(f"unexpected paginated GitHub API path: {path}") + + output_path = tmp_path / "outputs" + monkeypatch.setenv("GITHUB_TOKEN", "token") + monkeypatch.setattr(reuse, "github_api", fake_github_api) + monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api) + monkeypatch.setattr( + reuse.sys, + "argv", + [ + "find_reusable_sweep_run.py", + "--repo", + "SemiAnalysisAI/InferenceX", + "--commit-sha", + "merge-sha", + "--event-name", + "push", + "--ref", + "refs/heads/main", + "--github-output", + str(output_path), + ], + ) + + assert reuse.main() == 0 + + outputs = dict(line.split("=", 1) for line in output_path.read_text().splitlines()) + assert outputs["reuse-enabled"] == "true" + + +def test_main_rejects_pr_with_neither_full_sweep_label(monkeypatch, tmp_path) -> None: + comments = [ + { + "created_at": "2026-05-13T00:00:00Z", + "author_association": "OWNER", + "body": "/reuse-sweep-run 25763404168", + }, + ] + + def fake_github_api(repo, path, token, params=None): + if path == "/commits/merge-sha/pulls": + return [{"number": 1321}] + if path == "/pulls/1321": + return { + "merged_at": "2026-05-13T00:01:00Z", + "labels": [{"name": "sweep-enabled"}], + "head": {"sha": "abc123"}, + } + raise AssertionError(f"unexpected GitHub API path: {path}") + + def fake_paginated_github_api(repo, path, token, item_key, params=None): + if path == "/issues/1321/comments": + return comments + raise AssertionError(f"unexpected paginated GitHub API path: {path}") + + output_path = tmp_path / "outputs" + monkeypatch.setenv("GITHUB_TOKEN", "token") + monkeypatch.setattr(reuse, "github_api", fake_github_api) + monkeypatch.setattr(reuse, "paginated_github_api", fake_paginated_github_api) + monkeypatch.setattr( + reuse.sys, + "argv", + [ + "find_reusable_sweep_run.py", + "--repo", + "SemiAnalysisAI/InferenceX", + "--commit-sha", + "merge-sha", + "--event-name", + "push", + "--ref", + "refs/heads/main", + "--github-output", + str(output_path), + ], + ) + + try: + reuse.main() + except RuntimeError as error: + msg = str(error) + assert "full-sweep-enabled" in msg + assert "non-canary-full-sweep-enabled" in msg + else: + raise AssertionError( + "expected RuntimeError when PR has neither full-sweep-enabled nor " + "non-canary-full-sweep-enabled label" + )