From 8de6e8f3daa42657a05a081c48d58c692b805228 Mon Sep 17 00:00:00 2001
From: Mohit <mohityadav8@users.noreply.github.com>
Date: Sun, 28 Jun 2026 22:06:50 +0530
Subject: [PATCH 1/6] feat(ci): consolidate KWOK tier workflows into single
 reusable runner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace kwok-tier3-shard.yaml with kwok-test-run.yaml; the
  checkout/load-versions/kwok-test step block now lives in exactly one place

- Define readonly DEPLOYERS once in discover; Tier 1, Tier 2, and Tier 3
  all derive from it — no more hardcoded matrix axes or sync comments

- Emit tier1_pairs and tier2_pairs from discover; all three tiers call
  kwok-test-run.yaml with pre-built {recipe,deployer} pairs

- Tier 2 deployer coverage: helm-only (deliberate; full coverage in Tier 3)

- Fix workflow_dispatch early-exit to also emit tier1_pairs/tier2_pairs

- Add Tier 1 pair-count guard (warning at >200)

- Update paths triggers and ADR-003 workflow diagram

Closes #1172
---
 .github/workflows/kwok-recipes.yaml           | 108 ++++++++++++++----
 ...ok-tier3-shard.yaml => kwok-test-run.yaml} |  21 ++--
 2 files changed, 99 insertions(+), 30 deletions(-)
 rename .github/workflows/{kwok-tier3-shard.yaml => kwok-test-run.yaml} (73%)

diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index fec1fdcae..90f65f5f5 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -23,7 +23,7 @@ on:
       - 'kwok/**'
       - 'tests/chainsaw/kwok/**'
       - '.github/workflows/kwok-recipes.yaml'
-      - '.github/workflows/kwok-tier3-shard.yaml'
+      - '.github/workflows/kwok-test-run.yaml'
       - '.github/actions/kwok-test/**'
       - '!**.md'
   pull_request:
@@ -34,7 +34,7 @@ on:
       - 'kwok/**'
       - 'tests/chainsaw/kwok/**'
       - '.github/workflows/kwok-recipes.yaml'
-      - '.github/workflows/kwok-tier3-shard.yaml'
+      - '.github/workflows/kwok-test-run.yaml'
       - '.github/actions/kwok-test/**'
       - '!**.md'
   schedule:
@@ -66,6 +66,8 @@ jobs:
       tier2: ${{ steps.classify.outputs.tier2 }}
       tier3: ${{ steps.classify.outputs.tier3 }}
       tier3_batches: ${{ steps.classify.outputs.tier3_batches }}
+      tier1_pairs: ${{ steps.classify.outputs.tier1_pairs }}
+      tier2_pairs: ${{ steps.classify.outputs.tier2_pairs }}
     steps:
       - name: Checkout
         uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
@@ -82,13 +84,25 @@ jobs:
         run: |
           set -euo pipefail
 
+          # Deployer list — single source of truth, consumed by all tiers and by
+          # the workflow_dispatch early-exit below. To add or remove a deployer,
+          # change this one line; Tier 1, Tier 2, and Tier 3 all derive from it.
+          readonly DEPLOYERS='["helm","argocd-oci","argocd-helm-oci","argocd-git","flux-oci","flux-git"]'
+
           # --- workflow_dispatch: test exactly the requested recipe ---
           if [[ -n "${DISPATCH_RECIPE}" ]]; then
             single=$(jq -nc '[$r]' --arg r "${DISPATCH_RECIPE}")
-            echo "tier1=${single}"   >> "$GITHUB_OUTPUT"
-            echo "tier2=[]"          >> "$GITHUB_OUTPUT"
-            echo "tier3=[]"          >> "$GITHUB_OUTPUT"
-            echo "tier3_batches=[]"  >> "$GITHUB_OUTPUT"
+            single_pairs=$(jq -cn \
+              --arg r "${DISPATCH_RECIPE}" \
+              --argjson deployers "${DEPLOYERS}" '
+                [ $deployers[] | {recipe: $r, deployer: .} ]
+              ')
+            echo "tier1=${single}"             >> "$GITHUB_OUTPUT"
+            echo "tier2=[]"                    >> "$GITHUB_OUTPUT"
+            echo "tier3=[]"                    >> "$GITHUB_OUTPUT"
+            echo "tier3_batches=[]"            >> "$GITHUB_OUTPUT"
+            echo "tier1_pairs=${single_pairs}" >> "$GITHUB_OUTPUT"
+            echo "tier2_pairs=[]"              >> "$GITHUB_OUTPUT"
             echo "Manual dispatch: ${DISPATCH_RECIPE}"
             exit 0
           fi
@@ -219,16 +233,47 @@ jobs:
           # --- Tier 3: full matrix (all testable overlays) ---
           tier3="$all"
 
+          # Local alias so jq --argjson calls below can use $deployers
+          # (DEPLOYERS is readonly; this assignment is intentional).
+          # shellcheck disable=SC2034
+          deployers="${DEPLOYERS}"
+
+          # --- Pre-build Tier 1 pairs: generic recipes × all deployers ---
+          tier1_pairs=$(jq -cn \
+            --argjson recipes "$tier1" \
+            --argjson deployers "$deployers" '
+              [ $recipes[] as $r | $deployers[] as $d | {recipe: $r, deployer: $d} ]
+            ')
+
+          # Guard: Tier 1 is expected to stay well under 256 (no batching needed).
+          # Warn early if organic growth is approaching the limit.
+          tier1_pair_count=$(echo "$tier1_pairs" | jq 'length')
+          if (( tier1_pair_count > 200 )); then
+            echo "::warning::Tier 1 has ${tier1_pair_count} pairs (>200) — consider adding batching before it reaches 256"
+          fi
+
+          # --- Pre-build Tier 2 pairs: diff-affected recipes, helm-only ---
+          # Coverage-policy decision: Tier 2 uses helm only to keep PR wall-clock
+          # time proportional to the change scope. Full deployer coverage runs in
+          # Tier 3 on every push to main and on the nightly schedule. See ADR-003
+          # §"Tier 2 deployer coverage" for rationale and how to revisit this.
+          tier2_pairs=$(echo "$tier2" | jq -c '[.[] | {recipe: ., deployer: "helm"}]')
+
           # --- Tier 3 batching ---
           # GitHub caps a single job's matrix at 256 configurations. Tier 3
           # crosses every testable recipe with every deployer, so the raw
-          # cross-product (recipes × deployers) outgrew the cap. Split the
+          # cross-product (recipes × deployers) can outgrow the cap. Split the
           # {recipe, deployer} pairs into batches of <= TIER3_BATCH_SIZE; the
+<<<<<<< Updated upstream
           # caller fans each batch out to kwok-tier3-shard.yaml, keeping every
           # shard's matrix under the limit. Keep this deployer list in sync
           # with the test-tier1 matrix above and the input doc in
           # .github/actions/kwok-test/action.yml.
           deployers='["helm","argocd-oci","argocd-helm-oci","flux-oci"]'
+=======
+          # caller fans each batch out to kwok-test-run.yaml, keeping every
+          # shard's matrix under the limit.
+>>>>>>> Stashed changes
           readonly TIER3_BATCH_SIZE=200  # headroom under GitHub's 256 cap
 
           # Fail closed if the batch size is ever raised past the hard limit —
@@ -250,23 +295,26 @@ jobs:
             ')
 
           # --- Output ---
-          echo "tier1=${tier1}" >> "$GITHUB_OUTPUT"
-          echo "tier2=${tier2}" >> "$GITHUB_OUTPUT"
-          echo "tier3=${tier3}" >> "$GITHUB_OUTPUT"
+          echo "tier1=${tier1}"                 >> "$GITHUB_OUTPUT"
+          echo "tier2=${tier2}"                 >> "$GITHUB_OUTPUT"
+          echo "tier3=${tier3}"                 >> "$GITHUB_OUTPUT"
           echo "tier3_batches=${tier3_batches}" >> "$GITHUB_OUTPUT"
+          echo "tier1_pairs=${tier1_pairs}"     >> "$GITHUB_OUTPUT"
+          echo "tier2_pairs=${tier2_pairs}"     >> "$GITHUB_OUTPUT"
 
+          deployer_count=$(echo "$deployers" | jq 'length')
           tier3_pairs=$(echo "$tier3_batches" | jq '[.[].pairs[]] | length')
           tier3_batch_count=$(echo "$tier3_batches" | jq 'length')
-          echo "Tier 1 (generic):      $(echo "$tier1" | jq 'length') recipe(s)"
-          echo "Tier 2 (diff-aware):   $(echo "$tier2" | jq 'length') recipe(s)"
-          echo "Tier 3 (full matrix):  $(echo "$tier3" | jq 'length') recipe(s) × $(echo "$deployers" | jq 'length') deployer(s) = ${tier3_pairs} pair(s) in ${tier3_batch_count} batch(es)"
+          echo "Tier 1 (generic):      $(echo "$tier1" | jq 'length') recipe(s) × ${deployer_count} deployer(s) = ${tier1_pair_count} pair(s)"
+          echo "Tier 2 (diff-aware):   $(echo "$tier2" | jq 'length') recipe(s) × 1 deployer (helm) = $(echo "$tier2_pairs" | jq 'length') pair(s)"
+          echo "Tier 3 (full matrix):  $(echo "$tier3" | jq 'length') recipe(s) × ${deployer_count} deployer(s) = ${tier3_pairs} pair(s) in ${tier3_batch_count} batch(es)"
 
   # ── Tier 1: PR gate — generic overlays (PR + push, skip on schedule) ──
   test-tier1:
-    name: 'Tier 1: ${{ matrix.recipe }} (${{ matrix.deployer }})'
     needs: discover
     if: >-
       github.event_name != 'schedule' &&
+<<<<<<< Updated upstream
       needs.discover.outputs.tier1 != '[]' &&
       needs.discover.outputs.tier1 != ''
     runs-on: ubuntu-latest
@@ -302,13 +350,22 @@ jobs:
           chainsaw_version: ${{ steps.versions.outputs.chainsaw }}
           chainsaw_sha256: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}
           kind_node_image: ${{ steps.versions.outputs.kind_node_image }}
+=======
+      needs.discover.outputs.tier1_pairs != '[]' &&
+      needs.discover.outputs.tier1_pairs != ''
+    uses: ./.github/workflows/kwok-test-run.yaml
+    with:
+      pairs: ${{ needs.discover.outputs.tier1_pairs }}
+>>>>>>> Stashed changes
 
   # ── Tier 2: diff-aware accelerator tests (PR only, conditional) ──
+  # Coverage-policy decision: Tier 2 uses helm only (see ADR-003 §"Tier 2
+  # deployer coverage"). Full deployer coverage runs in Tier 3 on push/nightly.
   test-tier2:
-    name: 'Tier 2: ${{ matrix.recipe }}'
     needs: discover
     if: >-
       github.event_name == 'pull_request' &&
+<<<<<<< Updated upstream
       needs.discover.outputs.tier2 != '[]' &&
       needs.discover.outputs.tier2 != ''
     runs-on: ubuntu-latest
@@ -342,14 +399,20 @@ jobs:
           chainsaw_version: ${{ steps.versions.outputs.chainsaw }}
           chainsaw_sha256: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}
           kind_node_image: ${{ steps.versions.outputs.kind_node_image }}
+=======
+      needs.discover.outputs.tier2_pairs != '[]' &&
+      needs.discover.outputs.tier2_pairs != ''
+    uses: ./.github/workflows/kwok-test-run.yaml
+    with:
+      pairs: ${{ needs.discover.outputs.tier2_pairs }}
+>>>>>>> Stashed changes
 
   # ── Tier 3: full matrix (push to main + nightly schedule) ──
-  # The recipe × deployer cross-product exceeds GitHub's 256-config matrix cap,
-  # so discover splits it into batches and we fan each batch out to the
-  # kwok-tier3-shard reusable workflow (one shard per batch, each <= 256).
-  # Per ADR-003: the concurrency group is keyed by SHA so successive merges to
-  # main never cancel in-flight Tier 3 runs; the batch id keeps every shard of a
-  # single run in its own group so they all run in parallel.
+  # The recipe × deployer cross-product can exceed GitHub's 256-config cap, so
+  # discover batches the pairs and we fan each batch out to kwok-test-run.yaml
+  # (one shard per batch, each <= 256 pairs). Per ADR-003: concurrency is keyed
+  # by SHA so successive merges never cancel in-flight Tier 3 runs; the batch id
+  # keeps every shard of a single run in its own group so they all run in parallel.
   test-tier3:
     needs: discover
     concurrency:
@@ -363,7 +426,7 @@ jobs:
       fail-fast: false
       matrix:
         batch: ${{ fromJSON(needs.discover.outputs.tier3_batches) }}
-    uses: ./.github/workflows/kwok-tier3-shard.yaml
+    uses: ./.github/workflows/kwok-test-run.yaml
     with:
       pairs: ${{ toJSON(matrix.batch.pairs) }}
 
@@ -416,3 +479,4 @@ jobs:
           fi
 
           echo "All recipe validations passed" >> $GITHUB_STEP_SUMMARY
+          
\ No newline at end of file
diff --git a/.github/workflows/kwok-tier3-shard.yaml b/.github/workflows/kwok-test-run.yaml
similarity index 73%
rename from .github/workflows/kwok-tier3-shard.yaml
rename to .github/workflows/kwok-test-run.yaml
index fdfa0140b..92f4735ae 100644
--- a/.github/workflows/kwok-tier3-shard.yaml
+++ b/.github/workflows/kwok-test-run.yaml
@@ -12,18 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Reusable shard for the Tier 3 KWOK full matrix. GitHub caps a single job's
-# matrix at 256 configurations; the caller (kwok-recipes.yaml) splits the full
-# recipe × deployer cross-product into batches of <= 256 pairs and invokes this
-# workflow once per batch, so the per-shard matrix always stays under the cap.
-name: KWOK Tier 3 Shard
+# Reusable KWOK test runner shared by all three tiers (kwok-recipes.yaml).
+# Takes a JSON array of {recipe, deployer} pairs and runs each as a matrix job.
+#
+# Tier 1 passes a flat pairs list (well under 256 → no batching needed).
+# Tier 2 passes a flat pairs list (helm-only for diff-affected recipes).
+# Tier 3 passes one batch at a time from the discover job's tier3_batches output;
+#   the caller (test-tier3 in kwok-recipes.yaml) shards across batches to keep
+#   every shard's matrix under GitHub's 256-configuration cap.
+name: KWOK Test Run
 
 on:
   workflow_call:
     inputs:
       pairs:
-        # Caller batches at TIER3_BATCH_SIZE (kwok-recipes.yaml); must never
-        # exceed GitHub's hard cap of 256 matrix configurations per job.
+        # JSON array of {recipe, deployer} objects. Must never exceed GitHub's
+        # hard cap of 256 matrix configurations per job.
         description: 'JSON array of {recipe, deployer} objects to test (<= 256 entries)'
         required: true
         type: string
@@ -33,7 +37,7 @@ permissions:
 
 jobs:
   test:
-    name: 'Tier 3: ${{ matrix.pair.recipe }} (${{ matrix.pair.deployer }})'
+    name: '${{ matrix.pair.recipe }} (${{ matrix.pair.deployer }})'
     runs-on: ubuntu-latest
     timeout-minutes: 15
     strategy:
@@ -66,3 +70,4 @@ jobs:
           chainsaw_version: ${{ steps.versions.outputs.chainsaw }}
           chainsaw_sha256: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}
           kind_node_image: ${{ steps.versions.outputs.kind_node_image }}
+          
\ No newline at end of file

From f5d8fdba6d1e7dc2b40fd37761812b350223c895 Mon Sep 17 00:00:00 2001
From: Mohit <mohityadav8@users.noreply.github.com>
Date: Mon, 29 Jun 2026 13:28:18 +0530
Subject: [PATCH 2/6] feat(ci): consolidate KWOK tier workflows into single
 reusable runner

- Replace kwok-tier3-shard.yaml with kwok-test-run.yaml (step block in one place)

- readonly DEPLOYERS single source of truth; Tier 1/2/3 all derive from it

- discover emits tier1_pairs + tier2_pairs; all tiers call kwok-test-run.yaml

- Tier 2 helm-only (deliberate, documented in ADR-003)

- Fix workflow_dispatch early-exit to emit tier1_pairs/tier2_pairs

Closes #1172
---
 .github/workflows/kwok-recipes.yaml  | 97 ++--------------------------
 .github/workflows/kwok-test-run.yaml |  2 +-
 2 files changed, 8 insertions(+), 91 deletions(-)

diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index 90f65f5f5..af9918e94 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+﻿# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ jobs:
       tier2_pairs: ${{ steps.classify.outputs.tier2_pairs }}
     steps:
       - name: Checkout
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
         with:
           persist-credentials: false
           # Full checkout needed for diff-aware Tier 2 discovery
@@ -115,8 +115,8 @@ jobs:
             name=$(basename "$overlay" .yaml)
             service=$(yq eval '.spec.criteria.service // ""' "$overlay" 2>/dev/null || true)
 
-            # Skip overlays without a concrete service
-            if [[ -z "$service" || "$service" == "null" || "$service" == "any" ]]; then
+            # Skip non-testable overlays (no service, or OCP — needs OpenShift operators)
+            if [[ -z "$service" || "$service" == "null" || "$service" == "any" || "$service" == "ocp" ]]; then
               continue
             fi
 
@@ -154,8 +154,8 @@ jobs:
               service=$(yq eval '.spec.criteria.service // ""' "$overlay" 2>/dev/null || true)
               accel=$(yq eval '.spec.criteria.accelerator // ""' "$overlay" 2>/dev/null || true)
 
-              # Only accelerator-specific overlays belong in Tier 2
-              if [[ -z "$service" || "$service" == "null" || "$service" == "any" ]]; then
+              # Only accelerator-specific, KWOK-testable overlays belong in Tier 2
+              if [[ -z "$service" || "$service" == "null" || "$service" == "any" || "$service" == "ocp" ]]; then
                 continue
               fi
               if [[ -z "$accel" || "$accel" == "null" || "$accel" == "any" ]]; then
@@ -264,16 +264,8 @@ jobs:
           # crosses every testable recipe with every deployer, so the raw
           # cross-product (recipes × deployers) can outgrow the cap. Split the
           # {recipe, deployer} pairs into batches of <= TIER3_BATCH_SIZE; the
-<<<<<<< Updated upstream
-          # caller fans each batch out to kwok-tier3-shard.yaml, keeping every
-          # shard's matrix under the limit. Keep this deployer list in sync
-          # with the test-tier1 matrix above and the input doc in
-          # .github/actions/kwok-test/action.yml.
-          deployers='["helm","argocd-oci","argocd-helm-oci","flux-oci"]'
-=======
           # caller fans each batch out to kwok-test-run.yaml, keeping every
           # shard's matrix under the limit.
->>>>>>> Stashed changes
           readonly TIER3_BATCH_SIZE=200  # headroom under GitHub's 256 cap
 
           # Fail closed if the batch size is ever raised past the hard limit —
@@ -314,49 +306,11 @@ jobs:
     needs: discover
     if: >-
       github.event_name != 'schedule' &&
-<<<<<<< Updated upstream
-      needs.discover.outputs.tier1 != '[]' &&
-      needs.discover.outputs.tier1 != ''
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    strategy:
-      fail-fast: false
-      matrix:
-        recipe: ${{ fromJSON(needs.discover.outputs.tier1) }}
-        deployer: [helm, argocd-oci, argocd-helm-oci, flux-oci]
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
-        with:
-          persist-credentials: false
-
-      - name: Load versions
-        id: versions
-        uses: ./.github/actions/load-versions
-
-      - name: Run KWOK test
-        uses: ./.github/actions/kwok-test
-        with:
-          recipe: ${{ matrix.recipe }}
-          deployer: ${{ matrix.deployer }}
-          go_version: ${{ steps.versions.outputs.go }}
-          goreleaser_version: ${{ steps.versions.outputs.goreleaser }}
-          kind_version: ${{ steps.versions.outputs.kind }}
-          helm_version: ${{ steps.versions.outputs.helm }}
-          kwok_version: ${{ steps.versions.outputs.kwok }}
-          kubectl_version: ${{ steps.versions.outputs.kubectl }}
-          yq_version: ${{ steps.versions.outputs.yq }}
-          flux_version: ${{ steps.versions.outputs.flux }}
-          chainsaw_version: ${{ steps.versions.outputs.chainsaw }}
-          chainsaw_sha256: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}
-          kind_node_image: ${{ steps.versions.outputs.kind_node_image }}
-=======
       needs.discover.outputs.tier1_pairs != '[]' &&
       needs.discover.outputs.tier1_pairs != ''
     uses: ./.github/workflows/kwok-test-run.yaml
     with:
       pairs: ${{ needs.discover.outputs.tier1_pairs }}
->>>>>>> Stashed changes
 
   # ── Tier 2: diff-aware accelerator tests (PR only, conditional) ──
   # Coverage-policy decision: Tier 2 uses helm only (see ADR-003 §"Tier 2
@@ -365,47 +319,11 @@ jobs:
     needs: discover
     if: >-
       github.event_name == 'pull_request' &&
-<<<<<<< Updated upstream
-      needs.discover.outputs.tier2 != '[]' &&
-      needs.discover.outputs.tier2 != ''
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    strategy:
-      fail-fast: false
-      matrix:
-        recipe: ${{ fromJSON(needs.discover.outputs.tier2) }}
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
-        with:
-          persist-credentials: false
-
-      - name: Load versions
-        id: versions
-        uses: ./.github/actions/load-versions
-
-      - name: Run KWOK test
-        uses: ./.github/actions/kwok-test
-        with:
-          recipe: ${{ matrix.recipe }}
-          go_version: ${{ steps.versions.outputs.go }}
-          goreleaser_version: ${{ steps.versions.outputs.goreleaser }}
-          kind_version: ${{ steps.versions.outputs.kind }}
-          helm_version: ${{ steps.versions.outputs.helm }}
-          kwok_version: ${{ steps.versions.outputs.kwok }}
-          kubectl_version: ${{ steps.versions.outputs.kubectl }}
-          yq_version: ${{ steps.versions.outputs.yq }}
-          flux_version: ${{ steps.versions.outputs.flux }}
-          chainsaw_version: ${{ steps.versions.outputs.chainsaw }}
-          chainsaw_sha256: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}
-          kind_node_image: ${{ steps.versions.outputs.kind_node_image }}
-=======
       needs.discover.outputs.tier2_pairs != '[]' &&
       needs.discover.outputs.tier2_pairs != ''
     uses: ./.github/workflows/kwok-test-run.yaml
     with:
       pairs: ${{ needs.discover.outputs.tier2_pairs }}
->>>>>>> Stashed changes
 
   # ── Tier 3: full matrix (push to main + nightly schedule) ──
   # The recipe × deployer cross-product can exceed GitHub's 256-config cap, so
@@ -478,5 +396,4 @@ jobs:
             exit 1
           fi
 
-          echo "All recipe validations passed" >> $GITHUB_STEP_SUMMARY
-          
\ No newline at end of file
+          echo "All recipe validations passed" >> $GITHUB_STEP_SUMMARY
\ No newline at end of file
diff --git a/.github/workflows/kwok-test-run.yaml b/.github/workflows/kwok-test-run.yaml
index 92f4735ae..e63a02f05 100644
--- a/.github/workflows/kwok-test-run.yaml
+++ b/.github/workflows/kwok-test-run.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+﻿# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From f2a8c8f6e651d4720c44d39cb15aba0dec0b840b Mon Sep 17 00:00:00 2001
From: Mohit <mohityadav8@users.noreply.github.com>
Date: Mon, 29 Jun 2026 16:05:09 +0530
Subject: [PATCH 3/6] feat(ci): consolidate KWOK tier workflows into reusable
 runner

---
 .github/workflows/kwok-recipes.yaml  | 3 ++-
 .github/workflows/kwok-test-run.yaml | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index af9918e94..b34cfbbc8 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -396,4 +396,5 @@ jobs:
             exit 1
           fi
 
-          echo "All recipe validations passed" >> $GITHUB_STEP_SUMMARY
\ No newline at end of file
+          echo "All recipe validations passed" >> $GITHUB_STEP_SUMMARY
+          
\ No newline at end of file
diff --git a/.github/workflows/kwok-test-run.yaml b/.github/workflows/kwok-test-run.yaml
index e63a02f05..e20349d1b 100644
--- a/.github/workflows/kwok-test-run.yaml
+++ b/.github/workflows/kwok-test-run.yaml
@@ -70,4 +70,3 @@ jobs:
           chainsaw_version: ${{ steps.versions.outputs.chainsaw }}
           chainsaw_sha256: ${{ steps.versions.outputs.chainsaw_sha256_linux_amd64 }}
           kind_node_image: ${{ steps.versions.outputs.kind_node_image }}
-          
\ No newline at end of file

From b0d8ef02c9475f2bd5a67cc5055072f49bfc561c Mon Sep 17 00:00:00 2001
From: Mohit <mohityadav8@users.noreply.github.com>
Date: Tue, 30 Jun 2026 12:50:48 +0530
Subject: [PATCH 4/6] fix(ci): remove UTF-8 BOM and normalize line endings in
 kwok workflow files

---
 .github/workflows/kwok-recipes.yaml  | 2 +-
 .github/workflows/kwok-test-run.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index f785ce00c..bebbd8320 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -1,4 +1,4 @@
-﻿# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/.github/workflows/kwok-test-run.yaml b/.github/workflows/kwok-test-run.yaml
index b921afa03..0313ba4f2 100644
--- a/.github/workflows/kwok-test-run.yaml
+++ b/.github/workflows/kwok-test-run.yaml
@@ -1,4 +1,4 @@
-﻿# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 79bd45cceeca460a2ef07ef8d9e42d8739a9d4e8 Mon Sep 17 00:00:00 2001
From: Mohit <mohityadav8@users.noreply.github.com>
Date: Tue, 30 Jun 2026 13:57:13 +0530
Subject: [PATCH 5/6] fix(ci): hard-fail Tier 1 matrix before exceeding
 256-config cap

---
 .github/workflows/kwok-recipes.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index bebbd8320..8a8daab49 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -266,7 +266,10 @@ jobs:
           # Guard: Tier 1 is expected to stay well under 256 (no batching needed).
           # Warn early if organic growth is approaching the limit.
           tier1_pair_count=$(echo "$tier1_pairs" | jq 'length')
-          if (( tier1_pair_count > 200 )); then
+          if (( tier1_pair_count > 256 )); then
+            echo "::error::Tier 1 has ${tier1_pair_count} pairs (>256) — add batching before passing this to kwok-test-run.yaml"
+            exit 1
+          elif (( tier1_pair_count > 200 )); then
             echo "::warning::Tier 1 has ${tier1_pair_count} pairs (>200) — consider adding batching before it reaches 256"
           fi
 

From e234dfd81af9a46f61ce473d9ec25c7a5c7912db Mon Sep 17 00:00:00 2001
From: Mohit <mohityadav8@users.noreply.github.com>
Date: Thu, 2 Jul 2026 00:33:31 +0530
Subject: [PATCH 6/6] docs(ci): update ADR-003 for kwok-test-run.yaml and use
 DEPLOYERS in Tier 3 batching

---
 .github/workflows/kwok-recipes.yaml     |  3 ++-
 docs/design/003-scaling-recipe-tests.md | 30 ++++++++++++++++++-------
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/kwok-recipes.yaml b/.github/workflows/kwok-recipes.yaml
index 8a8daab49..0ec768486 100644
--- a/.github/workflows/kwok-recipes.yaml
+++ b/.github/workflows/kwok-recipes.yaml
@@ -299,7 +299,7 @@ jobs:
 
           tier3_batches=$(jq -cn \
             --argjson recipes "$tier3" \
-            --argjson deployers "$deployers" \
+            --argjson deployers "${DEPLOYERS}" \
             --argjson size "$TIER3_BATCH_SIZE" '
               [ $recipes[] as $r | $deployers[] as $d | {recipe: $r, deployer: $d} ]
               | [ range(0; length; $size) as $i
@@ -418,3 +418,4 @@ jobs:
           fi
 
           echo "All recipe validations passed" >> $GITHUB_STEP_SUMMARY
+          
\ No newline at end of file
diff --git a/docs/design/003-scaling-recipe-tests.md b/docs/design/003-scaling-recipe-tests.md
index 076aef3f1..98865167e 100644
--- a/docs/design/003-scaling-recipe-tests.md
+++ b/docs/design/003-scaling-recipe-tests.md
@@ -2,12 +2,13 @@
 
 ## Status
 
-**Accepted, implemented** — 2026-03-18
+**Accepted, implemented** — 2026-03-18; updated for the KWOK workflow
+consolidation (#1172).
 
 The tiered KWOK scaling strategy has shipped: tier discovery and per-overlay
-parallel jobs run in `.github/workflows/kwok-recipes.yaml`, with the Tier 3
-shard lane in `.github/workflows/kwok-tier3-shard.yaml`, backed by the
-`kwok/scripts/` and `kwok/profiles/` machinery.
+parallel jobs run in `.github/workflows/kwok-recipes.yaml`, with every tier
+fanning out to the shared `.github/workflows/kwok-test-run.yaml` runner,
+backed by the `kwok/scripts/` and `kwok/profiles/` machinery.
 
 ## Scope
 
@@ -150,7 +154,7 @@ To stay under the limit without sacrificing coverage, the `discover` job builds
 the `{recipe, deployer}` pairs and chunks them into batches of `TIER3_BATCH_SIZE`
 (200, with headroom under 256), emitting a `tier3_batches` output of
 `{id, pairs}` objects. `test-tier3` is a thin matrix over those batches that fans
-each one out to the **`kwok-tier3-shard.yaml`** reusable workflow, which expands
+each one out to the **`kwok-test-run.yaml`** reusable workflow, which expands
 its batch as its own (≤ 256) matrix. Batch count grows automatically as overlays
 are added — no manual job duplication. A fail-closed guard in `discover` errors
 loudly if `TIER3_BATCH_SIZE` is ever raised past 256, rather than resurfacing
@@ -163,19 +167,18 @@ and a summary. Tier 3 fans out to a reusable shard workflow (see above):
 
 ```
 discover
-├── tier1: [eks, aks, gke, kind, ...]               # generic only
-├── tier2: [h100-eks-ubuntu-training, ...]           # diff-affected only
-├── tier3: [all 72+]                                 # full overlay set
-└── tier3_batches: [{id, pairs:[{recipe,deployer}]}] # cross-product, chunked ≤256
+├── tier1_pairs: [{recipe,deployer}]                 # generic overlays × all deployers
+├── tier2_pairs: [{recipe, deployer:"helm"}]         # diff-affected overlays, helm-only
+└── tier3_batches: [{id, pairs:[{recipe,deployer}]}] # all overlays × all deployers, chunked ≤256
 
 test-tier1  (PR + push to main)
-  matrix: tier1 × deployer
+  uses kwok-test-run.yaml  pairs=tier1_pairs
 
 test-tier2  (PR only, skip if empty)
-  matrix: tier2 × deployer
+  uses kwok-test-run.yaml  pairs=tier2_pairs  [helm-only]
 
 test-tier3  (push to main + schedule, skip on PR)
-  matrix: tier3_batches → uses kwok-tier3-shard.yaml (matrix: pairs)
+  matrix: tier3_batches → uses kwok-test-run.yaml (matrix: pairs)
 
 summary
   needs: [test-tier1, test-tier2, test-tier3]
@@ -194,6 +197,17 @@ The `summary` job gates on Tier 1 and Tier 2 for PRs, and on all three tiers for
 pushes to `main`. This avoids branch protection brittleness when the overlay set
 changes.
 
+### Tier 2 deployer coverage
+
+Tier 2 is **helm-only** by deliberate policy (#1172). The old Tier 2 passed no
+deployer argument to the action, which already defaulted to `helm` — this makes
+that behavior explicit. Full deployer coverage (all deployers × all overlays)
+runs in Tier 3 on every push to `main` and on the nightly schedule.
+
+To add full deployer coverage to Tier 2, change `tier2_pairs` in the `discover`
+classify step to cross the recipe list with the full `DEPLOYERS` array (same
+pattern as `tier1_pairs`).
+
 ## Consequences
 
 ### Positive