diff --git a/.github/workflows/docker-tag-monitor.yml b/.github/workflows/docker-tag-monitor.yml
index 5bb3c863d..aa58cf597 100644
--- a/.github/workflows/docker-tag-monitor.yml
+++ b/.github/workflows/docker-tag-monitor.yml
@@ -179,6 +179,42 @@ jobs:
fi
ALLOWED_SKUS=$(cat "$ALLOWED_SKUS_FILE")
+ # Snapshot per-config submission staleness from the production frontend.
+ # Helps Claude prioritize bumps for configs that haven't been refreshed in a long time.
+ STALENESS_TABLE=$(mktemp)
+ if curl -sf --max-time 15 --compressed "https://inferencex.semianalysis.com/api/v1/submissions" -o /tmp/sub.json; then
+ jq -r --arg today "$(date -u +%Y-%m-%d)" '
+ def days_between($a; $b):
+ (($a | strptime("%Y-%m-%d") | mktime) as $ta
+ | ($b | strptime("%Y-%m-%d") | mktime) as $tb
+ | (($ta - $tb) / 86400) | floor);
+ [
+ "| Config | Days since |",
+ "|--------|------------|"
+ ]
+ + (
+ (.summary // [])
+ | map(select(.is_multinode == false and (.model | startswith("llama") | not)))
+ | group_by([.model, .hardware, .framework, .precision, .spec_method, .disagg])
+ | map({
+ key: (
+ .[0].model + "-" + .[0].precision + "-" + .[0].hardware + "-" + .[0].framework
+ + (if .[0].spec_method != "none" then "-mtp" else "" end)
+ + (if .[0].disagg then "-disagg" else "" end)
+ ),
+ last_date: ([.[].date] | max)
+ })
+ | map(. + {days_since: days_between($today; .last_date)})
+ | sort_by(-.days_since)
+ | .[0:20]
+ | map("| `\(.key)` | \(.days_since) |")
+ )
+ | .[]
+ ' /tmp/sub.json > "$STALENESS_TABLE"
+ else
+ echo "_Could not reach https://inferencex.semianalysis.com/api/v1/submissions — submission staleness data not available._" > "$STALENESS_TABLE"
+ fi
+
# Build issue body and write to file
BODY_FILE=$(mktemp)
{
@@ -207,6 +243,17 @@ jobs:
echo "**Allowed SKUs for this run:** _none — skip PR creation and post a comment explaining the runner shortage._"
fi
echo ""
+ echo "### Submission Staleness (single-node configs)"
+ echo ""
+ echo "_Source: https://inferencex.semianalysis.com/api/v1/submissions at $(date -u +%Y-%m-%dT%H:%M:%SZ). Sorted oldest-first — older = higher priority for refresh._"
+ echo ""
+ echo ""
+ echo "<details><summary>Show staleness table</summary>"  # collapsible wrapper around the table
+ echo ""  # blank line so the Markdown table inside <details> is parsed as a table
+ cat "$STALENESS_TABLE"
+ echo ""
+ echo "</details>"
+ echo ""
echo "---"
echo ""
echo "@claude Please update the configurations:"
@@ -215,6 +262,14 @@ jobs:
echo "2. Add entries to \`perf-changelog.yaml\` documenting the version changes"
echo "3. For each eligible config-key, push a branch and actually open a PR — do not stop at the \"Create a pull request for ...\" remote hint that \`git push\` prints. Run \`gh pr create\` (or the equivalent MCP tool) and verify the returned PR URL. Link every PR back to this issue in a comment."
echo ""
+ echo "**Pre-flight research (required before opening any PR):** For each image being bumped, read the upstream release notes for every version between the current tag and the new one — vLLM at \`https://github.com/vllm-project/vllm/releases\` (or \`gh api repos/vllm-project/vllm/releases\`), SGLang at \`https://github.com/sgl-project/sglang/releases\`. You are looking for two specific failure modes that have bitten prior runs:"
+ echo ""
+ echo "1. **Suffix convention changes.** The default CUDA/ROCm build can shift between versions, which changes which Docker tag to pick. Concrete example: vLLM v0.21.0 promoted CUDA 13 to the default build, so the bare \`v0.21.0\` tag *is* the cu13 image and no \`v0.21.0-cu13\` tag exists — mechanically reusing the old suffix would point at a 404. Before settling on a tag, confirm it actually exists on Docker Hub (\`curl -sf https://hub.docker.com/v2/repositories/<namespace>/<repo>/tags/<tag>/\`) and that its build matches the runner's accelerator. If the convention shifted, use the new correct tag and call it out explicitly in the PR body."  # <namespace>/<repo>/<tag> are placeholders to fill in per image
+ echo ""
+ echo "2. **CLI flag deprecations or removals.** Server flags get removed between minors and the container exits with an error on startup, so this only surfaces during the e2e run. Concrete example: vLLM removed \`--disable-log-requests\` (silent-by-default; passing it now errors). Before opening the PR, \`grep -rn\` the repo for the launch flags actually passed to this image (check launch scripts, sbatch recipes, and any per-config templates — not just the master config). For every flag in use, check the release notes between current and new version for deprecations/removals/renames. If a flag was removed or renamed, fix it in the same PR and add a perf-changelog entry noting the flag change. If a flag has no replacement and is load-bearing, skip the config-key and explain in your wrap-up comment."
+ echo ""
+ echo "List the release-notes URLs you consulted in the PR body so reviewers can audit the research. PRs that bump a tag without evidence of this check will be rejected."
+ echo ""
echo "**Required PR label:** Every PR you open from this issue MUST carry the \`full-sweep-enabled\` label. Apply it at creation time via \`gh pr create --label full-sweep-enabled\` (or add it immediately after with \`gh pr edit --add-label full-sweep-enabled\`). Do not skip this — downstream automation keys off the label."
echo ""
echo "**PR title / commit message formatting:** Multi-line titles and bodies MUST use a heredoc, not \`\\n\` escapes and not \`\$'...'\` ANSI-C quoting. A prior run produced commits literally starting with \`\$\` and containing \`\\n\\n\` as text because of mis-quoted ANSI-C strings. Use this pattern instead:"
@@ -254,7 +309,7 @@ jobs:
echo "If Docker Hub lists multiple variants for the same base version (e.g. \`cu128\` vs \`cu130\`, \`rocm70\` vs \`rocm72\`), pick the variant whose suffix matches what the config-key's current image entry already uses — don't switch CUDA/ROCm minor versions in this update."
} > "$BODY_FILE"
- rm -f "$RUNNERS_TABLE" "$ALLOWED_SKUS_FILE" /tmp/ci.json
+ rm -f "$RUNNERS_TABLE" "$ALLOWED_SKUS_FILE" "$STALENESS_TABLE" /tmp/ci.json /tmp/sub.json  # scratch inputs are no longer needed once the body has been written to $BODY_FILE
echo "body_file=$BODY_FILE" >> "$GITHUB_OUTPUT"