From 32beed6afefeff28677bc66a3d4c1d9d89d45314 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Mon, 18 May 2026 10:43:35 -0700
Subject: [PATCH] docker-tag-monitor: ask @claude to read upstream release
notes
Add a pre-flight research block to the auto-generated issue body so that
Claude reads vLLM / SGLang release notes between the current and new tag
before opening a PR. Prior runs missed two specific failure modes:
1. Tag-suffix convention changes (e.g. vLLM v0.21.0 promoted CUDA 13
to the default build, so the bare v0.21.0 tag *is* the cu13 image
and no v0.21.0-cu13 tag exists -- mechanical suffix reuse would
point at a 404).
2. CLI flag deprecations or removals (e.g. vLLM removed
`--disable-log-requests`; passing it now errors at startup and
only surfaces during the e2e run).
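The block tells Claude to fetch release notes, confirm the chosen tag
exists on Docker Hub, grep the repo for launch flags actually used with
this image, and either fix any removed or renamed flags in the same PR
or skip the config-key and explain in the wrap-up comment.
Also add a "Submission Staleness" table to the issue body, built from
https://inferencex.semianalysis.com/api/v1/submissions, so Claude can
prioritize configs that have gone longest without a fresh submission.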
---
.github/workflows/docker-tag-monitor.yml | 57 +++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/docker-tag-monitor.yml b/.github/workflows/docker-tag-monitor.yml
index 5bb3c863d..aa58cf597 100644
--- a/.github/workflows/docker-tag-monitor.yml
+++ b/.github/workflows/docker-tag-monitor.yml
@@ -179,6 +179,42 @@ jobs:
fi
ALLOWED_SKUS=$(cat "$ALLOWED_SKUS_FILE")
+ # Snapshot per-config submission staleness from the production frontend (best-effort: a curl or jq failure writes a notice instead of aborting the step under bash -e).
+ # Helps Claude prioritize bumps for configs that haven't been refreshed in a long time.
+ STALENESS_TABLE=$(mktemp)
+ if ! curl -sf --max-time 15 --compressed "https://inferencex.semianalysis.com/api/v1/submissions" -o /tmp/sub.json; then
+ echo "_Could not reach https://inferencex.semianalysis.com/api/v1/submissions — submission staleness data not available._" > "$STALENESS_TABLE"
+ elif ! jq -r --arg today "$(date -u +%Y-%m-%d)" '
+ def days_between($a; $b): # whole days from $b to $a, both given as YYYY-MM-DD
+ (($a | strptime("%Y-%m-%d") | mktime) as $ta
+ | ($b | strptime("%Y-%m-%d") | mktime) as $tb
+ | (($ta - $tb) / 86400) | floor);
+ [
+ "| Config | Days since |",
+ "|--------|------------|"
+ ]
+ + (
+ (.summary // [])
+ | map(select(.is_multinode == false and (.model | startswith("llama") | not))) # single-node, non-llama entries only
+ | group_by([.model, .hardware, .framework, .precision, .spec_method, .disagg])
+ | map({
+ key: (
+ .[0].model + "-" + .[0].precision + "-" + .[0].hardware + "-" + .[0].framework
+ + (if .[0].spec_method != "none" then "-mtp" else "" end)
+ + (if .[0].disagg then "-disagg" else "" end)
+ ),
+ last_date: ([.[].date] | max)
+ })
+ | map(. + {days_since: days_between($today; .last_date)})
+ | sort_by(-.days_since)
+ | .[0:20] # keep only the 20 stalest configs
+ | map("| `\(.key)` | \(.days_since) |")
+ )
+ | .[] # with -r, one markdown table row per output line
+ ' /tmp/sub.json > "$STALENESS_TABLE"; then
+ echo "_Could not parse the response from https://inferencex.semianalysis.com/api/v1/submissions — submission staleness data not available._" > "$STALENESS_TABLE"
+ fi
+
# Build issue body and write to file
BODY_FILE=$(mktemp)
{
@@ -207,6 +243,17 @@ jobs:
echo "**Allowed SKUs for this run:** _none — skip PR creation and post a comment explaining the runner shortage._"
fi
echo ""
+ echo "### Submission Staleness (single-node configs)"
+ echo ""
+ echo "_Source: https://inferencex.semianalysis.com/api/v1/submissions at $(date -u +%Y-%m-%dT%H:%M:%SZ). Sorted oldest-first — older = higher priority for refresh._"
+ echo ""
+ echo "<details>"
+ echo "<summary>Show staleness table</summary>"
+ echo ""
+ cat "$STALENESS_TABLE"
+ echo ""
+ echo "</details>"
+ echo ""
echo "---"
echo ""
echo "@claude Please update the configurations:"
@@ -215,6 +262,14 @@ jobs:
echo "2. Add entries to \`perf-changelog.yaml\` documenting the version changes"
echo "3. For each eligible config-key, push a branch and actually open a PR — do not stop at the \"Create a pull request for ...\" remote hint that \`git push\` prints. Run \`gh pr create\` (or the equivalent MCP tool) and verify the returned PR URL. Link every PR back to this issue in a comment."
echo ""
+ echo "**Pre-flight research (required before opening any PR):** For each image being bumped, read the upstream release notes for every version between the current tag and the new one — vLLM at \`https://github.com/vllm-project/vllm/releases\` (or \`gh api repos/vllm-project/vllm/releases\`), SGLang at \`https://github.com/sgl-project/sglang/releases\`. You are looking for two specific failure modes that have bitten prior runs:"
+ echo ""
+ echo "1. **Suffix convention changes.** The default CUDA/ROCm build can shift between versions, which changes which Docker tag to pick. Concrete example: vLLM v0.21.0 promoted CUDA 13 to the default build, so the bare \`v0.21.0\` tag *is* the cu13 image and no \`v0.21.0-cu13\` tag exists — mechanically reusing the old suffix would point at a 404. Before settling on a tag, confirm it actually exists on Docker Hub (\`curl -sf https://hub.docker.com/v2/repositories/<namespace>/<repo>/tags/<tag>/\`) and that its build matches the runner's accelerator. If the convention shifted, use the new correct tag and call it out explicitly in the PR body."
+ echo ""
+ echo "2. **CLI flag deprecations or removals.** Server flags get removed between minors and the container exits with an error on startup, so this only surfaces during the e2e run. Concrete example: vLLM removed \`--disable-log-requests\` (silent-by-default; passing it now errors). Before opening the PR, \`grep -rn\` the repo for the launch flags actually passed to this image (check launch scripts, sbatch recipes, and any per-config templates — not just the master config). For every flag in use, check the release notes between current and new version for deprecations/removals/renames. If a flag was removed or renamed, fix it in the same PR and add a perf-changelog entry noting the flag change. If a flag has no replacement and is load-bearing, skip the config-key and explain in your wrap-up comment."
+ echo ""
+ echo "List the release-notes URLs you consulted in the PR body so reviewers can audit the research. PRs that bump a tag without evidence of this check will be rejected."
+ echo ""
echo "**Required PR label:** Every PR you open from this issue MUST carry the \`full-sweep-enabled\` label. Apply it at creation time via \`gh pr create --label full-sweep-enabled\` (or add it immediately after with \`gh pr edit --add-label full-sweep-enabled\`). Do not skip this — downstream automation keys off the label."
echo ""
echo "**PR title / commit message formatting:** Multi-line titles and bodies MUST use a heredoc, not \`\\n\` escapes and not \`\$'...'\` ANSI-C quoting. A prior run produced commits literally starting with \`\$\` and containing \`\\n\\n\` as text because of mis-quoted ANSI-C strings. Use this pattern instead:"
@@ -254,7 +309,7 @@ jobs:
echo "If Docker Hub lists multiple variants for the same base version (e.g. \`cu128\` vs \`cu130\`, \`rocm70\` vs \`rocm72\`), pick the variant whose suffix matches what the config-key's current image entry already uses — don't switch CUDA/ROCm minor versions in this update."
} > "$BODY_FILE"
- rm -f "$RUNNERS_TABLE" "$ALLOWED_SKUS_FILE" /tmp/ci.json
+ rm -f "$RUNNERS_TABLE" "$ALLOWED_SKUS_FILE" "$STALENESS_TABLE" /tmp/ci.json /tmp/sub.json
echo "body_file=$BODY_FILE" >> "$GITHUB_OUTPUT"