SemiAnalysisAI · arygupt · May 27, 2026 · May 27, 2026 · May 28, 2026 · May 28, 2026
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -3171,3 +3171,9 @@
   description:
     - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
+
+- config-keys:
+    - dsv4-fp4-gb300-dynamo-sglang
+  description:
+    - "Smoke run validating multinode measured-power aggregation (PR #1574). No config change; entry exists to trigger a sweep that produces the first multinode agg JSON with avg_power_w + joules_per_*_token populated from per-node srt-slurm perfmon CSVs. Validates per-source GPU-id namespacing in aggregate_power.py (without it, 14 nodes × 4 GPUs would report num_gpus=4 instead of 56) and the GPU_METRICS_CSV_GLOB env var bridge in process_result.py. Only the gb300-cw runner has the perfmon launcher changes; any gb300-nv runs in the sweep will succeed normally without power fields, which the dashboard handles gracefully (chart gates on field presence)."
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1574
diff --git a/runners/launch_gb300-cw.sh b/runners/launch_gb300-cw.sh
@@ -12,8 +12,13 @@ if [[ $MODEL_PREFIX == "dsv4" && $PRECISION == "fp4" ]]; then
     export MODEL_PATH="/mnt/vast/models/dsv4"
 
     if [[ $FRAMEWORK == "dynamo-sglang" ]]; then
-        SRT_SLURM_RECIPES_REPO="https://github.com/NVIDIA/srt-slurm.git"
-        SRT_SLURM_RECIPES_REF="main"
+        # Pinned to our SemiAnalysisAI fork of NVIDIA/srt-slurm to pick up
+        # PR #35 (per-node nvidia-smi monitoring during the benchmark sweep)
+        # ahead of its upstream merge. The branch tracks PR #35's head SHA:
+        # to bump, re-fetch refs/pull/35/head from NVIDIA/srt-slurm and force-
+        # push to SemiAnalysisAI/srt-slurm:feat/inferencex-perfmon.
+        SRT_SLURM_RECIPES_REPO="https://github.com/SemiAnalysisAI/srt-slurm.git"
+        SRT_SLURM_RECIPES_REF="feat/inferencex-perfmon"
         SRT_RECIPE_SRC="$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4"
         SRT_RECIPE_DST="recipes/sglang/deepseek-v4"
     elif [[ $FRAMEWORK == "dynamo-vllm" ]]; then
@@ -106,6 +111,30 @@ git checkout "$SRT_SLURM_RECIPES_REF"
 mkdir -p "$SRT_RECIPE_DST"
 cp -rT "$SRT_RECIPE_SRC" "$SRT_RECIPE_DST"
 
+# Enable per-node GPU perfmon (PR #35) on every overlaid recipe. `monitoring`
+# is a top-level SrtConfig field and defaults to None, so without this the
+# orchestrator's _start_perf_monitor short-circuits, no perf_samples_*.csv is
+# written, and multinode measured-power aggregation silently skips.
+# Idempotent: recipes that already declare `monitoring:` are left untouched.
+#
+# CRITICAL: use `find` recursively, not a flat `*.yaml` glob. Recipes live in
+# $SRT_RECIPE_DST/<workload>/*.yaml (e.g. .../8k1k/*.yaml); a flat glob matches
+# nothing, so no recipe gets monitoring and perfmon never spawns. PR #1574's
+# first real sweep (#26548110246) "succeeded" with no power data that way, so
+# warn only when no recipe file is found (all-already-enabled is healthy).
+INJECTED_COUNT=0 RECIPE_COUNT=0
+while IFS= read -r recipe; do
+    RECIPE_COUNT=$((RECIPE_COUNT + 1))
+    grep -q '^monitoring:' "$recipe" && continue  # already configured
+    printf '\nmonitoring:\n  enabled: true\n  sample_interval: 1.0\n' >> "$recipe"
+    echo "[perfmon] enabled monitoring in recipe: $recipe"
+    INJECTED_COUNT=$((INJECTED_COUNT + 1))
+done < <(find "$SRT_RECIPE_DST" -type f -name '*.yaml')
+echo "[perfmon] monitoring injected into $INJECTED_COUNT of $RECIPE_COUNT recipe(s) under $SRT_RECIPE_DST"
+if [ "$RECIPE_COUNT" -eq 0 ]; then
+    echo "[perfmon] WARNING: no *.yaml recipes found under $SRT_RECIPE_DST — the directory layout may have changed; monitoring was NOT enabled and power data will be MISSING from this run." >&2
+fi
+
 echo "Installing srtctl..."
 # CRITICAL — uv install location.
 # Runner pod is x86 but compute nodes are aarch64, and /mnt/home is
@@ -279,6 +308,25 @@ else
     echo "Warning: Logs directory not found at $LOGS_DIR"
 fi
 
+# Hand the per-node perfmon CSVs to the downstream "Process result" step in
+# benchmark-multinode-tmpl.yml. srt-slurm's perfmon (PR #35) writes
+# perf_samples_{node}.csv straight into $LOGS_DIR on the host; exporting
+# GPU_METRICS_CSV_GLOB via $GITHUB_ENV lets process_result.py (which already
+# calls aggregate_power.run() inline) patch avg_power_w / joules_per_*_token
+# into each agg JSON. The glob must be absolute because process_result.py runs
+# from $GITHUB_WORKSPACE, so resolve $LOGS_DIR with cd+pwd (works whether it is
+# relative or absolute) and count matches with find rather than parsing ls.
+if [ -d "$LOGS_DIR" ]; then
+    perf_glob_dir="$(cd -- "$LOGS_DIR" >/dev/null && pwd)"
+    perf_csv_count=$(find "$perf_glob_dir" -maxdepth 1 -name 'perf_samples_*.csv' | wc -l | tr -d ' ')
+    if [ "$perf_csv_count" -gt 0 ]; then
+        echo "[perfmon] Found $perf_csv_count per-node perf_samples_*.csv under $perf_glob_dir/"
+        echo "GPU_METRICS_CSV_GLOB=$perf_glob_dir/perf_samples_*.csv" >> "$GITHUB_ENV"
+    else
+        echo "[perfmon] WARNING: monitoring enabled but no perf_samples_*.csv found in $perf_glob_dir — measured power aggregation will be skipped" >&2
+    fi
+fi
+
 if [[ "${EVAL_ONLY:-false}" != "true" ]]; then
     if [ ! -d "$LOGS_DIR" ]; then
         exit 1