From 42fdf80787e39c09797bc085cfe722e200d7bd7e Mon Sep 17 00:00:00 2001 From: Aryan Date: Tue, 26 May 2026 18:02:11 -0700 Subject: [PATCH] feat(inference): add "Measured J per Token" metric (input + output denominator) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a third option to the gated "Measured Energy" dropdown group: - Measured J per Token (J/total-token: system energy / (input + output)) This is distinct from the existing "Measured J per Output Token", which divides only by output tokens (treating the prompt as free). The new metric enables workload-shape-fair comparisons — especially for prompt-heavy workloads like 8k/1k, where J/output-token is ~9x higher than J/total-token for the same energy. Wires the new field through the same plumbing as the existing measured-power metrics: - packages/constants/src/metric-keys.ts: register joules_per_total_token - packages/app/src/lib/benchmark-transform.ts: pass through (left undefined for legacy rows) - packages/app/src/components/inference/types.ts: extend AggDataEntry, InferenceData, YAxisMetricKey, ChartDefinition - packages/app/src/lib/chart-utils.ts: extend Y_AXIS_METRICS, createChartDataPoint, roofline union, markRooflinePoints - packages/app/src/components/inference/inference-chart-config.json: add y_measuredJPerTotalToken to both chartTypes (roofline lower_right / lower_left) - packages/app/src/components/inference/ui/ChartControls.tsx: add to the Measured Energy gated group Companion runner-side change: semianalysisai/InferenceX@363e49c4 emits joules_per_total_token in every agg_*.json. Tests: +3 covering the new field (presence, parallel independence from J/output-token, graceful absence on legacy rows). 1944/1944 vitest pass. 
--- .../inference/inference-chart-config.json | 8 ++++++ .../app/src/components/inference/types.ts | 11 ++++++-- .../components/inference/ui/ChartControls.tsx | 2 +- packages/app/src/lib/benchmark-transform.ts | 1 + packages/app/src/lib/chart-utils.test.ts | 27 +++++++++++++++++++ packages/app/src/lib/chart-utils.ts | 13 +++++++-- packages/constants/src/metric-keys.ts | 5 +++- 7 files changed, 61 insertions(+), 6 deletions(-) diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json index 1ca1c440..9581fce2 100644 --- a/packages/app/src/components/inference/inference-chart-config.json +++ b/packages/app/src/components/inference/inference-chart-config.json @@ -95,6 +95,10 @@ "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)", "y_measuredJPerOutputToken_title": "Measured Joules per Output Token", "y_measuredJPerOutputToken_roofline": "lower_right", + "y_measuredJPerTotalToken": "measuredJPerTotalToken.y", + "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)", + "y_measuredJPerTotalToken_title": "Measured Joules per Token (input + output)", + "y_measuredJPerTotalToken_roofline": "lower_right", "y_cost_limit": 5, "y_latency_limit": 60 }, @@ -193,6 +197,10 @@ "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)", "y_measuredJPerOutputToken_title": "Measured Joules per Output Token", "y_measuredJPerOutputToken_roofline": "lower_left", + "y_measuredJPerTotalToken": "measuredJPerTotalToken.y", + "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)", + "y_measuredJPerTotalToken_title": "Measured Joules per Token (input + output)", + "y_measuredJPerTotalToken_roofline": "lower_left", "y_cost_limit": 5, "y_latency_limit": 60 } diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index 83926fe1..ab338b87 100644 --- a/packages/app/src/components/inference/types.ts +++ 
b/packages/app/src/components/inference/types.ts @@ -68,9 +68,10 @@ export interface AggDataEntry { std_e2el: number; p99_e2el: number; // Measured GPU telemetry (emitted by runner's aggregate_power.py). - // Optional because historical runs predate the field. + // Optional because historical runs predate the fields. avg_power_w?: number; joules_per_output_token?: number; + joules_per_total_token?: number; disagg: boolean; num_prefill_gpu: number; num_decode_gpu: number; @@ -162,6 +163,7 @@ export interface InferenceData extends Partial { expect(point.measuredAvgPower).toBeDefined(); expect(point.measuredAvgPower!.y).toBe(0); }); + + it('emits measuredJPerTotalToken when joules_per_total_token is present', () => { + const e = entry({ joules_per_total_token: 0.93 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredJPerTotalToken).toBeDefined(); + expect(point.measuredJPerTotalToken!.y).toBe(0.93); + expect(point.measuredJPerTotalToken!.roof).toBe(false); + }); + + it('emits J/output and J/total independently — different denominators', () => { + // 8k1k workload: J/output ≈ 9 × J/total (input is ~8x output, so output/total ≈ 1/9). + const e = entry({ joules_per_output_token: 2.04, joules_per_total_token: 0.23 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredJPerOutputToken!.y).toBe(2.04); + expect(point.measuredJPerTotalToken!.y).toBe(0.23); + }); + + it('omits measuredJPerTotalToken on rows that predate the field', () => { + // Rows ingested before joules_per_total_token was added still have avg_power_w + // and joules_per_output_token. The new field must be absent (not 0) so the + // chart correctly drops them from the J/total view rather than plotting fake data. 
+ const e = entry({ avg_power_w: 458, joules_per_output_token: 2.04 }); + const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100'); + expect(point.measuredAvgPower).toBeDefined(); + expect(point.measuredJPerOutputToken).toBeDefined(); + expect(point.measuredJPerTotalToken).toBeUndefined(); + }); }); // =========================================================================== diff --git a/packages/app/src/lib/chart-utils.ts b/packages/app/src/lib/chart-utils.ts index c7b6e04b..980b4c7b 100644 --- a/packages/app/src/lib/chart-utils.ts +++ b/packages/app/src/lib/chart-utils.ts @@ -152,6 +152,7 @@ export const Y_AXIS_METRICS = [ // distinct from the spec-sheet TDP-derived jTotal/jOutput/jInput above). 'y_measuredAvgPower', 'y_measuredJPerOutputToken', + 'y_measuredJPerTotalToken', ] as const; export type YAxisMetric = (typeof Y_AXIS_METRICS)[number]; @@ -403,6 +404,9 @@ export function createChartDataPoint( ...(typeof entry.joules_per_output_token === 'number' ? { measuredJPerOutputToken: { y: entry.joules_per_output_token, roof: false } } : {}), + ...(typeof entry.joules_per_total_token === 'number' + ? 
{ measuredJPerTotalToken: { y: entry.joules_per_total_token, roof: false } } + : {}), }; } @@ -565,7 +569,8 @@ export const calculateRoofline = ( | `jOutput.y` | `jInput.y` | `measuredAvgPower.y` - | `measuredJPerOutputToken.y`, + | `measuredJPerOutputToken.y` + | `measuredJPerTotalToken.y`, rooflineDirection: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right', ): InferenceData[] => { const pointsForRoofline = points.map((p) => { @@ -637,7 +642,8 @@ export function computeAllRooflines( | `jOutput.y` | `jInput.y` | `measuredAvgPower.y` - | `measuredJPerOutputToken.y`, + | `measuredJPerOutputToken.y` + | `measuredJPerTotalToken.y`, rooflineDirection, ); } @@ -683,6 +689,7 @@ export function markRooflinePoints( if (newPoint.jInput) newPoint.jInput.roof = false; if (newPoint.measuredAvgPower) newPoint.measuredAvgPower.roof = false; if (newPoint.measuredJPerOutputToken) newPoint.measuredJPerOutputToken.roof = false; + if (newPoint.measuredJPerTotalToken) newPoint.measuredJPerTotalToken.roof = false; for (const chartDefYKey of Y_AXIS_METRICS) { const rooflinePoints = computedRooflines[hwKey]?.[chartDefYKey]; @@ -749,6 +756,8 @@ export function markRooflinePoints( newPoint.measuredJPerOutputToken ) { newPoint.measuredJPerOutputToken.roof = onCurrentRoofline; + } else if (chartDefYKey === 'y_measuredJPerTotalToken' && newPoint.measuredJPerTotalToken) { + newPoint.measuredJPerTotalToken.roof = onCurrentRoofline; } } finalProcessedData.push(newPoint); diff --git a/packages/constants/src/metric-keys.ts b/packages/constants/src/metric-keys.ts index bc2a5e65..037d7df4 100644 --- a/packages/constants/src/metric-keys.ts +++ b/packages/constants/src/metric-keys.ts @@ -44,8 +44,11 @@ export const METRIC_KEYS = new Set([ 'p99.9_intvty', 'std_intvty', // measured power / energy (emitted by runner's aggregate_power.py) - // avg_power_w: mean per-GPU draw (W) during the load window + // avg_power_w: mean per-GPU draw (W) during the load window // joules_per_output_token: 
avg_power_w * num_gpus * duration / total_output_tokens + // joules_per_total_token: avg_power_w * num_gpus * duration / (total_input + total_output) + // — workload-shape-fair view that doesn't treat prompt as free 'avg_power_w', 'joules_per_output_token', + 'joules_per_total_token', ]);