Skip to content

Commit 3a192ab

Browse files
committed
update
1 parent 69c9236 commit 3a192ab

8 files changed

Lines changed: 46 additions & 49 deletions

.github/configs/nvidia-master.yaml

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8186,86 +8186,86 @@ dsv4-fp4-gb300-dynamo-sglang:
81868186
osl: 1024
81878187
search-space:
81888188
# WideEP TP=16 decode: 1p1d-dep4-dep16. 5 nodes (4P + 16D = 20 GPUs).
8189-
- conc-list: [512]
8189+
- conc-list: [8192]
81908190
prefill:
81918191
num-worker: 1
81928192
tp: 4
81938193
ep: 4
81948194
dp-attn: true
81958195
additional-settings:
8196-
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/conc512-20.yaml"
8196+
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep16-5-c8192.yaml"
81978197
decode:
81988198
num-worker: 1
81998199
tp: 16
82008200
ep: 16
82018201
dp-attn: true
8202-
# DP-attn wideep: 1p1d-dep4-dep8. 3 nodes.
8203-
- conc-list: [512]
8202+
# WideEP TP=16 decode: 4p1d-dep4-dep16. 8 nodes.
8203+
- conc-list: [1024]
82048204
prefill:
8205-
num-worker: 1
8205+
num-worker: 4
82068206
tp: 4
82078207
ep: 4
82088208
dp-attn: true
82098209
additional-settings:
8210-
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/conc512.yaml"
8210+
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb300-4p1d-dep4-dep16-8-c1024.yaml"
82118211
decode:
82128212
num-worker: 1
8213-
tp: 8
8214-
ep: 8
8213+
tp: 16
8214+
ep: 16
82158215
dp-attn: true
8216-
# DP-attn wideep: 2p1d-dep4-dep8. 4 nodes.
8217-
- conc-list: [1024]
8216+
# WideEP TP=16 decode: 8p1d-dep4-dep16. 12 nodes.
8217+
- conc-list: [4096]
82188218
prefill:
8219-
num-worker: 2
8219+
num-worker: 8
82208220
tp: 4
82218221
ep: 4
82228222
dp-attn: true
82238223
additional-settings:
8224-
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/conc1024.yaml"
8224+
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb300-8p1d-dep4-dep16-12-c4096.yaml"
82258225
decode:
82268226
num-worker: 1
8227-
tp: 8
8228-
ep: 8
8227+
tp: 16
8228+
ep: 16
82298229
dp-attn: true
8230-
# Low concurrency
8230+
# Low concurrency: 1p1d-tp4-tp4. 2 nodes.
82318231
- conc-list: [1]
82328232
prefill:
82338233
num-worker: 1
82348234
tp: 4
82358235
ep: 1
82368236
dp-attn: false
82378237
additional-settings:
8238-
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/conc1.yaml"
8238+
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-tp4-tp4-2-c1.yaml"
82398239
decode:
82408240
num-worker: 1
82418241
tp: 4
82428242
ep: 1
82438243
dp-attn: false
8244-
# Mid concurrency
8245-
- conc-list: [2048]
8244+
# Mid concurrency: 10p1d-dep4-dep16. 14 nodes.
8245+
- conc-list: [8192]
82468246
prefill:
8247-
num-worker: 4
8247+
num-worker: 10
82488248
tp: 4
82498249
ep: 4
82508250
dp-attn: true
82518251
additional-settings:
8252-
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/conc2048.yaml"
8252+
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb300-10p1d-dep4-dep16-14-c8192.yaml"
82538253
decode:
82548254
num-worker: 1
8255-
tp: 8
8256-
ep: 8
8255+
tp: 16
8256+
ep: 16
82578257
dp-attn: true
8258-
# Max concurrency
8259-
- conc-list: [16384]
8258+
# Max concurrency: 12p1d-dep4-dep12. 15 nodes.
8259+
- conc-list: [21504]
82608260
prefill:
8261-
num-worker: 14
8261+
num-worker: 12
82628262
tp: 4
82638263
ep: 4
82648264
dp-attn: true
82658265
additional-settings:
8266-
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/conc16384.yaml"
8266+
- "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb300-12p1d-dep4-dep12-15-c21504.yaml"
82678267
decode:
82688268
num-worker: 1
8269-
tp: 16
8270-
ep: 16
8269+
tp: 12
8270+
ep: 12
82718271
dp-attn: true

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/conc2048.yaml renamed to benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-10p1d-dep4-dep16-14-c8192.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "conc2048"
1+
name: "disagg-gb300-10p1d-dep4-dep16-14-c8192"
22

33
# 8k/1k high-throughput topology for the wideep DSV4-Pro setup.
44
#
@@ -37,7 +37,7 @@ model:
3737
precision: "fp4"
3838

3939
dynamo:
40-
hash: "9d3c913d300eb368cda28b3f98a23a5762621e0d"
40+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
4141
install: true
4242

4343
slurm:
@@ -186,6 +186,7 @@ backend:
186186
max-running-requests: 21504
187187
cuda-graph-max-bs: 1280
188188

189+
189190
benchmark:
190191
type: "sa-bench"
191192
isl: 8192

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/conc16384.yaml renamed to benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-12p1d-dep4-dep12-15-c21504.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "conc16384"
1+
name: "disagg-gb300-12p1d-dep4-dep12-15-c21504"
22

33
# 8k/1k high-throughput topology for the wideep DSV4-Pro setup.
44
#
@@ -37,7 +37,7 @@ model:
3737
precision: "fp4"
3838

3939
dynamo:
40-
hash: "9d3c913d300eb368cda28b3f98a23a5762621e0d"
40+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
4141
install: true
4242

4343
slurm:
@@ -175,7 +175,7 @@ backend:
175175
disaggregation-transfer-backend: mooncake
176176
disaggregation-decode-polling-interval: 8
177177

178-
mem-fraction-static: 0.962
178+
mem-fraction-static: 0.94
179179
swa-full-tokens-ratio: 0.056
180180
context-length: 9216
181181
tensor-parallel-size: 12

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/conc512-20.yaml renamed to benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep16-5-c8192.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "conc16384"
1+
name: "disagg-gb300-1p1d-dep4-dep16-5-c8192"
22

33
# 8k/1k high-throughput topology for the wideep DSV4-Pro setup.
44
#
@@ -37,7 +37,7 @@ model:
3737
precision: "fp4"
3838

3939
dynamo:
40-
hash: "9d3c913d300eb368cda28b3f98a23a5762621e0d"
40+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
4141
install: true
4242

4343
slurm:
@@ -175,7 +175,7 @@ backend:
175175
disaggregation-transfer-backend: mooncake
176176
disaggregation-decode-polling-interval: 8
177177

178-
mem-fraction-static: 0.962
178+
mem-fraction-static: 0.94
179179
swa-full-tokens-ratio: 0.056
180180
context-length: 9216
181181
tensor-parallel-size: 16

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/conc1.yaml renamed to benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-tp4-tp4-2-c1.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "conc1"
1+
name: "disagg-gb300-1p1d-tp4-tp4-2-c1"
22

33
# 8k/1k low-concurrency (conc=1) 1p1d TP4/TP4 topology for the DSV4-Pro setup.
44
#
@@ -40,7 +40,7 @@ model:
4040
# rationale. Hash bumped from PR #1213 to track the dynamo-sglang dsv4
4141
# dev branch.
4242
dynamo:
43-
hash: "9d3c913d300eb368cda28b3f98a23a5762621e0d"
43+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
4444
install: true
4545

4646
slurm:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/conc512.yaml renamed to benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-4p1d-dep4-dep16-8-c1024.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "conc16384"
1+
name: "disagg-gb300-4p1d-dep4-dep16-8-c1024"
22

33
# 8k/1k high-throughput topology for the wideep DSV4-Pro setup.
44
#
@@ -37,7 +37,7 @@ model:
3737
precision: "fp4"
3838

3939
dynamo:
40-
hash: "9d3c913d300eb368cda28b3f98a23a5762621e0d"
40+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
4141
install: true
4242

4343
slurm:
@@ -175,7 +175,7 @@ backend:
175175
disaggregation-transfer-backend: mooncake
176176
disaggregation-decode-polling-interval: 8
177177

178-
mem-fraction-static: 0.962
178+
mem-fraction-static: 0.94
179179
swa-full-tokens-ratio: 0.056
180180
context-length: 9216
181181
tensor-parallel-size: 16

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/conc1024.yaml renamed to benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-8p1d-dep4-dep16-12-c4096.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "conc16384"
1+
name: "disagg-gb300-8p1d-dep4-dep16-12-c4096"
22

33
# 8k/1k high-throughput topology for the wideep DSV4-Pro setup.
44
#
@@ -37,7 +37,7 @@ model:
3737
precision: "fp4"
3838

3939
dynamo:
40-
hash: "9d3c913d300eb368cda28b3f98a23a5762621e0d"
40+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
4141
install: true
4242

4343
slurm:
@@ -175,7 +175,7 @@ backend:
175175
disaggregation-transfer-backend: mooncake
176176
disaggregation-decode-polling-interval: 8
177177

178-
mem-fraction-static: 0.962
178+
mem-fraction-static: 0.94
179179
swa-full-tokens-ratio: 0.056
180180
context-length: 9216
181181
tensor-parallel-size: 16

utils/matrix_logic/generate_sweep_configs.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,7 @@ def _max_eval_conc(ie):
115115
)
116116
mn_groups[key].append((i, entry))
117117

118-
for key, entries in mn_groups.items():
119-
# TODO(pr1157): srt-slurm pin (9d75f82) lacks the lm-eval orchestrator path
120-
# (only on sa-submission-q2-2026). Skip eval-only here until the pin is bumped.
121-
if key[:3] == ("deepseek-ai/DeepSeek-V4-Pro", "gb300-cw", "dynamo-sglang"):
122-
continue
118+
for entries in mn_groups.values():
123119
best_idx, best_entry = max(entries, key=_max_eval_conc)
124120
eval_indices.add(best_idx)
125121
# Set eval-conc to median of eligible conc values to avoid OOM during eval

0 commit comments

Comments
 (0)