From f66004e239aae543beda9d5d0248858eb27e988f Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Fri, 22 May 2026 15:55:01 -0700 Subject: [PATCH 1/5] [GB300][SGLang] Bump SGLang image for dsv4-fp4-gb300-dynamo-sglang-mtp Update SGLang container image from nightly-dev-cu13-20260510-2473659e to nightly-dev-20260522-c9153da5 across all DeepSeek-V4 8k1k disagg recipes. --- .../8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml | 2 +- .../8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml | 2 +- .../8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml | 2 +- .../8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml | 2 +- .../8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml | 2 +- .../8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml | 2 +- perf-changelog.yaml | 6 ++++++ 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml index accb5e56f..76aedb056 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml index 3e8fca87b..c083e0b0b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml index 6745aa37e..502898e4b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml index 842fbb556..5cd11b4c1 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml index 49b0d31c3..5e7759e07 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml index d907c369e..1927f9c43 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5" precision: "mxfp4" sbatch_directives: diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 208a2da6f..fc41b568c 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3129,3 +3129,9 @@ description: - "Add --use-chat-template to run_benchmark_serving so prompts are formatted with the Qwen chat template (matching the other Qwen MTP recipes)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1555 + +- config-keys: + - dsv4-fp4-gb300-dynamo-sglang-mtp + description: + - "Update SGLang image from nightly-dev-cu13-20260510-2473659e to nightly-dev-20260522-c9153da5" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/PLACEHOLDER From a6666cc27d707e0997536b8a802aac8f333d4a04 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Fri, 22 May 2026 15:55:22 -0700 Subject: [PATCH 2/5] Update perf-changelog.yaml with PR #1559 link --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index fc41b568c..5b32d03d3 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3134,4 +3134,4 @@ - dsv4-fp4-gb300-dynamo-sglang-mtp description: - "Update SGLang image from nightly-dev-cu13-20260510-2473659e to nightly-dev-20260522-c9153da5" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/PLACEHOLDER + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1559 From 27e4d9bb73ac99ecf4c36cf611eab9ded18afd67 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Fri, 22 May 2026 16:32:10 -0700 Subject: [PATCH 3/5] Clean up obsolete sglang envs in dsv4 8k1k disagg recipes - Drop SGLANG_OPT_USE_JIT_NORM, SGLANG_OPT_USE_JIT_INDEXER_METADATA, SGLANG_OPT_USE_TOPK_V2 (now default-on in latest sglang). - Drop the MegaMoE companion envs that sglang now auto-sets when SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE is enabled: SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE, SGLANG_OPT_FIX_HASH_MEGA_MOE, SGLANG_OPT_FIX_MEGA_MOE_MEMORY, SGLANG_OPT_FIX_NEXTN_MEGA_MOE, SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK. - Drop SGLANG_RADIX_DISABLE_REUSE and SGLANG_OPT_USE_FAST_MASK_EP which no longer exist in sglang's environ.py. --- .../disagg-low-latency-1p1d-tp4-tp4-mtp.yaml | 8 -------- .../disagg-low-latency-1p6d-dep4-tp4-mtp.yaml | 14 ------------- .../disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml | 20 ------------------- .../disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml | 20 ------------------- .../disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml | 20 ------------------- .../disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml | 20 ------------------- perf-changelog.yaml | 1 + 7 files changed, 1 insertion(+), 102 deletions(-) diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml index 76aedb056..a4b178c2a 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml @@ -31,14 +31,10 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" @@ -49,14 +45,10 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml index c083e0b0b..28ead1ef5 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml @@ -31,24 +31,14 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" @@ -60,14 +50,10 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml index 502898e4b..9e54ada4e 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml @@ -33,24 +33,14 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" @@ -62,23 +52,13 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml index 5cd11b4c1..937d1c309 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml @@ -33,24 +33,14 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" @@ -62,23 +52,13 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml index 5e7759e07..2622412fb 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml @@ -33,24 +33,14 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" @@ -62,23 +52,13 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml index 1927f9c43..de6a47537 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml @@ -33,24 +33,14 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" @@ -62,23 +52,13 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" - SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096" - SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" - SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 5b32d03d3..15edd533d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3134,4 +3134,5 @@ - dsv4-fp4-gb300-dynamo-sglang-mtp description: - "Update SGLang image from nightly-dev-cu13-20260510-2473659e to nightly-dev-20260522-c9153da5" + - "Clean up obsolete environs in the 8k1k disagg recipes: drop SGLANG_OPT_USE_JIT_NORM / SGLANG_OPT_USE_JIT_INDEXER_METADATA / SGLANG_OPT_USE_TOPK_V2 (now default-on); drop the auto-set MegaMoE companions (SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE, SGLANG_OPT_FIX_HASH_MEGA_MOE, SGLANG_OPT_FIX_MEGA_MOE_MEMORY, SGLANG_OPT_FIX_NEXTN_MEGA_MOE, SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK); drop SGLANG_RADIX_DISABLE_REUSE / SGLANG_OPT_USE_FAST_MASK_EP which no longer exist in sglang environ.py" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1559 From 0be65805f93182f7857a485ae5e71e24e51217a6 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Fri, 22 May 2026 16:34:49 -0700 Subject: [PATCH 4/5] Switch moe-a2a-backend from deepep to megamoe in MegaMoE blocks --- .../8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml | 2 +- .../8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml | 4 ++-- .../deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml | 4 ++-- .../deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml | 4 ++-- .../deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml index 28ead1ef5..e3081801c 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml @@ -81,7 +81,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' mem-fraction-static: 0.9 diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml index 9e54ada4e..b627b1368 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml @@ -86,7 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' mem-fraction-static: 0.9 @@ -111,7 +111,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' speculative-algo: "EAGLE" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml index 937d1c309..2b3d844ab 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml @@ -86,7 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' mem-fraction-static: 0.9 @@ -111,7 +111,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' speculative-algo: "EAGLE" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml index 2622412fb..d1e52ef7b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml @@ -86,7 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' mem-fraction-static: 0.9 @@ -111,7 +111,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' speculative-algo: "EAGLE" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml index de6a47537..95bef5176 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml @@ -86,7 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' mem-fraction-static: 0.9 @@ -111,7 +111,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: "deepep" + moe-a2a-backend: megamoe deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' speculative-algo: "EAGLE" From beb0fe5a59bee13e7ecc7c32c23d6181b6f4c0ff Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Fri, 22 May 2026 17:39:57 -0700 Subject: [PATCH 5/5] Bump dynamo hash, quote megamoe backend, drop deepep-config - Update dynamo commit hash to 81d0555ee23519cea80a42b4fe824e30368b7300 across all 6 dsv4 8k1k disagg recipes. - Quote moe-a2a-backend value as "megamoe" for consistency with other string fields. - Remove the now-unused deepep-config entries; megamoe doesn't read them. --- .../8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml | 2 +- .../8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml | 5 ++--- .../8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml | 8 +++----- .../8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml | 8 +++----- .../8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml | 8 +++----- .../8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml | 8 +++----- 6 files changed, 15 insertions(+), 24 deletions(-) diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml index a4b178c2a..0efd8c224 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml @@ -6,7 +6,7 @@ frontend: num_additional_frontends: 8 dynamo: - hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c" + hash: "81d0555ee23519cea80a42b4fe824e30368b7300" install: true model: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml index e3081801c..b502ec587 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml @@ -6,7 +6,7 @@ frontend: num_additional_frontends: 8 dynamo: - hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c" + hash: "81d0555ee23519cea80a42b4fe824e30368b7300" install: true model: @@ -81,8 +81,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" mem-fraction-static: 0.9 max-running-requests: 128 diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml index b627b1368..5f4b21583 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml @@ -6,7 +6,7 @@ frontend: num_additional_frontends: 8 dynamo: - hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c" + hash: "81d0555ee23519cea80a42b4fe824e30368b7300" install: true model: @@ -86,8 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" mem-fraction-static: 0.9 max-running-requests: 256 @@ -111,8 +110,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" speculative-algo: "EAGLE" speculative-num-steps: 3 diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml index 2b3d844ab..84c4d6443 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml @@ -6,7 +6,7 @@ frontend: num_additional_frontends: 8 dynamo: - hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c" + hash: "81d0555ee23519cea80a42b4fe824e30368b7300" install: true model: @@ -86,8 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" mem-fraction-static: 0.9 max-running-requests: 256 @@ -111,8 +110,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" speculative-algo: "EAGLE" speculative-num-steps: 3 diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml index d1e52ef7b..ea64f91ec 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml @@ -6,7 +6,7 @@ frontend: num_additional_frontends: 8 dynamo: - hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c" + hash: "81d0555ee23519cea80a42b4fe824e30368b7300" install: true model: @@ -86,8 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" mem-fraction-static: 0.9 max-running-requests: 512 @@ -111,8 +110,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" speculative-algo: "EAGLE" speculative-num-steps: 3 diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml index 95bef5176..ad6c54b0e 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml @@ -6,7 +6,7 @@ frontend: num_additional_frontends: 8 dynamo: - hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c" + hash: "81d0555ee23519cea80a42b4fe824e30368b7300" install: true model: @@ -86,8 +86,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" mem-fraction-static: 0.9 max-running-requests: 1024 @@ -111,8 +110,7 @@ backend: enable-dp-attention: true enable-dp-lm-head: true - moe-a2a-backend: megamoe - deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}' + moe-a2a-backend: "megamoe" speculative-algo: "EAGLE" speculative-num-steps: 3