Skip to content
387 changes: 387 additions & 0 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9428,3 +9428,390 @@ glm5-fp4-gb300-dynamo-sglang:
tp: 4
ep: 1
dp-attn: false

# Entry for nvidia/MiniMax-M2.5-NVFP4 on runner gb300-nv with the dynamo-vllm
# framework at fp4 precision. Both multinode and disagg are enabled, and every
# search-space item below gives separate prefill and decode worker settings.
# All prefill sections in this entry use tp 1, ep 1, dp-attn false; only their
# num-worker value varies.
# NOTE(review): isl/osl presumably mean input/output sequence length and
# conc-list the concurrency values to run - confirm against the consumer.
# NOTE(review): the CONFIG_FILE recipe names appear to encode the decode
# parallelism and worker counts (e.g. tp4-1p2d <-> decode tp 4 with 1 prefill
# and 2 decode workers; dep* <-> decode dp-attn: true). This is inferred from
# the values in this entry - confirm against the recipe files.
minimaxm2.5-fp4-gb300-dynamo-vllm:
image: vllm/vllm-openai:v0.20.1
model: nvidia/MiniMax-M2.5-NVFP4
model-prefix: minimaxm2.5
runner: gb300-nv
precision: fp4
framework: dynamo-vllm
multinode: true
disagg: true
scenarios:
fixed-seq-len:
# Scenario: isl 1024 / osl 1024.
- isl: 1024
osl: 1024
search-space:
# tp4-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 1.
- conc-list: [2, 4, 16]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 1
dp-attn: false
# tp4-1p2d: 1 prefill worker; 2 decode workers at tp 4, ep 1.
- conc-list: [4, 8, 16, 64]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml"
decode:
num-worker: 2
tp: 4
ep: 1
dp-attn: false
# tp4ep-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 4.
- conc-list: [32, 64, 128]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: false
# tp4ep-1p3d: 1 prefill worker; 3 decode workers at tp 4, ep 4.
- conc-list: [64, 128, 256, 512, 1024]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml"
decode:
num-worker: 3
tp: 4
ep: 4
dp-attn: false
# dep2-2p3d: 2 prefill workers; 3 decode workers at tp 2, ep 2, dp-attn true.
- conc-list: [2048]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml"
decode:
num-worker: 3
tp: 2
ep: 2
dp-attn: true
# dep2-2p3d-c6144: same worker counts and decode tp/ep as the dep2-2p3d item,
# but a separate recipe file and higher concurrency values.
- conc-list: [6144, 8192]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml"
decode:
num-worker: 3
tp: 2
ep: 2
dp-attn: true
# dep8-2p1d: 2 prefill workers; 1 decode worker at tp 8, ep 8, dp-attn true.
- conc-list: [1024, 2048, 4096]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml"
decode:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
# Scenario: isl 8192 / osl 1024.
- isl: 8192
osl: 1024
search-space:
# tp4-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 1.
- conc-list: [2, 4, 8, 16]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 1
dp-attn: false
# tp4ep-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 4.
- conc-list: [32, 64, 128, 256]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: false
# tp4ep-2p1d: 2 prefill workers; 1 decode worker at tp 4, ep 4.
- conc-list: [64, 128]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: false
# dep4-4p1d: 4 prefill workers; 1 decode worker at tp 4, ep 4, dp-attn true.
- conc-list: [256]
prefill:
num-worker: 4
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: true
# dep8-4p1d: 4 prefill workers; 1 decode worker at tp 8, ep 8, dp-attn true.
- conc-list: [1024, 2048]
prefill:
num-worker: 4
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml"
decode:
num-worker: 1
tp: 8
ep: 8
dp-attn: true

# Entry for nvidia/MiniMax-M2.5-NVFP4 on runner b300 with the dynamo-vllm
# framework at fp4 precision. Both multinode and disagg are enabled, and every
# search-space item below gives separate prefill and decode worker settings.
# All prefill sections in this entry use tp 1, ep 1, dp-attn false; only their
# num-worker value varies.
# NOTE(review): isl/osl presumably mean input/output sequence length and
# conc-list the concurrency values to run - confirm against the consumer.
# NOTE(review): the CONFIG_FILE recipe names appear to encode the decode
# parallelism and worker counts (e.g. tp4-1p2d <-> decode tp 4 with 1 prefill
# and 2 decode workers; dep* <-> decode dp-attn: true). This is inferred from
# the values in this entry - confirm against the recipe files.
minimaxm2.5-fp4-b300-dynamo-vllm:
image: vllm/vllm-openai:v0.20.1
model: nvidia/MiniMax-M2.5-NVFP4
model-prefix: minimaxm2.5
runner: b300
precision: fp4
framework: dynamo-vllm
multinode: true
disagg: true
scenarios:
fixed-seq-len:
# Scenario: isl 1024 / osl 1024.
- isl: 1024
osl: 1024
search-space:
# tp4-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 1.
- conc-list: [4, 16]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 1
dp-attn: false
# tp8-1p1d: 1 prefill worker; 1 decode worker at tp 8, ep 1.
- conc-list: [4]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp8-1p1d.yaml"
decode:
num-worker: 1
tp: 8
ep: 1
dp-attn: false
# tp4-1p2d: 1 prefill worker; 2 decode workers at tp 4, ep 1.
- conc-list: [8, 16]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml"
decode:
num-worker: 2
tp: 4
ep: 1
dp-attn: false
# tp4ep-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 4.
- conc-list: [32, 64, 128]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: false
# tp4ep-1p3d: 1 prefill worker; 3 decode workers at tp 4, ep 4.
- conc-list: [64, 128, 256, 1024]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml"
decode:
num-worker: 3
tp: 4
ep: 4
dp-attn: false
# dep2-1p2d: 1 prefill worker; 2 decode workers at tp 2, ep 2, dp-attn true.
- conc-list: [4096]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-1p2d.yaml"
decode:
num-worker: 2
tp: 2
ep: 2
dp-attn: true
# dep2-2p3d: 2 prefill workers; 3 decode workers at tp 2, ep 2, dp-attn true.
- conc-list: [2048, 4096]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml"
decode:
num-worker: 3
tp: 2
ep: 2
dp-attn: true
# dep2-2p3d-c6144: same worker counts and decode tp/ep as the dep2-2p3d item,
# but a separate recipe file and higher concurrency values.
- conc-list: [6144, 8192]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml"
decode:
num-worker: 3
tp: 2
ep: 2
dp-attn: true
# dep8-2p1d: 2 prefill workers; 1 decode worker at tp 8, ep 8, dp-attn true.
- conc-list: [1024, 1536, 2048, 4096]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml"
decode:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
# Scenario: isl 8192 / osl 1024.
- isl: 8192
osl: 1024
search-space:
# tp4-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 1.
- conc-list: [2, 4, 8, 16]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 1
dp-attn: false
# tp8-1p1d: 1 prefill worker; 1 decode worker at tp 8, ep 1.
- conc-list: [4]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp8-1p1d.yaml"
decode:
num-worker: 1
tp: 8
ep: 1
dp-attn: false
# tp4ep-1p1d: 1 prefill worker; 1 decode worker at tp 4, ep 4.
- conc-list: [32, 128]
prefill:
num-worker: 1
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: false
# tp4ep-2p1d: 2 prefill workers; 1 decode worker at tp 4, ep 4.
- conc-list: [64, 128, 256, 512]
prefill:
num-worker: 2
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: false
# dep4-4p1d: 4 prefill workers; 1 decode worker at tp 4, ep 4, dp-attn true.
- conc-list: [384]
prefill:
num-worker: 4
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml"
decode:
num-worker: 1
tp: 4
ep: 4
dp-attn: true
# dep8-4p1d: 4 prefill workers; 1 decode worker at tp 8, ep 8, dp-attn true.
# Same conc-list as the dep4-4p1d item above; it differs in the recipe file
# and in decode tp/ep (8 instead of 4), so it is not a duplicate.
- conc-list: [384]
prefill:
num-worker: 4
tp: 1
ep: 1
dp-attn: false
additional-settings:
- "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml"
decode:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
Loading
Loading