From fbddaa30d0fa5405779e55839a39081b926d20cd Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 10:12:59 +0800
Subject: [PATCH 1/7] Bump dsv4-fp4-mi355x-sglang image, raise conc-end, add mem-fraction-static and swa-full-tokens-ratio

---
 .github/configs/amd-master.yaml                  | 6 +++---
 benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 418ad5ab9..bea585dab 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -1774,7 +1774,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
 # image tag, so bumping sglang is just an image tag bump here. Sweeps
 # DP-attention on/off and EP=8.
 dsv4-fp4-mi355x-sglang:
-  image: rocm/sgl-dev:rocm720-mi35x-8c3b5aa-20260521-DSv4
+  image: rocm/sgl-dev:rocm720-mi35x-f96ac98-20260526-DSv4
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: mi355x
@@ -1786,12 +1786,12 @@ dsv4-fp4-mi355x-sglang:
     - isl: 1024
       osl: 1024
       search-space:
-      - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 1024 }
+      - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 4096 }
       - { tp: 8, dp-attn: false, conc-start: 1 , conc-end: 32 }
     - isl: 8192
       osl: 1024
       search-space:
-      - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 512 }
+      - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 }
       - { tp: 8, dp-attn: false, conc-start: 1, conc-end: 32 }
 
 # DSv4 on MI355X via vLLM, using the official vllm/vllm-openai-rocm
diff --git a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
index 571132a54..b26fa6033 100755
--- a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
+++ b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
@@ -116,6 +116,8 @@ python3 -m sglang.launch_server \
     --disable-radix-cache \
     --attention-backend compressed \
     --max-running-requests ${CONC} \
+    --mem-fraction-static 0.90 \
+    --swa-full-tokens-ratio 0.1 \
     --page-size 256 \
     --context-length $MAX_MODEL_LEN \
     --chunked-prefill-size 8192 \

From f3870db3865f97247365da81f4e811bc727d435b Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 10:24:41 +0800
Subject: [PATCH 2/7] Add perf-changelog entry for dsv4-fp4-mi355x-sglang

---
 perf-changelog.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 614b6104e..4838ddf5e 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3137,3 +3137,9 @@
     - "Set vLLM serving knobs in benchmarks/single_node/minimaxm2.5_fp8_h200.sh: generated benchmark max-model-len, previous eval max-model-len handling, fp8 KV cache, FlashInfer attention/autotune, Triton MoE, and MiniMax QK norm fusion"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1354
 
+- config-keys:
+    - dsv4-fp4-mi355x-sglang
+  description:
+    - "Bump image to rocm/sgl-dev:rocm720-mi35x-f96ac98-20260526-DSv4"
+    - "Add args to avoid kvcache pool full issue on high conc"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1568

From 11daefb38650b271ccafc800ebbec8344f21e969 Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 10:29:04 +0800
Subject: [PATCH 3/7] Remove merge conflict markers from perf-changelog.yaml

---
 perf-changelog.yaml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index a3c697a75..e84cb9688 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3138,16 +3138,15 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1354
 
 - config-keys:
-<<<<<<< dsv4-mi355-sgl-0526
     - dsv4-fp4-mi355x-sglang
   description:
     - "Bump image to rocm/sgl-dev:rocm720-mi35x-f96ac98-20260526-DSv4"
     - "Add args to avoid kvcache pool full issue on high conc"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1568
-=======
+
+- config-keys:
     - qwen3.5-fp8-h200-sglang
     - dsr1-fp8-mi355x-sglang
   description:
     - "Validates measured-power aggregation pipeline (PR #1558) on both NVIDIA (H200) and AMD (MI355X) hardware — different SMI tools (nvidia-smi vs amd-smi), different CSV schemas (power.draw [W] vs socket_power), same aggregator. No config change. Entry intentionally kept past merge so run-sweep produces canonical agg JSONs with avg_power_w + joules_per_output_token on main for both vendors, seeding the dashboard's day-zero data."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1558
->>>>>>> main

From 44f19c08e765ed3282238c13e1defa5306296e6b Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 12:57:27 +0800
Subject: [PATCH 4/7] Raise --swa-full-tokens-ratio from 0.1 to 0.2 in dsv4_fp4_mi355x_sglang.sh

---
 benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
index b26fa6033..ec9ebce02 100755
--- a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
+++ b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
@@ -117,7 +117,7 @@ python3 -m sglang.launch_server \
     --attention-backend compressed \
     --max-running-requests ${CONC} \
     --mem-fraction-static 0.90 \
-    --swa-full-tokens-ratio 0.1 \
+    --swa-full-tokens-ratio 0.2 \
     --page-size 256 \
     --context-length $MAX_MODEL_LEN \
     --chunked-prefill-size 8192 \

From a618b9e76659290c62a0aee884cfb89df84140b0 Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 12:58:31 +0800
Subject: [PATCH 5/7] Lower dsv4-fp4-mi355x-sglang 1024/1024 dp-attn conc-end from 4096 to 2048

---
 .github/configs/amd-master.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index bea585dab..fc4ba15a7 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -1786,7 +1786,7 @@ dsv4-fp4-mi355x-sglang:
     - isl: 1024
       osl: 1024
       search-space:
-      - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 4096 }
+      - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 }
       - { tp: 8, dp-attn: false, conc-start: 1 , conc-end: 32 }
     - isl: 8192
       osl: 1024

From 6deed239b272e943e00bc8a34a1de63cc2e3fea3 Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 13:54:25 +0800
Subject: [PATCH 6/7] Lower --swa-full-tokens-ratio from 0.2 to 0.15 in dsv4_fp4_mi355x_sglang.sh

---
 benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
index ec9ebce02..3a307ed0b 100755
--- a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
+++ b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
@@ -117,7 +117,7 @@ python3 -m sglang.launch_server \
     --attention-backend compressed \
     --max-running-requests ${CONC} \
     --mem-fraction-static 0.90 \
-    --swa-full-tokens-ratio 0.2 \
+    --swa-full-tokens-ratio 0.15 \
     --page-size 256 \
     --context-length $MAX_MODEL_LEN \
     --chunked-prefill-size 8192 \

From 3ad3afeb8039f107cc625021d73fc8908af7df36 Mon Sep 17 00:00:00 2001
From: thomawan <thomawan@amd.com>
Date: Wed, 27 May 2026 14:03:42 +0800
Subject: [PATCH 7/7] Set AITER_BF16_FP8_MOE_BOUND=0 in dsv4_fp4_mi355x_sglang.sh

---
 benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
index 3a307ed0b..a4976bdb0 100755
--- a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
+++ b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
@@ -77,7 +77,7 @@ export SGLANG_FORCE_TRITON_MOE_FP8=0
 export SGLANG_HACK_FLASHMLA_BACKEND=triton
 export SGLANG_OPT_USE_TILELANG_INDEXER=true
 export SGLANG_OPT_USE_TRITON_SWA_PREPARE=true
-export AITER_BF16_FP8_MOE_BOUND=1
+export AITER_BF16_FP8_MOE_BOUND=0
 export SGLANG_OPT_FUSE_WQA_WKV=true
 export SGLANG_OPT_USE_FUSED_PAGED_COMPRESS=true
 export SGLANG_OPT_USE_MULTI_STREAM_OVERLAP=0