From 4ce8fde4c1a9d866f32ef6c2885f523dc2a6775c Mon Sep 17 00:00:00 2001 From: Gaurav Garg Date: Sun, 19 Apr 2026 23:28:13 +0530 Subject: [PATCH 1/3] Fix delayed AllReduce on Gemma-4 MoE Skip forward past nodes that don't consume the current one, and allow a chain of MULs. --- ggml/src/ggml-backend-meta.cpp | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-backend-meta.cpp b/ggml/src/ggml-backend-meta.cpp index 39651adc1c1..0c450b52c23 100644 --- a/ggml/src/ggml-backend-meta.cpp +++ b/ggml/src/ggml-backend-meta.cpp @@ -1661,6 +1661,24 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, ggml_tensor * node = cgraph->nodes[id]; int32_t n_used = ggml_node_get_use_count(cgraph, id); + + // Skip MIRRORED nodes that don't consume node + auto skip_unrelated = [&]() { + while (id + 1 < cgraph->n_nodes) { + ggml_tensor * next = cgraph->nodes[id+1]; + bool uses_node = false; + for (int s = 0; s < GGML_MAX_SRC; s++) { + if (next->src[s] == node) { uses_node = true; break; } + } + if (uses_node) break; + if (ggml_backend_meta_get_split_state(next, false).axis != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { + break; + } + id++; + } + }; + + skip_unrelated(); if (id + 1 >= cgraph->n_nodes) { return idr; } @@ -1675,10 +1693,12 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, n_used = ggml_node_get_use_count(cgraph, id); } } - if (id + 1 >= cgraph->n_nodes) { - return idr; - } - { + // Chain of MULs with MIRRORED src[1] + while (true) { + skip_unrelated(); + if (id + 1 >= cgraph->n_nodes) { + return idr; + } ggml_tensor * next = cgraph->nodes[id+1]; if (next->op == GGML_OP_MUL && next->src[0] == node && ggml_backend_meta_get_split_state(next->src[1], false).axis == GGML_BACKEND_SPLIT_AXIS_MIRRORED) { @@ -1686,6 +1706,8 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, id++; idr = id; n_used = ggml_node_get_use_count(cgraph, id); + } else { + break; } } From 07a158549fdd905728c16be596f36b8344d99be0 Mon Sep 17 00:00:00 2001 From: Gaurav Garg Date: Mon, 20 Apr 2026 17:04:14 +0530 Subject: [PATCH 2/3] Check for all sources before skipping nodes --- ggml/src/ggml-backend-meta.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ggml/src/ggml-backend-meta.cpp b/ggml/src/ggml-backend-meta.cpp index 0c450b52c23..bf6616fc681 100644 --- a/ggml/src/ggml-backend-meta.cpp +++ b/ggml/src/ggml-backend-meta.cpp @@ -1674,6 +1674,17 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, if (ggml_backend_meta_get_split_state(next, false).axis != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { break; } + + bool all_srcs_mirrored = true; + for (int s = 0; s < GGML_MAX_SRC; s++) { + if (next->src[s] == nullptr) continue; + if (ggml_backend_meta_get_split_state(next->src[s], false).axis + != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { + all_srcs_mirrored = false; + break; + } + } + if (!all_srcs_mirrored) break; id++; } }; From 63c7607d272a7af82053e723afdd6d159c3e0aaf Mon Sep 17 00:00:00 2001 From: Gaurav Garg Date: Mon, 20 Apr 2026 18:16:06 +0530 Subject: [PATCH 3/3] Address review comments --- ggml/src/ggml-backend-meta.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-backend-meta.cpp b/ggml/src/ggml-backend-meta.cpp index bf6616fc681..6521902b072 100644 --- a/ggml/src/ggml-backend-meta.cpp +++ b/ggml/src/ggml-backend-meta.cpp @@ -1666,25 +1666,26 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, auto skip_unrelated = [&]() { while (id + 1 < cgraph->n_nodes) { ggml_tensor * next = cgraph->nodes[id+1]; - bool uses_node = false; - for (int s = 0; s < GGML_MAX_SRC; s++) { - if (next->src[s] == node) { uses_node = true; break; } - } - if (uses_node) break; if (ggml_backend_meta_get_split_state(next, false).axis != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { break; } - - bool all_srcs_mirrored = true; + bool safe = true; for (int s = 0; s < GGML_MAX_SRC; s++) { - if (next->src[s] == nullptr) continue; - if (ggml_backend_meta_get_split_state(next->src[s], false).axis - != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { - all_srcs_mirrored = false; + if (next->src[s] == nullptr) { + continue; + } + if (next->src[s] == node) { + safe = false; break; } + if (ggml_backend_meta_get_split_state(next->src[s], false).axis != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { + safe = false; + break; + } + } + if (!safe) { + break; } - if (!all_srcs_mirrored) break; id++; } };