From 4ce8fde4c1a9d866f32ef6c2885f523dc2a6775c Mon Sep 17 00:00:00 2001 From: Gaurav Garg Date: Sun, 19 Apr 2026 23:28:13 +0530 Subject: [PATCH] Fix delayed AllReduce on Gemma-4 MoE Skip forward past nodes that don't consume the current one, and allow a chain of MULs. --- ggml/src/ggml-backend-meta.cpp | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-backend-meta.cpp b/ggml/src/ggml-backend-meta.cpp index 39651adc1c..0c450b52c2 100644 --- a/ggml/src/ggml-backend-meta.cpp +++ b/ggml/src/ggml-backend-meta.cpp @@ -1661,6 +1661,24 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, ggml_tensor * node = cgraph->nodes[id]; int32_t n_used = ggml_node_get_use_count(cgraph, id); + + // Skip MIRRORED nodes that don't consume node + auto skip_unrelated = [&]() { + while (id + 1 < cgraph->n_nodes) { + ggml_tensor * next = cgraph->nodes[id+1]; + bool uses_node = false; + for (int s = 0; s < GGML_MAX_SRC; s++) { + if (next->src[s] == node) { uses_node = true; break; } + } + if (uses_node) break; + if (ggml_backend_meta_get_split_state(next, false).axis != GGML_BACKEND_SPLIT_AXIS_MIRRORED) { + break; + } + id++; + } + }; + + skip_unrelated(); if (id + 1 >= cgraph->n_nodes) { return idr; } @@ -1675,10 +1693,12 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, n_used = ggml_node_get_use_count(cgraph, id); } } - if (id + 1 >= cgraph->n_nodes) { - return idr; - } - { + // Chain of MULs with MIRRORED src[1] + while (true) { + skip_unrelated(); + if (id + 1 >= cgraph->n_nodes) { + return idr; + } ggml_tensor * next = cgraph->nodes[id+1]; if (next->op == GGML_OP_MUL && next->src[0] == node && ggml_backend_meta_get_split_state(next->src[1], false).axis == GGML_BACKEND_SPLIT_AXIS_MIRRORED) { @@ -1686,6 +1706,8 @@ static enum ggml_status ggml_backend_meta_graph_compute(ggml_backend_t backend, id++; idr = id; n_used = ggml_node_get_use_count(cgraph, id); + } else { + break; } }