From 4814d4ed966bec3ede8f8b1b48d98c0bddd6716d Mon Sep 17 00:00:00 2001
From: "yuan.xiong" <yuan.xiong@intel.com>
Date: Tue, 21 Apr 2026 14:31:14 +0800
Subject: [PATCH 1/2] Fix
 smoke_MatMulCompressedWeights_extra_multiply/MatmulWeightsDecompression dGPU
 functional test case

Signed-off-by: yuan.xiong <yuan.xiong@intel.com>
---
 .../transformations/convert_matmul_to_fc.cpp  | 39 +++++++++++
 .../dynamic/matmul_weights_decompression.cpp  | 26 +++++++-
 .../convert_matmul_to_fc_test.cpp             | 66 +++++++++++++++++++
 3 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp
index 3ab36108c4c154..470433d00de445 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp
@@ -54,6 +54,45 @@ ConvertMatMulToFullyConnected::ConvertMatMulToFullyConnected(bool supports_immad
         auto fc_input_a = pattern_map.at(activations_m);
         auto fc_input_b = pattern_map.at(weights_m);
 
+        auto introduces_non_trivial_batch_broadcast = [](const ov::PartialShape& original_shape,
+                                                         const ov::PartialShape& broadcasted_shape) {  // true when a leading dim grows from 1 to N != 1
+            if (!original_shape.rank().is_static() || !broadcasted_shape.rank().is_static()) {
+                return false;  // a dynamic rank gives nothing to compare, so report "no broadcast"
+            }
+            // Both ranks are static past this point.
+            const auto original_rank = static_cast<size_t>(original_shape.rank().get_length());
+            const auto broadcasted_rank = static_cast<size_t>(broadcasted_shape.rank().get_length());
+            if (broadcasted_rank < 2 || original_rank > broadcasted_rank) {
+                return false;  // no leading dims to inspect, or the rank shrank; also keeps both subtractions below from underflowing
+            }
+            // Left-pad a copy of the original shape with 1s until it has the broadcasted rank.
+            ov::PartialShape aligned_original_shape = original_shape;
+            for (size_t i = 0, cnt = broadcasted_rank - original_rank; i < cnt; ++i) {
+                aligned_original_shape.insert(aligned_original_shape.begin(), 1);
+            }
+            // Compare every dim except the trailing two.
+            for (size_t i = 0; i < broadcasted_rank - 2; ++i) {
+                const auto& original_dim = aligned_original_shape[i];
+                const auto& broadcasted_dim = broadcasted_shape[i];
+                if (original_dim == 1 && broadcasted_dim.is_static() && broadcasted_dim.get_length() != 1) {
+                    return true;
+                }
+            }
+            // No leading dim went from 1 to a static size other than 1.
+            return false;
+        };
+
+        auto mul2_it = pattern_map.find(mul2_m);  // find() rather than at(): the check below tolerates mul2_m being absent from the match
+        if (mul2_it != pattern_map.end() && mul2_it->second.get_node_shared_ptr() == fc_input_b.get_node_shared_ptr()) {
+            const auto reshape_output = pattern_map.at(reshape_m);  // NOTE(review): at() assumes reshape_m is matched whenever mul2_m is - confirm
+            // Keep valid 3D compressed FC cases enabled. Only reject the extra post-reshape multiply when broadcasting changes the weights
+            // from a shared matrix into data with real batch dimensions. For example, reshape may first squeeze the weights to [16, 32],
+            // then an extra multiply with scale [8, 1, 32] broadcasts them to [8, 16, 32], which makes the weights effectively batched again.
+            if (introduces_non_trivial_batch_broadcast(reshape_output.get_partial_shape(), fc_input_b.get_partial_shape())) {
+                return false;  // reject this match, for the reason given in the comment above
+            }
+        }
+
         // If 'fc_input_b' is shared with another matmul, transposing 'fc_input_b' is restricted.
         // If it is connected to the 'input_a' of another matmul, do not transpose
         // If it is connected to the 'input_b' of another matmul and the transpose option differs between the two matmuls, do not transpose.
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp
index 948fb8a5595bff..b621e836cdd24f 100644
--- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp
@@ -405,12 +405,19 @@ const std::vector<bool> transpose_weights = {true, false};
 const std::vector<bool> param_weights = {true, false};
 const std::vector<ShapeParams> input_shapes_basic = {
     {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}},
-    {{{}, {{1, 4, 16}}}, {16, 32}, 2ul},
     {{{}, {{1, 4, 16}}}, {1, 16, 32}},
     {{{}, {{1, 4, 48}}}, {48, 256}},
     {{{}, {{11, 339, 377}}}, {377, 335}}
 };
 
+const std::vector<ShapeParams> input_shapes_extra_multiply = {
+    {{{}, {{1, 4, 2}}}, {2, 32}, 2ul},
+};
+
+const std::vector<ShapeParams> input_shapes_extra_multiply_non_trivial_batch_broadcast = {
+    {{{}, {{1, 4, 16}}}, {16, 32}, 2ul},
+};
+
 INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic,
                          MatmulWeightsDecompression,
                          ::testing::Combine(::testing::ValuesIn(input_shapes_basic),
@@ -428,7 +435,22 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic,
 
 INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_extra_multiply,
                          MatmulWeightsDecompression,
-                         ::testing::Combine(::testing::ValuesIn(input_shapes_basic),
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_extra_multiply),
+                                            ::testing::ValuesIn(weights_precisions),
+                                            ::testing::ValuesIn(activations_precisions),
+                                            ::testing::Values(false),
+                                            ::testing::Values(false),
+                                            ::testing::Values(false),
+                                            ::testing::Values(true),
+                                            ::testing::Values(false),
+                                            ::testing::ValuesIn(param_weights),
+                                            ::testing::Values(0),
+                                            ::testing::Values(1.0f)),
+                         MatmulWeightsDecompression::get_test_case_name);
+
+INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_extra_multiply_non_trivial_batch_broadcast_no_convert,
+                         MatmulWeightsDecompression,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_extra_multiply_non_trivial_batch_broadcast),
                                             ::testing::ValuesIn(weights_precisions),
                                             ::testing::ValuesIn(activations_precisions),
                                             ::testing::Values(false),
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp
index f5a2f6be1474ce..87a72daeac8678 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp
@@ -474,6 +474,72 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_par
     }
 }
 
+TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_weights_extra_multiply) {
+    {  // model under test: a MatMul whose second input is an extra Multiply placed after the Reshape
+        auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{1, 4, 2});
+        auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{1, 2, 32}, {1});
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, ov::element::f16);
+        auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 32}, {1});
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {2, 32});
+        auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
+        auto extra_mul = std::make_shared<ov::opset1::Multiply>(reshape, mul_const);  // multiplies the [2, 32] reshape by the [1, 1, 32] constant
+        auto matmul = std::make_shared<ov::opset1::MatMul>(data, extra_mul);
+        // Register ConvertMatMulToFullyConnected to run on the model built above.
+        model = std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{data});
+        bool support_immad = true;
+        manager.register_pass<ConvertMatMulToFullyConnected>(support_immad);
+    }
+    {  // reference model: same decompression subgraph, with the MatMul replaced by FullyConnected
+        auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{1, 4, 2});
+        auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{1, 2, 32}, {1});
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, ov::element::f16);
+        auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 32}, {1});
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {2, 32});
+        auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
+        auto extra_mul = std::make_shared<ov::opset1::Multiply>(reshape, mul_const);
+        // The expected FullyConnected takes the extra multiply through a Transpose with order (0, 2, 1) and has no bias.
+        auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1});
+        auto transpose = std::make_shared<ov::opset1::Transpose>(extra_mul, transpose_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto matmul = std::make_shared<op::FullyConnected>(data, transpose, no_bias);
+
+        model_ref = std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{data});
+    }
+}
+
+TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_weights_extra_multiply_non_trivial_batch_broadcast) {
+    {  // model under test: the extra Multiply pairs the [16, 32] reshape with an [8, 1, 32] constant
+        auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{1, 4, 16});
+        auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{8, 2, 32}, {1});
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, ov::element::f16);
+        auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{8, 1, 32}, {1});
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {16, 32});
+        auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
+        auto extra_mul = std::make_shared<ov::opset1::Multiply>(reshape, mul_const);  // multiplies the [16, 32] reshape by the [8, 1, 32] constant
+        auto matmul = std::make_shared<ov::opset1::MatMul>(data, extra_mul);
+        // Register ConvertMatMulToFullyConnected to run on the model built above.
+        model = std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{data});
+        bool support_immad = true;
+        manager.register_pass<ConvertMatMulToFullyConnected>(support_immad);
+    }
+    {  // reference model: the same graph rebuilt node for node, so the MatMul is expected to stay as it is
+        auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{1, 4, 16});
+        auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{8, 2, 32}, {1});
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, ov::element::f16);
+        auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{8, 1, 32}, {1});
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {16, 32});
+        auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
+        auto extra_mul = std::make_shared<ov::opset1::Multiply>(reshape, mul_const);
+        auto matmul = std::make_shared<ov::opset1::MatMul>(data, extra_mul);
+        // The reference keeps the plain MatMul as its output; no FullyConnected node is built.
+        model_ref = std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{data});
+    }
+}
+
 TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u4_weights_3D) {
     {
         auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{3, 2, 2});

From a0cef9b4f375b7a8e9e363b500a395a86126b9df Mon Sep 17 00:00:00 2001
From: "yuan.xiong" <yuan.xiong@intel.com>
Date: Wed, 22 Apr 2026 13:59:09 +0800
Subject: [PATCH 2/2] Refine test cases

Signed-off-by: yuan.xiong <yuan.xiong@intel.com>
---
 .../subgraph_tests/dynamic/matmul_weights_decompression.cpp     | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp
index b621e836cdd24f..c799ead6124ad1 100644
--- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp
@@ -405,6 +405,7 @@ const std::vector<bool> transpose_weights = {true, false};
 const std::vector<bool> param_weights = {true, false};
 const std::vector<ShapeParams> input_shapes_basic = {
     {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}},
+    {{{}, {{1, 4, 16}}}, {16, 32}, 2ul},
     {{{}, {{1, 4, 16}}}, {1, 16, 32}},
     {{{}, {{1, 4, 48}}}, {48, 256}},
     {{{}, {{11, 339, 377}}}, {377, 335}}
@@ -412,6 +413,7 @@ const std::vector<ShapeParams> input_shapes_basic = {
 
 const std::vector<ShapeParams> input_shapes_extra_multiply = {
     {{{}, {{1, 4, 2}}}, {2, 32}, 2ul},
+    {{{}, {{1, 4, 16}}}, {1, 16, 32}},  // 3D weights shape with a leading dim of 1; third ShapeParams field left unset
 };
 
 const std::vector<ShapeParams> input_shapes_extra_multiply_non_trivial_batch_broadcast = {