From 4814d4ed966bec3ede8f8b1b48d98c0bddd6716d Mon Sep 17 00:00:00 2001 From: "yuan.xiong" Date: Tue, 21 Apr 2026 14:31:14 +0800 Subject: [PATCH 1/2] fix smoke_MatMulCompressedWeights_extra_multiply/MatmulWeightsDecompression dGPU func testcase Signed-off-by: yuan.xiong --- .../transformations/convert_matmul_to_fc.cpp | 39 +++++++++++ .../dynamic/matmul_weights_decompression.cpp | 26 +++++++- .../convert_matmul_to_fc_test.cpp | 66 +++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp index 3ab36108c4c154..470433d00de445 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp @@ -54,6 +54,45 @@ ConvertMatMulToFullyConnected::ConvertMatMulToFullyConnected(bool supports_immad auto fc_input_a = pattern_map.at(activations_m); auto fc_input_b = pattern_map.at(weights_m); + auto introduces_non_trivial_batch_broadcast = [](const ov::PartialShape& original_shape, + const ov::PartialShape& broadcasted_shape) { + if (!original_shape.rank().is_static() || !broadcasted_shape.rank().is_static()) { + return false; + } + + const auto original_rank = static_cast(original_shape.rank().get_length()); + const auto broadcasted_rank = static_cast(broadcasted_shape.rank().get_length()); + if (broadcasted_rank < 2 || original_rank > broadcasted_rank) { + return false; + } + + ov::PartialShape aligned_original_shape = original_shape; + for (size_t i = 0, cnt = broadcasted_rank - original_rank; i < cnt; ++i) { + aligned_original_shape.insert(aligned_original_shape.begin(), 1); + } + + for (size_t i = 0; i < broadcasted_rank - 2; ++i) { + const auto& original_dim = aligned_original_shape[i]; + const auto& broadcasted_dim = broadcasted_shape[i]; + if (original_dim == 1 && broadcasted_dim.is_static() && 
broadcasted_dim.get_length() != 1) { + return true; + } + } + + return false; + }; + + auto mul2_it = pattern_map.find(mul2_m); + if (mul2_it != pattern_map.end() && mul2_it->second.get_node_shared_ptr() == fc_input_b.get_node_shared_ptr()) { + const auto reshape_output = pattern_map.at(reshape_m); + // Keep valid 3D compressed FC cases enabled. Only reject the extra post-reshape multiply when broadcasting changes the weights + // from a shared matrix into data with real batch dimensions. For example, reshape may first flatten weights of shape [8, 2, 32] to [16, 32], + // then an extra multiply with scale [8, 1, 32] broadcasts them to [8, 16, 32], which makes the weights effectively batched again. + if (introduces_non_trivial_batch_broadcast(reshape_output.get_partial_shape(), fc_input_b.get_partial_shape())) { + return false; + } + } + // If 'fc_input_b' is shared with another matmul, transposing 'fc_input_b' is restricted. // If it is connected to the 'input_a' of another matmul, do not transpose // If it is connected to the 'input_b' of another matmul and the transpose option differs between the two matmuls, do not transpose. 
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index 948fb8a5595bff..b621e836cdd24f 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -405,12 +405,19 @@ const std::vector transpose_weights = {true, false}; const std::vector param_weights = {true, false}; const std::vector input_shapes_basic = { {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, - {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, {{{}, {{1, 4, 16}}}, {1, 16, 32}}, {{{}, {{1, 4, 48}}}, {48, 256}}, {{{}, {{11, 339, 377}}}, {377, 335}} }; +const std::vector input_shapes_extra_multiply = { + {{{}, {{1, 4, 2}}}, {2, 32}, 2ul}, +}; + +const std::vector input_shapes_extra_multiply_non_trivial_batch_broadcast = { + {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, +}; + INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_basic), @@ -428,7 +435,22 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_extra_multiply, MatmulWeightsDecompression, - ::testing::Combine(::testing::ValuesIn(input_shapes_basic), + ::testing::Combine(::testing::ValuesIn(input_shapes_extra_multiply), + ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(activations_precisions), + ::testing::Values(false), + ::testing::Values(false), + ::testing::Values(false), + ::testing::Values(true), + ::testing::Values(false), + ::testing::ValuesIn(param_weights), + ::testing::Values(0), + ::testing::Values(1.0f)), + MatmulWeightsDecompression::get_test_case_name); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_extra_multiply_non_trivial_batch_broadcast_no_convert, + 
MatmulWeightsDecompression, + ::testing::Combine(::testing::ValuesIn(input_shapes_extra_multiply_non_trivial_batch_broadcast), ::testing::ValuesIn(weights_precisions), ::testing::ValuesIn(activations_precisions), ::testing::Values(false), diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp index f5a2f6be1474ce..87a72daeac8678 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp @@ -474,6 +474,72 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_par } } +TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_weights_extra_multiply) { + { + auto data = std::make_shared(ov::element::f16, ov::Shape{1, 4, 2}); + auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{1, 2, 32}, {1}); + auto convert = std::make_shared(weights, ov::element::f16); + auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 32}, {1}); + auto mul = std::make_shared(convert, mul_const); + auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {2, 32}); + auto reshape = std::make_shared(mul, reshape_const, false); + auto extra_mul = std::make_shared(reshape, mul_const); + auto matmul = std::make_shared(data, extra_mul); + + model = std::make_shared(ov::OutputVector{matmul}, ov::ParameterVector{data}); + bool support_immad = true; + manager.register_pass(support_immad); + } + { + auto data = std::make_shared(ov::element::f16, ov::Shape{1, 4, 2}); + auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{1, 2, 32}, {1}); + auto convert = std::make_shared(weights, ov::element::f16); + auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 32}, {1}); + auto mul = std::make_shared(convert, mul_const); + 
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {2, 32}); + auto reshape = std::make_shared(mul, reshape_const, false); + auto extra_mul = std::make_shared(reshape, mul_const); + + auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1}); + auto transpose = std::make_shared(extra_mul, transpose_const); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(data, transpose, no_bias); + + model_ref = std::make_shared(ov::OutputVector{matmul}, ov::ParameterVector{data}); + } +} + +TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_weights_extra_multiply_non_trivial_batch_broadcast) { + { + auto data = std::make_shared(ov::element::f16, ov::Shape{1, 4, 16}); + auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{8, 2, 32}, {1}); + auto convert = std::make_shared(weights, ov::element::f16); + auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{8, 1, 32}, {1}); + auto mul = std::make_shared(convert, mul_const); + auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {16, 32}); + auto reshape = std::make_shared(mul, reshape_const, false); + auto extra_mul = std::make_shared(reshape, mul_const); + auto matmul = std::make_shared(data, extra_mul); + + model = std::make_shared(ov::OutputVector{matmul}, ov::ParameterVector{data}); + bool support_immad = true; + manager.register_pass(support_immad); + } + { + auto data = std::make_shared(ov::element::f16, ov::Shape{1, 4, 16}); + auto weights = ov::opset1::Constant::create(ov::element::u8, ov::Shape{8, 2, 32}, {1}); + auto convert = std::make_shared(weights, ov::element::f16); + auto mul_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{8, 1, 32}, {1}); + auto mul = std::make_shared(convert, mul_const); + auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {16, 32}); + auto reshape = std::make_shared(mul, 
reshape_const, false); + auto extra_mul = std::make_shared(reshape, mul_const); + auto matmul = std::make_shared(data, extra_mul); + + model_ref = std::make_shared(ov::OutputVector{matmul}, ov::ParameterVector{data}); + } +} + TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u4_weights_3D) { { auto data = std::make_shared(ov::element::f16, ov::Shape{3, 2, 2}); From a0cef9b4f375b7a8e9e363b500a395a86126b9df Mon Sep 17 00:00:00 2001 From: "yuan.xiong" Date: Wed, 22 Apr 2026 13:59:09 +0800 Subject: [PATCH 2/2] refine testcases Signed-off-by: yuan.xiong --- .../subgraph_tests/dynamic/matmul_weights_decompression.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index b621e836cdd24f..c799ead6124ad1 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -405,6 +405,7 @@ const std::vector transpose_weights = {true, false}; const std::vector param_weights = {true, false}; const std::vector input_shapes_basic = { {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, + {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, {{{}, {{1, 4, 16}}}, {1, 16, 32}}, {{{}, {{1, 4, 48}}}, {48, 256}}, {{{}, {{11, 339, 377}}}, {377, 335}} @@ -412,6 +413,7 @@ const std::vector input_shapes_basic = { const std::vector input_shapes_extra_multiply = { {{{}, {{1, 4, 2}}}, {2, 32}, 2ul}, + {{{}, {{1, 4, 16}}}, {1, 16, 32}}, }; const std::vector input_shapes_extra_multiply_non_trivial_batch_broadcast = {