From a5126ced9b081b230fe194019f110c75882e82d2 Mon Sep 17 00:00:00 2001 From: "Dvoretckii, Mikhail" Date: Thu, 23 Apr 2026 01:19:30 -0700 Subject: [PATCH 1/6] [GPU] Introduce a transformation to reduce FullyConnected activation dimensions --- .../transformations/reduce_fc_dimensions.cpp | 55 +++++++++++++++++++ .../transformations/reduce_fc_dimensions.hpp | 17 ++++++ .../src/plugin/transformations_pipeline.cpp | 2 + 3 files changed, 74 insertions(+) create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp new file mode 100644 index 000000000000..794850f8726d --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reduce_fc_dimensions.hpp" +#include "intel_gpu/op/fully_connected.hpp" +#include "intel_gpu/op/placeholder.hpp" +#include "openvino/core/graph_util.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/any.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov::intel_gpu { + +ReduceFCDimensions::ReduceFCDimensions() { + auto activations_m = ov::pass::pattern::any_input(ov::pass::pattern::shape_matches("[1, 1, ?, ?]")); + auto weights_m = ov::pass::pattern::any_input(ov::pass::pattern::shape_matches("[?, ?]")); + auto no_bias_m = ov::pass::pattern::wrap_type(); + auto fc_m = ov::pass::pattern::wrap_type({activations_m, weights_m, no_bias_m}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + + auto activations = pattern_map.at(activations_m).get_node_shared_ptr(); + auto weights = pattern_map.at(weights_m).get_node_shared_ptr(); + auto no_bias = pattern_map.at(no_bias_m).get_node_shared_ptr(); + auto fc = pattern_map.at(fc_m).get_node_shared_ptr(); + + auto act_pshape = activations->get_output_partial_shape(0); + auto squeeze_const = + std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, -1, act_pshape[-1].get_length()}); + auto squeeze = std::make_shared(activations, squeeze_const, false); + ov::copy_runtime_info(activations, squeeze); + squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze"); + + auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias}); + + auto shape_out = fc_new->get_output_partial_shape(0); + auto unsqueeze_const = + std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, -1, shape_out[-1].get_length()}); + auto unsqueeze = std::make_shared(fc_new, unsqueeze_const, false); + unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze"); + + ov::replace_node(fc, unsqueeze); + return true; + }; + + auto m = std::make_shared(fc_m, "ReduceFCDimensions"); + this->register_matcher(m, callback); +} + +} // namespace ov::intel_gpu \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp new file mode 100644 index 000000000000..229427826c9f --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov::intel_gpu { + +class ReduceFCDimensions : public ov::pass::MatcherPass { +public: + OPENVINO_MATCHER_PASS_RTTI("ReduceFCDimensions"); + ReduceFCDimensions(); +}; + +} // namespace ov::intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index b6e281a28368..8b2986cbcffd 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -105,6 +105,7 @@ #include "plugin/transformations/move_fc_reshape_to_weights.hpp" #include "plugin/transformations/optimize_subsequent_reshapes.hpp" #include "plugin/transformations/print_model_statistics.hpp" +#include "plugin/transformations/reduce_fc_dimensions.hpp" #include "plugin/transformations/sink_reshape.hpp" #include "plugin/transformations/transpose_fusion.hpp" #include "plugin/transformations/unsqueeze_broadcast_reshape_matmul_fusion.hpp" @@ -1517,6 +1518,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(device_info.supports_immad); manager.register_pass(); + manager.register_pass(); manager.register_pass(); const bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false); From 30ba6b5d4eb50e02eb8ac3a5fcd7675da956a3db Mon Sep 17 00:00:00 2001 From: "Dvoretckii, Mikhail" Date: Mon, 27 Apr 2026 07:10:48 -0700 Subject: [PATCH 2/6] Add tests, fix metadata transfer --- .../transformations/reduce_fc_dimensions.cpp | 3 + .../reduce_fc_dimensions_test.cpp | 142 ++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp index 794850f8726d..a6b141eb7ecb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp @@ -37,12 +37,15 @@ ReduceFCDimensions::ReduceFCDimensions() { squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze"); auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias}); + ov::copy_runtime_info(fc, fc_new); auto shape_out = fc_new->get_output_partial_shape(0); auto unsqueeze_const = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, -1, shape_out[-1].get_length()}); + ov::copy_runtime_info(fc, unsqueeze_const); auto unsqueeze = std::make_shared(fc_new, unsqueeze_const, false); unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze"); + ov::copy_runtime_info(fc, unsqueeze); ov::replace_node(fc, unsqueeze); return true; diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp new file mode 100644 index 000000000000..44af1cb87f81 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp @@ -0,0 +1,142 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "intel_gpu/op/fully_connected.hpp" +#include "intel_gpu/op/placeholder.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/result.hpp" +#include "openvino/pass/manager.hpp" +#include "plugin/transformations/reduce_fc_dimensions.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +namespace ov { +namespace test { +namespace intel_gpu { + +// Regular case, transformation should trigger +TEST_F(TransformationTestsF, ReduceFCDimensions1) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto squeeze_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {1, -1, 16}); + auto squeeze = std::make_shared(input1, squeeze_const, false); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(squeeze, scale, no_bias); + auto unsqueeze_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, -1, 32}); + auto unsqueeze = std::make_shared(fc, unsqueeze_const, false); + + model_ref = std::make_shared(ov::OutputVector{unsqueeze}, ov::ParameterVector{input1}); + } +} + +// Incorrect input size, transformation should not trigger +TEST_F(TransformationTestsF, ReduceFCDimensions2) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 4, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 4, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model_ref = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + } +} + +// Bias present, transformation should not trigger +TEST_F(TransformationTestsF, ReduceFCDimensions3) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1.0}); + auto fc = std::make_shared(input1, scale, bias); + + model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1}); + auto fc = std::make_shared(input1, scale, bias); + + model_ref = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + } +} + +// 3D weight, transformation should not trigger +TEST_F(TransformationTestsF, ReduceFCDimensions4) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1}); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model_ref = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + } +} + +} // namespace intel_gpu +} // namespace test +} // namespace ov From e9a098154ee21a5f1616fe1a9226e543269107cf Mon Sep 17 00:00:00 2001 From: "Dvoretckii, Mikhail" Date: Wed, 3 Jun 2026 06:13:08 -0700 Subject: [PATCH 3/6] Remove the squeeze/unsqueeze from ConvertWeightCompressedConv1x1ToMatmul --- ...vert_weight_compressed_conv1x1_to_matmul.cpp | 17 ----------------- ...weight_compressed_conv1x1_to_matmul_test.cpp | 8 -------- .../transformations/reduce_fc_dimensions.cpp | 2 +- 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp index 6d68c072d7d0..43565d21f4a0 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp @@ -233,23 +233,6 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 } } - // If the activation has a static leading dimension of 1, squeeze it. - // This is done to allow pre-selection of OCL implementations for non-IMMAD devices, reducing memory pressure. - bool squeeze_activation = false; - auto act_pshape = activation->get_output_partial_shape(0); - if (act_pshape.rank().is_static() && act_pshape.rank().get_length() >= 4 && act_pshape[0].is_static() && - act_pshape[0] == 1) { - squeeze_activation = true; - auto squeeze_const = - std::make_shared(ov::element::i64, - ov::Shape{3}, - std::vector{1, -1, act_pshape[-1].get_length()}); - auto squeeze = std::make_shared(activation, squeeze_const, false); - ov::copy_runtime_info(activation, squeeze); - squeeze->set_friendly_name(activation->get_friendly_name() + "_squeeze"); - activation = squeeze; - } - auto matmul = std::make_shared(activation, scaled_weight, false, true); ov::copy_runtime_info(conv1x1, matmul); std::shared_ptr matmul_out; diff --git a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp index 96b15debc1a2..8aea8529fd71 100644 --- a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp +++ b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp @@ -178,10 +178,6 @@ std::shared_ptr gen_model_ref(const Conv1x1ToMatmulTestParams& p) { auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 10}); act_node = std::make_shared(input, reshape_const, false); } - if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { - auto squeeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{3}, {1, input_batch, 10}); - act_node = std::make_shared(act_node, squeeze_const, false); - } auto matmul = std::make_shared(act_node, mul, false, true); current_node = matmul; @@ -189,10 +185,6 @@ std::shared_ptr gen_model_ref(const Conv1x1ToMatmulTestParams& p) { auto bias_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 1, 15}, {1}); current_node = std::make_shared(current_node, bias_const); } - if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { - auto unsqueeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 15}); - current_node = std::make_shared(current_node, unsqueeze_const, false); - } if (p.with_convert) { current_node = std::make_shared(current_node, ov::element::f32); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp index a6b141eb7ecb..456a7b3257fb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp @@ -55,4 +55,4 @@ ReduceFCDimensions::ReduceFCDimensions() { this->register_matcher(m, callback); } -} // namespace ov::intel_gpu \ No newline at end of file +} // namespace ov::intel_gpu From 2d0d79d713fa498593fe750795394e8adcc20ddc Mon Sep 17 00:00:00 2001 From: "Dvoretckii, Mikhail" Date: Wed, 3 Jun 2026 06:25:44 -0700 Subject: [PATCH 4/6] Refactor negative tests --- .../reduce_fc_dimensions_test.cpp | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp index 44af1cb87f81..4e07468eb06d 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp @@ -71,15 +71,7 @@ TEST_F(TransformationTestsF, ReduceFCDimensions2) { manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 4, -1, 16}); - auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); - auto convert = std::make_shared(weights_const, ov::element::f32); - auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); - auto scale = std::make_shared(convert, scale_const); - auto no_bias = std::make_shared(); - auto fc = std::make_shared(input1, scale, no_bias); - - model_ref = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + model_ref = model->clone(); } } @@ -98,15 +90,7 @@ TEST_F(TransformationTestsF, ReduceFCDimensions3) { manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); - auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1}); - auto convert = std::make_shared(weights_const, ov::element::f32); - auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); - auto scale = std::make_shared(convert, scale_const); - auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1}); - auto fc = std::make_shared(input1, scale, bias); - - model_ref = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + model_ref = model->clone(); } } @@ -125,15 +109,7 @@ TEST_F(TransformationTestsF, ReduceFCDimensions4) { manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); - auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1}); - auto convert = std::make_shared(weights_const, ov::element::f32); - auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1}); - auto scale = std::make_shared(convert, scale_const); - auto no_bias = std::make_shared(); - auto fc = std::make_shared(input1, scale, no_bias); - - model_ref = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1}); + model_ref = model->clone(); } } From 4b798367406bcfa2b5dddb7b16721d27cd132191 Mon Sep 17 00:00:00 2001 From: "Dvoretckii, Mikhail" Date: Wed, 3 Jun 2026 06:47:03 -0700 Subject: [PATCH 5/6] Remove missed code --- .../convert_weight_compressed_conv1x1_to_matmul.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp index 43565d21f4a0..582a4f3f6b54 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp @@ -258,18 +258,6 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 matmul_out = matmul; } - if (squeeze_activation) { - auto shape_out = matmul_out->get_output_partial_shape(0); - auto unsqueeze_const = - std::make_shared(ov::element::i64, - ov::Shape{4}, - std::vector{1, 1, -1, shape_out[-1].get_length()}); - auto unsqueeze = std::make_shared(matmul_out, unsqueeze_const, false); - ov::copy_runtime_info(matmul_out, unsqueeze); - unsqueeze->set_friendly_name(matmul_out->get_friendly_name() + "_unsqueeze"); - matmul_out = unsqueeze; - } - if (reshape_out) { if (convert_out) { auto convert_final = convert_out->clone_with_new_inputs({matmul_out}); From 8dd786d308750fb9d2dbe89977fa5166c8f8580c Mon Sep 17 00:00:00 2001 From: "Dvoretckii, Mikhail" Date: Fri, 5 Jun 2026 06:13:14 -0700 Subject: [PATCH 6/6] Adjust to dynamic weight shapes --- .../transformations/reduce_fc_dimensions.cpp | 11 ++++-- .../src/plugin/transformations_pipeline.cpp | 4 +- .../reduce_fc_dimensions_test.cpp | 38 +++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp index 456a7b3257fb..176983b05868 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp @@ -29,9 +29,13 @@ ReduceFCDimensions::ReduceFCDimensions() { auto no_bias = pattern_map.at(no_bias_m).get_node_shared_ptr(); auto fc = pattern_map.at(fc_m).get_node_shared_ptr(); - auto act_pshape = activations->get_output_partial_shape(0); + auto wei_pshape = weights->get_output_partial_shape(0); + // Do not apply in case of dynamic weight shape + if (wei_pshape.is_dynamic()) { + return false; + } auto squeeze_const = - std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, -1, act_pshape[-1].get_length()}); + std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, -1, wei_pshape[1].get_length()}); auto squeeze = std::make_shared(activations, squeeze_const, false); ov::copy_runtime_info(activations, squeeze); squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze"); @@ -39,9 +43,8 @@ ReduceFCDimensions::ReduceFCDimensions() { auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias}); ov::copy_runtime_info(fc, fc_new); - auto shape_out = fc_new->get_output_partial_shape(0); auto unsqueeze_const = - std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, -1, shape_out[-1].get_length()}); + std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, -1, wei_pshape[0].get_length()}); ov::copy_runtime_info(fc, unsqueeze_const); auto unsqueeze = std::make_shared(fc_new, unsqueeze_const, false); unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze"); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 8b2986cbcffd..4feb759d74d3 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -1518,7 +1518,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(device_info.supports_immad); manager.register_pass(); - manager.register_pass(); + if (!device_info.supports_immad) { + manager.register_pass(); + } manager.register_pass(); const bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false); diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp index 4e07468eb06d..99031cf28638 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp @@ -113,6 +113,44 @@ TEST_F(TransformationTestsF, ReduceFCDimensions4) { } } +// Dynamic result dim, transformation should not trigger +TEST_F(TransformationTestsF, ReduceFCDimensions5) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16}); + auto weights_param = std::make_shared(ov::element::u8, ov::PartialShape{-1, 16}); + auto convert = std::make_shared(weights_param, ov::element::f32); + auto scale_param = std::make_shared(ov::element::f32, ov::PartialShape{-1, 1}); + auto scale = std::make_shared(convert, scale_param); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1, weights_param, scale_param}); + manager.register_pass(); + } + { + model_ref = model->clone(); + } +} + +// Dynamic inner dim, transformation should not trigger +TEST_F(TransformationTestsF, ReduceFCDimensions6) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, 10, -1}); + auto weights_param = std::make_shared(ov::element::u8, ov::PartialShape{32, -1}); + auto convert = std::make_shared(weights_param, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1}); + auto scale = std::make_shared(convert, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1, weights_param}); + manager.register_pass(); + } + { + model_ref = model->clone(); + } +} + } // namespace intel_gpu } // namespace test } // namespace ov