From a5126ced9b081b230fe194019f110c75882e82d2 Mon Sep 17 00:00:00 2001
From: "Dvoretckii, Mikhail" <mikhail.dvoretckii@intel.com>
Date: Thu, 23 Apr 2026 01:19:30 -0700
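Subject: [PATCH 1/6] [GPU] Introduce a transformation to reduce FullyConnected
 activation dimensions

Add the ReduceFCDimensions matcher pass. For a FullyConnected node with a
[1, 1, ?, ?] activation, 2D weights and a Placeholder in the bias slot, the
pass reshapes the activation to 3D ([1, -1, <last activation dim>]) in front
of the node and reshapes the result back to 4D ([1, 1, -1, <last output dim>])
after it. The pass is registered right after MoveFCReshapeToWeights in the
GPU transformations pipeline.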

---
 .../transformations/reduce_fc_dimensions.cpp  | 55 +++++++++++++++++++
 .../transformations/reduce_fc_dimensions.hpp  | 17 ++++++
 .../src/plugin/transformations_pipeline.cpp   |  2 +
 3 files changed, 74 insertions(+)
 create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
 create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
new file mode 100644
index 000000000000..794850f8726d
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
@@ -0,0 +1,55 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "reduce_fc_dimensions.hpp"
+#include "intel_gpu/op/fully_connected.hpp"
+#include "intel_gpu/op/placeholder.hpp"
+#include "openvino/core/graph_util.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/pass/pattern/op/any.hpp"
+#include "openvino/pass/pattern/op/pattern.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/utils/utils.hpp"
+
+namespace ov::intel_gpu {
+
+ReduceFCDimensions::ReduceFCDimensions() {
+    auto activations_m = ov::pass::pattern::any_input(ov::pass::pattern::shape_matches("[1, 1, ?, ?]"));
+    auto weights_m = ov::pass::pattern::any_input(ov::pass::pattern::shape_matches("[?, ?]"));
+    auto no_bias_m = ov::pass::pattern::wrap_type<op::Placeholder>();
+    auto fc_m = ov::pass::pattern::wrap_type<op::FullyConnected>({activations_m, weights_m, no_bias_m});
+
+    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+
+        auto activations = pattern_map.at(activations_m).get_node_shared_ptr();
+        auto weights = pattern_map.at(weights_m).get_node_shared_ptr();
+        auto no_bias = pattern_map.at(no_bias_m).get_node_shared_ptr();
+        auto fc = pattern_map.at(fc_m).get_node_shared_ptr();
+       
+        auto act_pshape = activations->get_output_partial_shape(0);
+        auto squeeze_const =
+            std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{3}, std::vector<int64_t>{1, -1, act_pshape[-1].get_length()});
+        auto squeeze = std::make_shared<ov::op::v1::Reshape>(activations, squeeze_const, false);
+        ov::copy_runtime_info(activations, squeeze);
+        squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze");
+
+        auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias});
+
+        auto shape_out = fc_new->get_output_partial_shape(0);
+        auto unsqueeze_const =
+            std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()});
+        auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(fc_new, unsqueeze_const, false);
+        unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze");
+
+        ov::replace_node(fc, unsqueeze);
+        return true;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(fc_m, "ReduceFCDimensions");
+    this->register_matcher(m, callback);
+}
+
+}  // namespace ov::intel_gpu
\ No newline at end of file
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp
new file mode 100644
index 000000000000..229427826c9f
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp
@@ -0,0 +1,17 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+
+namespace ov::intel_gpu {
+
+class ReduceFCDimensions : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("ReduceFCDimensions");
+    ReduceFCDimensions();
+};
+
+}  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index b6e281a28368..8b2986cbcffd 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -105,6 +105,7 @@
 #include "plugin/transformations/move_fc_reshape_to_weights.hpp"
 #include "plugin/transformations/optimize_subsequent_reshapes.hpp"
 #include "plugin/transformations/print_model_statistics.hpp"
+#include "plugin/transformations/reduce_fc_dimensions.hpp"
 #include "plugin/transformations/sink_reshape.hpp"
 #include "plugin/transformations/transpose_fusion.hpp"
 #include "plugin/transformations/unsqueeze_broadcast_reshape_matmul_fusion.hpp"
@@ -1517,6 +1518,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass<ov::intel_gpu::ClampFP16Output>();
         manager.register_pass<ov::intel_gpu::ConvertMatMulToFullyConnected>(device_info.supports_immad);
         manager.register_pass<ov::intel_gpu::MoveFCReshapeToWeights>();
+        manager.register_pass<ov::intel_gpu::ReduceFCDimensions>();
         manager.register_pass<ov::intel_gpu::ConvertFullyConnectedToFullyConnectedCompressed>();
 
         const bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false);

From 30ba6b5d4eb50e02eb8ac3a5fcd7675da956a3db Mon Sep 17 00:00:00 2001
From: "Dvoretckii, Mikhail" <mikhail.dvoretckii@intel.com>
Date: Mon, 27 Apr 2026 07:10:48 -0700
Subject: [PATCH 2/6] Add tests, fix metadata transfer

---
 .../transformations/reduce_fc_dimensions.cpp  |   3 +
 .../reduce_fc_dimensions_test.cpp             | 142 ++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
index 794850f8726d..a6b141eb7ecb 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
@@ -37,12 +37,15 @@ ReduceFCDimensions::ReduceFCDimensions() {
         squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze");
 
         auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias});
+        ov::copy_runtime_info(fc, fc_new);
 
         auto shape_out = fc_new->get_output_partial_shape(0);
         auto unsqueeze_const =
             std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()});
+        ov::copy_runtime_info(fc, unsqueeze_const);
         auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(fc_new, unsqueeze_const, false);
         unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze");
+        ov::copy_runtime_info(fc, unsqueeze);
 
         ov::replace_node(fc, unsqueeze);
         return true;
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
new file mode 100644
index 000000000000..44af1cb87f81
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
@@ -0,0 +1,142 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+
+#include "common_test_utils/ov_test_utils.hpp"
+#include "intel_gpu/op/fully_connected.hpp"
+#include "intel_gpu/op/placeholder.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/result.hpp"
+#include "openvino/pass/manager.hpp"
+#include "plugin/transformations/reduce_fc_dimensions.hpp"
+
+using namespace testing;
+using namespace ov::intel_gpu;
+
+namespace ov {
+namespace test {
+namespace intel_gpu {
+
+// Regular case, transformation should trigger
+TEST_F(TransformationTestsF, ReduceFCDimensions1) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass<ReduceFCDimensions>();
+    }
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto squeeze_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {1, -1, 16});
+        auto squeeze = std::make_shared<ov::op::v1::Reshape>(input1, squeeze_const, false);
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(squeeze, scale, no_bias);
+        auto unsqueeze_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, -1, 32});
+        auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(fc, unsqueeze_const, false);
+
+        model_ref = std::make_shared<ov::Model>(ov::OutputVector{unsqueeze}, ov::ParameterVector{input1});
+    }
+}
+
+// Activation shape [1, 4, ?, 16] does not match the [1, 1, ?, ?] pattern, transformation should not trigger
+TEST_F(TransformationTestsF, ReduceFCDimensions2) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 4, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass<ReduceFCDimensions>();
+    }
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 4, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model_ref = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+    }
+}
+
+// Bias present, transformation should not trigger
+TEST_F(TransformationTestsF, ReduceFCDimensions3) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1.0});
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, bias);
+
+        model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass<ReduceFCDimensions>();
+    }
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1});
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, bias);
+
+        model_ref = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+    }
+}
+
+// 3D weight, transformation should not trigger
+TEST_F(TransformationTestsF, ReduceFCDimensions4) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass<ReduceFCDimensions>();
+    }
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model_ref = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+    }
+}
+
+}  // namespace intel_gpu
+}  // namespace test
+}  // namespace ov

From e9a098154ee21a5f1616fe1a9226e543269107cf Mon Sep 17 00:00:00 2001
From: "Dvoretckii, Mikhail" <mikhail.dvoretckii@intel.com>
Date: Wed, 3 Jun 2026 06:13:08 -0700
Subject: [PATCH 3/6] Remove the squeeze/unsqueeze from
 ConvertWeightCompressedConv1x1ToMatmul

---
 ...vert_weight_compressed_conv1x1_to_matmul.cpp | 17 -----------------
 ...weight_compressed_conv1x1_to_matmul_test.cpp |  8 --------
 .../transformations/reduce_fc_dimensions.cpp    |  2 +-
 3 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
index 6d68c072d7d0..43565d21f4a0 100644
--- a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
+++ b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
@@ -233,23 +233,6 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1
             }
         }
 
-        // If the activation has a static leading dimension of 1, squeeze it.
-        // This is done to allow pre-selection of OCL implementations for non-IMMAD devices, reducing memory pressure.
-        bool squeeze_activation = false;
-        auto act_pshape = activation->get_output_partial_shape(0);
-        if (act_pshape.rank().is_static() && act_pshape.rank().get_length() >= 4 && act_pshape[0].is_static() &&
-            act_pshape[0] == 1) {
-            squeeze_activation = true;
-            auto squeeze_const =
-                std::make_shared<ov::op::v0::Constant>(ov::element::i64,
-                                                       ov::Shape{3},
-                                                       std::vector<int64_t>{1, -1, act_pshape[-1].get_length()});
-            auto squeeze = std::make_shared<ov::op::v1::Reshape>(activation, squeeze_const, false);
-            ov::copy_runtime_info(activation, squeeze);
-            squeeze->set_friendly_name(activation->get_friendly_name() + "_squeeze");
-            activation = squeeze;
-        }
-
         auto matmul = std::make_shared<ov::op::v0::MatMul>(activation, scaled_weight, false, true);
         ov::copy_runtime_info(conv1x1, matmul);
         std::shared_ptr<Node> matmul_out;
diff --git a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp
index 96b15debc1a2..8aea8529fd71 100644
--- a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp
+++ b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp
@@ -178,10 +178,6 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
         auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 10});
         act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false);
     }
-    if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) {
-        auto squeeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{3}, {1, input_batch, 10});
-        act_node = std::make_shared<ov::opset1::Reshape>(act_node, squeeze_const, false);
-    }
     auto matmul = std::make_shared<ov::op::v0::MatMul>(act_node, mul, false, true);
     current_node = matmul;
 
@@ -189,10 +185,6 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
         auto bias_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 1, 15}, {1});
         current_node = std::make_shared<ov::opset1::Add>(current_node, bias_const);
     }
-    if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) {
-        auto unsqueeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 15});
-        current_node = std::make_shared<ov::opset1::Reshape>(current_node, unsqueeze_const, false);
-    }
     if (p.with_convert) {
         current_node = std::make_shared<ov::op::v0::Convert>(current_node, ov::element::f32);
     }
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
index a6b141eb7ecb..456a7b3257fb 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
@@ -55,4 +55,4 @@ ReduceFCDimensions::ReduceFCDimensions() {
     this->register_matcher(m, callback);
 }
 
-}  // namespace ov::intel_gpu
\ No newline at end of file
+}  // namespace ov::intel_gpu

From 2d0d79d713fa498593fe750795394e8adcc20ddc Mon Sep 17 00:00:00 2001
From: "Dvoretckii, Mikhail" <mikhail.dvoretckii@intel.com>
Date: Wed, 3 Jun 2026 06:25:44 -0700
Subject: [PATCH 4/6] Refactor negative tests

---
 .../reduce_fc_dimensions_test.cpp             | 30 ++-----------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
index 44af1cb87f81..4e07468eb06d 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
@@ -71,15 +71,7 @@ TEST_F(TransformationTestsF, ReduceFCDimensions2) {
         manager.register_pass<ReduceFCDimensions>();
     }
     {
-        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 4, -1, 16});
-        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
-        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
-        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
-        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
-        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
-        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
-
-        model_ref = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        model_ref = model->clone();
     }
 }
 
@@ -98,15 +90,7 @@ TEST_F(TransformationTestsF, ReduceFCDimensions3) {
         manager.register_pass<ReduceFCDimensions>();
     }
     {
-        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
-        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
-        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
-        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
-        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
-        auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1});
-        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, bias);
-
-        model_ref = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        model_ref = model->clone();
     }
 }
 
@@ -125,15 +109,7 @@ TEST_F(TransformationTestsF, ReduceFCDimensions4) {
         manager.register_pass<ReduceFCDimensions>();
     }
     {
-        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
-        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1});
-        auto convert = std::make_shared<ov::op::v0::Convert>(weights_const, ov::element::f32);
-        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1});
-        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
-        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
-        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
-
-        model_ref = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        model_ref = model->clone();
     }
 }
 

From 4b798367406bcfa2b5dddb7b16721d27cd132191 Mon Sep 17 00:00:00 2001
From: "Dvoretckii, Mikhail" <mikhail.dvoretckii@intel.com>
Date: Wed, 3 Jun 2026 06:47:03 -0700
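Subject: [PATCH 5/6] Remove leftover unsqueeze block from
 ConvertWeightCompressedConv1x1ToMatmul

The earlier squeeze/unsqueeze removal dropped the squeeze_activation flag
but left the `if (squeeze_activation)` unsqueeze block that reads it.
Remove that block as well.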

---
 .../convert_weight_compressed_conv1x1_to_matmul.cpp  | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
index 43565d21f4a0..582a4f3f6b54 100644
--- a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
+++ b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
@@ -258,18 +258,6 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1
             matmul_out = matmul;
         }
 
-        if (squeeze_activation) {
-            auto shape_out = matmul_out->get_output_partial_shape(0);
-            auto unsqueeze_const =
-                std::make_shared<ov::op::v0::Constant>(ov::element::i64,
-                                                       ov::Shape{4},
-                                                       std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()});
-            auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(matmul_out, unsqueeze_const, false);
-            ov::copy_runtime_info(matmul_out, unsqueeze);
-            unsqueeze->set_friendly_name(matmul_out->get_friendly_name() + "_unsqueeze");
-            matmul_out = unsqueeze;
-        }
-
         if (reshape_out) {
             if (convert_out) {
                 auto convert_final = convert_out->clone_with_new_inputs({matmul_out});

From 8dd786d308750fb9d2dbe89977fa5166c8f8580c Mon Sep 17 00:00:00 2001
From: "Dvoretckii, Mikhail" <mikhail.dvoretckii@intel.com>
Date: Fri, 5 Jun 2026 06:13:14 -0700
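Subject: [PATCH 6/6] Adjust to dynamic weight shapes

Skip ReduceFCDimensions when the weight shape is dynamic, and take the
reshape target dimensions from the static weight shape instead of the
activation and FullyConnected output shapes. Also register the pass only
for devices without IMMAD support, and add negative tests with dynamic
weight dimensions.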

---
 .../transformations/reduce_fc_dimensions.cpp  | 11 ++++--
 .../src/plugin/transformations_pipeline.cpp   |  4 +-
 .../reduce_fc_dimensions_test.cpp             | 38 +++++++++++++++++++
 3 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
index 456a7b3257fb..176983b05868 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
@@ -29,9 +29,13 @@ ReduceFCDimensions::ReduceFCDimensions() {
         auto no_bias = pattern_map.at(no_bias_m).get_node_shared_ptr();
         auto fc = pattern_map.at(fc_m).get_node_shared_ptr();
        
-        auto act_pshape = activations->get_output_partial_shape(0);
+        auto wei_pshape = weights->get_output_partial_shape(0);
+        // Do not apply in case of dynamic weight shape
+        if (wei_pshape.is_dynamic()) {
+            return false;
+        }
         auto squeeze_const =
-            std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{3}, std::vector<int64_t>{1, -1, act_pshape[-1].get_length()});
+            std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{3}, std::vector<int64_t>{1, -1, wei_pshape[1].get_length()});
         auto squeeze = std::make_shared<ov::op::v1::Reshape>(activations, squeeze_const, false);
         ov::copy_runtime_info(activations, squeeze);
         squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze");
@@ -39,9 +43,8 @@ ReduceFCDimensions::ReduceFCDimensions() {
         auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias});
         ov::copy_runtime_info(fc, fc_new);
 
-        auto shape_out = fc_new->get_output_partial_shape(0);
         auto unsqueeze_const =
-            std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()});
+            std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{1, 1, -1, wei_pshape[0].get_length()});
         ov::copy_runtime_info(fc, unsqueeze_const);
         auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(fc_new, unsqueeze_const, false);
         unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze");
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 8b2986cbcffd..4feb759d74d3 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -1518,7 +1518,9 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass<ov::intel_gpu::ClampFP16Output>();
         manager.register_pass<ov::intel_gpu::ConvertMatMulToFullyConnected>(device_info.supports_immad);
         manager.register_pass<ov::intel_gpu::MoveFCReshapeToWeights>();
-        manager.register_pass<ov::intel_gpu::ReduceFCDimensions>();
+        if (!device_info.supports_immad) {
+            manager.register_pass<ov::intel_gpu::ReduceFCDimensions>();
+        }
         manager.register_pass<ov::intel_gpu::ConvertFullyConnectedToFullyConnectedCompressed>();
 
         const bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false);
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
index 4e07468eb06d..99031cf28638 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
@@ -113,6 +113,44 @@ TEST_F(TransformationTestsF, ReduceFCDimensions4) {
     }
 }
 
+// Dynamic output dim of the weights (weights shape [?, 16]), transformation should not trigger
+TEST_F(TransformationTestsF, ReduceFCDimensions5) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_param = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{-1, 16});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_param, ov::element::f32);
+        auto scale_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, 1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_param);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1, weights_param, scale_param});
+        manager.register_pass<ReduceFCDimensions>();
+    }
+    {
+        model_ref = model->clone();
+    }
+}
+
+// Dynamic inner dim (weights shape [32, ?], activations [1, 1, 10, ?]), transformation should not trigger
+TEST_F(TransformationTestsF, ReduceFCDimensions6) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1, 10, -1});
+        auto weights_param = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::PartialShape{32, -1});
+        auto convert = std::make_shared<ov::op::v0::Convert>(weights_param, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared<ov::op::v1::Multiply>(convert, scale_const);
+        auto no_bias = std::make_shared<ov::intel_gpu::op::Placeholder>();
+        auto fc = std::make_shared<ov::intel_gpu::op::FullyConnected>(input1, scale, no_bias);
+
+        model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{input1, weights_param});
+        manager.register_pass<ReduceFCDimensions>();
+    }
+    {
+        model_ref = model->clone();
+    }
+}
+
 }  // namespace intel_gpu
 }  // namespace test
 }  // namespace ov