diff --git a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp index 6d68c072d7d0f6..582a4f3f6b5442 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp @@ -233,23 +233,6 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 } } - // If the activation has a static leading dimension of 1, squeeze it. - // This is done to allow pre-selection of OCL implementations for non-IMMAD devices, reducing memory pressure. - bool squeeze_activation = false; - auto act_pshape = activation->get_output_partial_shape(0); - if (act_pshape.rank().is_static() && act_pshape.rank().get_length() >= 4 && act_pshape[0].is_static() && - act_pshape[0] == 1) { - squeeze_activation = true; - auto squeeze_const = - std::make_shared(ov::element::i64, - ov::Shape{3}, - std::vector{1, -1, act_pshape[-1].get_length()}); - auto squeeze = std::make_shared(activation, squeeze_const, false); - ov::copy_runtime_info(activation, squeeze); - squeeze->set_friendly_name(activation->get_friendly_name() + "_squeeze"); - activation = squeeze; - } - auto matmul = std::make_shared(activation, scaled_weight, false, true); ov::copy_runtime_info(conv1x1, matmul); std::shared_ptr matmul_out; @@ -275,18 +258,6 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 matmul_out = matmul; } - if (squeeze_activation) { - auto shape_out = matmul_out->get_output_partial_shape(0); - auto unsqueeze_const = - std::make_shared(ov::element::i64, - ov::Shape{4}, - std::vector{1, 1, -1, shape_out[-1].get_length()}); - auto unsqueeze = std::make_shared(matmul_out, unsqueeze_const, false); - 
ov::copy_runtime_info(matmul_out, unsqueeze); - unsqueeze->set_friendly_name(matmul_out->get_friendly_name() + "_unsqueeze"); - matmul_out = unsqueeze; - } - if (reshape_out) { if (convert_out) { auto convert_final = convert_out->clone_with_new_inputs({matmul_out}); diff --git a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp index 96b15debc1a2ff..8aea8529fd712f 100644 --- a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp +++ b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp @@ -178,10 +178,6 @@ std::shared_ptr gen_model_ref(const Conv1x1ToMatmulTestParams& p) { auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 10}); act_node = std::make_shared(input, reshape_const, false); } - if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { - auto squeeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{3}, {1, input_batch, 10}); - act_node = std::make_shared(act_node, squeeze_const, false); - } auto matmul = std::make_shared(act_node, mul, false, true); current_node = matmul; @@ -189,10 +185,6 @@ std::shared_ptr gen_model_ref(const Conv1x1ToMatmulTestParams& p) { auto bias_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 1, 15}, {1}); current_node = std::make_shared(current_node, bias_const); } - if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { - auto unsqueeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 15}); - current_node = std::make_shared(current_node, unsqueeze_const, false); - } if (p.with_convert) { current_node = std::make_shared(current_node, ov::element::f32); } diff --git 
a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
new file mode 100644
index 00000000000000..176983b0586898
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.cpp
@@ -0,0 +1,61 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "reduce_fc_dimensions.hpp"
+#include "intel_gpu/op/fully_connected.hpp"
+#include "intel_gpu/op/placeholder.hpp"
+#include "openvino/core/graph_util.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/pass/pattern/op/any.hpp"
+#include "openvino/pass/pattern/op/pattern.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/utils/utils.hpp"
+
+namespace ov::intel_gpu {
+// Builds the matcher: an FC node fed by "[1, 1, ?, ?]" activations, "[?, ?]" weights and a no-bias input gets rewritten.
+ReduceFCDimensions::ReduceFCDimensions() {
+    auto activations_m = ov::pass::pattern::any_input(ov::pass::pattern::shape_matches("[1, 1, ?, ?]"));  // 4D, two leading 1s
+    auto weights_m = ov::pass::pattern::any_input(ov::pass::pattern::shape_matches("[?, ?]"));  // 2D weights only
+    auto no_bias_m = ov::pass::pattern::wrap_type();  // NOTE(review): presumably the Placeholder "no bias" op - confirm
+    auto fc_m = ov::pass::pattern::wrap_type({activations_m, weights_m, no_bias_m});  // root of the pattern
+    // Rewrite: activations -> squeeze (3D) -> clone of the FC -> unsqueeze (4D), which then replaces the matched FC.
+    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+
+        auto activations = pattern_map.at(activations_m).get_node_shared_ptr();
+        auto weights = pattern_map.at(weights_m).get_node_shared_ptr();
+        auto no_bias = pattern_map.at(no_bias_m).get_node_shared_ptr();
+        auto fc = pattern_map.at(fc_m).get_node_shared_ptr();
+
+        auto wei_pshape = weights->get_output_partial_shape(0);
+        // Both weight dims are written into the reshape constants below via get_length(), so skip dynamic weight shapes.
+        if (wei_pshape.is_dynamic()) {
+            return false;
+        }
+        auto squeeze_const =
+            std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, -1, wei_pshape[1].get_length()});  // {1, -1, weights dim 1}
+        auto squeeze = std::make_shared(activations, squeeze_const, false);  // presumably a Reshape (special_zero = false) - confirm
+        ov::copy_runtime_info(activations, squeeze);
+        squeeze->set_friendly_name(activations->get_friendly_name() + "_squeeze");
+        // Re-create the FC on top of the squeezed activations; the weights and no-bias inputs are reused unchanged.
+        auto fc_new = fc->clone_with_new_inputs({squeeze, weights, no_bias});
+        ov::copy_runtime_info(fc, fc_new);
+        // Bring the FC result back to 4D: {1, 1, -1, weights dim 0}.
+        auto unsqueeze_const =
+            std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, -1, wei_pshape[0].get_length()});
+        ov::copy_runtime_info(fc, unsqueeze_const);  // NOTE(review): squeeze_const gets no runtime info - intentional?
+        auto unsqueeze = std::make_shared(fc_new, unsqueeze_const, false);
+        unsqueeze->set_friendly_name(fc->get_friendly_name() + "_unsqueeze");
+        ov::copy_runtime_info(fc, unsqueeze);
+
+        ov::replace_node(fc, unsqueeze);  // the unsqueeze takes the place of the original FC in the graph
+        return true;  // report that the graph was modified
+    };
+
+    auto m = std::make_shared(fc_m, "ReduceFCDimensions");
+    this->register_matcher(m, callback);
+}
+
+}  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp
new file mode 100644
index 00000000000000..229427826c9f4b
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/transformations/reduce_fc_dimensions.hpp
@@ -0,0 +1,17 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+
+namespace ov::intel_gpu {
+// Matcher pass (body in reduce_fc_dimensions.cpp) that runs an FC with [1, 1, ?, ?] activations on a 3D view of them.
+class ReduceFCDimensions : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("ReduceFCDimensions");
+    ReduceFCDimensions();  // sets up the pattern and registers the matcher callback
+};
+
+}  // namespace ov::intel_gpu
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index b6e281a2836831..4feb759d74d3bb 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -105,6 +105,7 @@
 #include "plugin/transformations/move_fc_reshape_to_weights.hpp"
 #include "plugin/transformations/optimize_subsequent_reshapes.hpp"
 #include "plugin/transformations/print_model_statistics.hpp"
+#include "plugin/transformations/reduce_fc_dimensions.hpp"
 #include "plugin/transformations/sink_reshape.hpp"
 #include "plugin/transformations/transpose_fusion.hpp"
 #include "plugin/transformations/unsqueeze_broadcast_reshape_matmul_fusion.hpp"
@@ -1517,6 +1518,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
         manager.register_pass();
         manager.register_pass(device_info.supports_immad);
         manager.register_pass();
+        if (!device_info.supports_immad) {  // NOTE(review): presumably guards ReduceFCDimensions (non-IMMAD only) - confirm
+            manager.register_pass();
+        }
         manager.register_pass();

         const bool disable_horizontal_fc_fusion = GPU_DEBUG_VALUE_OR(config.get_disable_horizontal_fc_fusion(), false);
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
new file mode 100644
index 00000000000000..99031cf286388f
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/transformations/reduce_fc_dimensions_test.cpp
@@ -0,0 +1,156 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include "common_test_utils/ov_test_utils.hpp"
+#include "intel_gpu/op/fully_connected.hpp"
+#include "intel_gpu/op/placeholder.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/result.hpp"
+#include "openvino/pass/manager.hpp"
+#include "plugin/transformations/reduce_fc_dimensions.hpp"
+
+using namespace testing;
+using namespace ov::intel_gpu;
+
+namespace ov {
+namespace test {
+namespace intel_gpu {
+
+// Positive case: {1, 1, -1, 16} activations, static {32, 16} weights, no bias -> squeeze {1, -1, 16} / unsqueeze {1, 1, -1, 32}.
+TEST_F(TransformationTestsF, ReduceFCDimensions1) {
+    {  // model under test
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared(convert, scale_const);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(input1, scale, no_bias);
+
+        model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass();
+    }
+    {  // expected model after the pass
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto squeeze_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {1, -1, 16});
+        auto squeeze = std::make_shared(input1, squeeze_const, false);
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared(convert, scale_const);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(squeeze, scale, no_bias);
+        auto unsqueeze_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, -1, 32});
+        auto unsqueeze = std::make_shared(fc, unsqueeze_const, false);
+
+        model_ref = std::make_shared(ov::OutputVector{unsqueeze}, ov::ParameterVector{input1});
+    }
+}
+
+// Negative case: activations {1, 4, -1, 16} do not fit the "[1, 1, ?, ?]" pattern, so the model must stay unchanged.
+TEST_F(TransformationTestsF, ReduceFCDimensions2) {
+    {  // model under test
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 4, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared(convert, scale_const);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(input1, scale, no_bias);
+
+        model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass();
+    }
+    {
+        model_ref = model->clone();  // reference is a copy of the input model: no rewrite expected
+    }
+}
+
+// Negative case: the bias input is a Constant instead of the `no_bias` node used elsewhere, so the model must stay unchanged.
+TEST_F(TransformationTestsF, ReduceFCDimensions3) {
+    {  // model under test
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{32, 16}, {1});
+        auto convert = std::make_shared(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared(convert, scale_const);
+        auto bias = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1, 1, 32}, {1.0});
+        auto fc = std::make_shared(input1, scale, bias);
+
+        model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass();
+    }
+    {
+        model_ref = model->clone();  // reference is a copy of the input model: no rewrite expected
+    }
+}
+
+// Negative case: 3D weights {4, 32, 16} do not fit the "[?, ?]" weights pattern, so the model must stay unchanged.
+TEST_F(TransformationTestsF, ReduceFCDimensions4) {
+    {  // model under test
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{4, 32, 16}, {1});
+        auto convert = std::make_shared(weights_const, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 32, 1}, {1});
+        auto scale = std::make_shared(convert, scale_const);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(input1, scale, no_bias);
+
+        model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1});
+        manager.register_pass();
+    }
+    {
+        model_ref = model->clone();  // reference is a copy of the input model: no rewrite expected
+    }
+}
+
+// Negative case: weight path built from a {-1, 16} Parameter (dynamic result dim); the pass skips dynamic weight shapes.
+TEST_F(TransformationTestsF, ReduceFCDimensions5) {
+    {  // model under test
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, -1, 16});
+        auto weights_param = std::make_shared(ov::element::u8, ov::PartialShape{-1, 16});
+        auto convert = std::make_shared(weights_param, ov::element::f32);
+        auto scale_param = std::make_shared(ov::element::f32, ov::PartialShape{-1, 1});
+        auto scale = std::make_shared(convert, scale_param);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(input1, scale, no_bias);
+
+        model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1, weights_param, scale_param});
+        manager.register_pass();
+    }
+    {
+        model_ref = model->clone();  // reference is a copy of the input model: no rewrite expected
+    }
+}
+
+// Negative case: weight path built from a {32, -1} Parameter (dynamic inner dim); the pass skips dynamic weight shapes.
+TEST_F(TransformationTestsF, ReduceFCDimensions6) {
+    {  // model under test
+        auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, 10, -1});
+        auto weights_param = std::make_shared(ov::element::u8, ov::PartialShape{32, -1});
+        auto convert = std::make_shared(weights_param, ov::element::f32);
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{32, 1}, {1});
+        auto scale = std::make_shared(convert, scale_const);
+        auto no_bias = std::make_shared();
+        auto fc = std::make_shared(input1, scale, no_bias);
+
+        model = std::make_shared(ov::OutputVector{fc}, ov::ParameterVector{input1, weights_param});
+        manager.register_pass();
+    }
+    {
+        model_ref = model->clone();  // reference is a copy of the input model: no rewrite expected
+    }
+}
+
+}  // namespace intel_gpu
+}  // namespace test
+}  // namespace ov