From 5fc6d012fc791e47d83c8cfa482d5aab8f3c17b7 Mon Sep 17 00:00:00 2001
From: Luwei Zhou <luwei.zhou@intel.com>
Date: Wed, 27 Sep 2023 05:39:52 +0200
Subject: [PATCH 1/4] RNN: do not specify weight layout when creating
 primitive descriptor.

RNN weights expose a planar layout to the CPU graph.
---
 src/plugins/intel_cpu/src/nodes/rnn.cpp | 43 ++++++++-----------------
 src/plugins/intel_cpu/src/nodes/rnn.h   |  3 --
 2 files changed, 13 insertions(+), 33 deletions(-)
diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp
index 9992f0f392b8..3f1f93d50bfa 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp
@@ -610,9 +610,11 @@ void RNN::fillCellDesc() {
         inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(shapeS, inDataTypes[cIdx], memory::format_tag::nc));
         outCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(shapeS, outDataTypes[coIdx], memory::format_tag::nc));
     }
-
+    // Weights and weights_iter are exposed in nc layout to avoid an unnecessary reorder.
+    // oneDNN determines the final layout in prepareParams().
     inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(WShape, inDataTypes[wIdx], memory::format_tag::nc));
     inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(RShape, inDataTypes[rIdx], memory::format_tag::nc));
+
     inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(BShape, inDataTypes[bIdx], memory::format_tag::x));
 
     if (haveAttention(cell_type)) {
@@ -715,8 +717,11 @@ void RNN::fillSequenceDesc() {
     }
 
     inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(TShape, inDataTypes[sIdx], memory::format_tag::x)); // sequence lengths
-    inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(WShape, inDataTypes[wIdx], memory::format_tag::ntc)); // W
-    inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(RShape, inDataTypes[rIdx], memory::format_tag::ntc)); // R
+    // Weights and weights_iter are exposed in tnc layout to avoid an unnecessary reorder.
+    // oneDNN determines the final layout in prepareParams().
+    inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(WShape, inDataTypes[wIdx], memory::format_tag::tnc)); // W
+    inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(RShape, inDataTypes[rIdx], memory::format_tag::tnc)); // R
+
     inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(BShape, inDataTypes[bIdx], memory::format_tag::nc)); // B
 
     if (haveAttention(cell_type)) {
@@ -885,9 +890,6 @@ void RNN::copyWeightsData() {
     if (dataType == memory::data_type::bf16) {
         fillWeights<uint16_t>(gate_map, wIdx, rIdx);
     } else if (dataType == memory::data_type::f32) {
-        // WA To avoid different weights layer and iter formats in FP32 case
-        if (T.minVal > 1 || N.maxVal < optimalBatchSize)
-            wFormat = dnnl::memory::format_tag::ldigo;
         fillWeights<float>(gate_map, wIdx, rIdx);
     } else if (dataType == memory::data_type::u8 || dataType == memory::data_type::s8) {
         fillWeights<int8_t>(gate_map, wIdx, rIdx);
@@ -1026,9 +1028,11 @@ void RNN::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
            since internalBlobs are used for the execution, not the initial weights */
         const auto& targetWeightDataType = weightsByinputDataType.at(inDataTypes[xIdx]);
         auto weightsDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC });
-        wDescs[0] = dnnl::memory::desc(weightsDims, targetWeightDataType, wFormat);
+        // oneDNN determines the preferred weights layout.
+        wDescs[0] = dnnl::memory::desc(weightsDims, targetWeightDataType, memory::format_tag::any);
         auto statesDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC });
-        wDescs[1] = dnnl::memory::desc(statesDims, targetWeightDataType, wFormat);
+        // oneDNN determines the preferred weights_iter layout.
+        wDescs[1] = dnnl::memory::desc(statesDims, targetWeightDataType, memory::format_tag::any);
         auto biasDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, Gb, SC });
         wDescs[2] = dnnl::memory::desc(biasDims, inDataTypes[bIdx], memory::format_tag::ldgo);
 
@@ -1053,7 +1057,7 @@ void RNN::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
         config.outConfs.push_back(dataConfig);
     }
 
-    supportedPrimitiveDescriptors.emplace_back(config, ref_any);
+    supportedPrimitiveDescriptors.emplace_back(config, parse_impl_name(descs[0].impl_info_str()));
 }
 
 Node::AttrPtr RNN::initPrimitiveAttr() {
@@ -1103,27 +1107,6 @@ void RNN::prepareParams() {
         inDataDescs[2] = std::make_shared<DnnlBlockedMemoryDesc>(Shape{SL, B, 1}, inDataTypes[aIdx], memory::format_tag::tnc);
     }
 
-    bool wFormatWasChanged = false;
-    // WA To avoid different weights layer and iter formats in FP32 case.
-    if (one_of(inDataTypes[xIdx], memory::data_type::f32) &&
-        (SL != 1 || B < optimalBatchSize)) {
-        if (wFormat != dnnl::memory::format_tag::ldigo) {
-            wFormat = dnnl::memory::format_tag::ldigo;
-            wFormatWasChanged = true;
-        }
-    } else if (wFormat != dnnl::memory::format_tag::any) {
-        wFormat = dnnl::memory::format_tag::any;
-        wFormatWasChanged = true;
-    }
-
-    if (wFormatWasChanged) {
-        auto weightsDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC });
-        const auto& targetWeightDataType = weightsByinputDataType.at(inDataTypes[xIdx]);
-        wDescs[0] = dnnl::memory::desc(weightsDims, targetWeightDataType, wFormat);
-        auto statesDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC });
-        wDescs[1] = dnnl::memory::desc(statesDims, targetWeightDataType, wFormat);
-    }
-
     const auto attr = initPrimitiveAttr();
     RNNKey key = { inDataDescs, outDataDescs, wDescs, cell_type, cell_act, direction, *attr };
 
diff --git a/src/plugins/intel_cpu/src/nodes/rnn.h b/src/plugins/intel_cpu/src/nodes/rnn.h
index d16bcd10c507..692b52108ad6 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.h
+++ b/src/plugins/intel_cpu/src/nodes/rnn.h
@@ -105,9 +105,6 @@ class RNN : public Node {
     /** activation type for vanilla RNN cell */
     dnnl::algorithm cell_act = dnnl::algorithm::undef;
 
-    /** Weights data and state memory format: ldigo or any */
-    dnnl::memory::format_tag wFormat = dnnl::memory::format_tag::any;
-
     struct Interval {
         Interval() = default;
 

From db6cff68729e0c2f7975bb753126720a4d6ff0f4 Mon Sep 17 00:00:00 2001
From: Luwei Zhou <luwei.zhou@intel.com>
Date: Wed, 27 Sep 2023 08:59:14 +0200
Subject: [PATCH 2/4] WA: restore ref_any to pass tests.

---
 src/plugins/intel_cpu/src/nodes/rnn.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp
index 3f1f93d50bfa..7f305cbacb5a 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp
@@ -1057,7 +1057,7 @@ void RNN::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
         config.outConfs.push_back(dataConfig);
     }
 
-    supportedPrimitiveDescriptors.emplace_back(config, parse_impl_name(descs[0].impl_info_str()));
+    supportedPrimitiveDescriptors.emplace_back(config, ref_any);
 }
 
 Node::AttrPtr RNN::initPrimitiveAttr() {

From 4356738184c7b63b96bdc765018d85be62e8f4ad Mon Sep 17 00:00:00 2001
From: Luwei Zhou <luwei.zhou@intel.com>
Date: Wed, 27 Sep 2023 10:57:08 +0200
Subject: [PATCH 3/4] Remove the fusing Reshape + FC transformation in CPU
 plugin.

---
 .../common/pass/reshape_fc_fusion.cpp         | 76 -------------------
 .../common/pass/reshape_fc_fusion.hpp         | 19 -----
 .../convert_to_cpu_specific_opset.hpp         |  4 -
 3 files changed, 99 deletions(-)
 delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp
 delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp

diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp
deleted file mode 100644
index bacc0bbef6e7..000000000000
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "reshape_fc_fusion.hpp"
-#include "transformations/cpu_opset/common/op/fully_connected.hpp"
-#include <numeric>
-#include <ngraph/opsets/opset1.hpp>
-#include <ngraph/rt_info.hpp>
-#include <ngraph/pattern/op/wrap_type.hpp>
-#include <ngraph/pattern/op/or.hpp>
-
-#include "itt.hpp"
-
-ov::intel_cpu::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() {
-    MATCHER_SCOPE(ReshapeFullyConnectedFusion);
-    auto m_reshape = ngraph::pattern::wrap_type<ngraph::opset1::Reshape>({ngraph::pattern::any_input(ov::pass::pattern::has_static_shape()),
-                                                                          ngraph::pattern::any_input()},
-                                                                         ngraph::pattern::has_static_shape());
-    ngraph::OutputVector fcInputs = {m_reshape, ngraph::pattern::any_input()};
-    auto fc = ngraph::pattern::wrap_type<ov::intel_cpu::FullyConnectedNode>(fcInputs, ngraph::pattern::has_static_shape());
-
-    ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) {
-        auto fc = std::dynamic_pointer_cast<ov::intel_cpu::FullyConnectedNode>(m.get_match_root());
-        if (!fc)
-            return false;
-        auto reshape = std::dynamic_pointer_cast<ngraph::opset1::Reshape>(fc->get_input_node_shared_ptr(0));
-        if (!reshape)
-            return false;
-
-        // Check that Reshape reshapes 4D tensor to 2D or input shape = output shape
-        auto shape_in = reshape->input_value(0).get_shape();
-        auto shape_out = reshape->get_shape();
-        if (!((shape_in.size() == 4 && reshape->get_shape().size() == 2) || (shape_in == shape_out && !shape_in.empty()))) {
-            return false;
-        }
-
-        // Check that Weights[O, C*H*W] consistent with Input[N, C, H, W]
-        auto shape_w = fc->input_value(1).get_shape();
-        if (shape_in[0] != shape_out[0] || std::accumulate(shape_in.begin() + 1, shape_in.end(), size_t{1}, std::multiplies<size_t>()) != shape_w[1]) {
-            return false;
-        }
-
-        ngraph::NodeVector new_ops;
-        auto weightInput = fc->input(1).get_source_output();
-        ngraph::Shape newWeightsShape;
-        const auto outShape = fc->get_shape();
-        if (shape_in.size() == 3) {
-            newWeightsShape = ngraph::Shape({outShape[2], shape_in[2]});
-        } else {
-            newWeightsShape.push_back(outShape[1]);
-            for (size_t i = 1; i < shape_in.size(); i++)
-                newWeightsShape.push_back(shape_in[i]);
-        }
-
-        if (newWeightsShape != weightInput.get_shape()) {
-            auto newShape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{newWeightsShape.size()}, newWeightsShape);
-            weightInput = std::make_shared<ngraph::opset1::Reshape>(weightInput, newShape, true);
-            new_ops.push_back(weightInput.get_node_shared_ptr());
-        }
-
-        std::shared_ptr<ngraph::Node> new_fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(
-                                                                        reshape->input_value(0),
-                                                                        weightInput,
-                                                                        ngraph::Rank(outShape.size()),
-                                                                        fc->output(0).get_element_type());
-        new_ops.push_back(new_fc);
-        new_fc->set_friendly_name(fc->get_friendly_name());
-        ngraph::copy_runtime_info({reshape, fc}, new_ops);
-        ngraph::replace_node(fc, new_fc);
-        return true;
-    };
-
-    auto m = std::make_shared<ngraph::pattern::Matcher>(fc, matcher_name);
-    register_matcher(m, callback);
-}
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp
deleted file mode 100644
index 61c163533561..000000000000
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ngraph/pass/graph_rewrite.hpp>
-
-namespace ov {
-namespace intel_cpu {
-
-class ReshapeFullyConnectedFusion : public ov::pass::MatcherPass {
-public:
-    OPENVINO_RTTI("ReshapeFullyConnectedFusion", "0");
-    ReshapeFullyConnectedFusion();
-};
-
-}   // namespace intel_cpu
-}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 3e2158e7a383..c3e052cbda14 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -5,7 +5,6 @@
 #include <ngraph/pass/constant_folding.hpp>
 #include "ngraph/op/fake_quantize.hpp"
 #include "ngraph/pass/manager.hpp"
-#include "common/pass/reshape_fc_fusion.hpp"
 #include "common/pass/align_matmul_input_ranks.hpp"
 #include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "common/pass/convert_broadcast_to_tiles.hpp"
@@ -42,9 +41,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
     CPU_REGISTER_PASS_COMMON(manager, ConvertToLeakyRelu);
     CPU_REGISTER_PASS_COMMON(manager, ConvertToSwishCPU);
     CPU_REGISTER_PASS_COMMON(manager, OptimizeSequenceTransposes);
-    if (!ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc)) {
-        CPU_REGISTER_PASS_COMMON(manager, ReshapeFullyConnectedFusion);
-    }
     // after transformation "MoveEltwiseUpThroughDataMov" there can be reshaped sequences that should be eliminated or fused
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ReshapeSequenceFusion);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);

From 88f33b7d9644a56e3759c67d07d41f4a766ce95e Mon Sep 17 00:00:00 2001
From: Luwei Zhou <luwei.zhou@intel.com>
Date: Mon, 4 Dec 2023 03:41:31 +0100
Subject: [PATCH 4/4] Restore reshape+fc fusion.

---
 .../common/pass/reshape_fc_fusion.cpp         | 76 +++++++++++++++++++
 .../common/pass/reshape_fc_fusion.hpp         | 19 +++++
 .../convert_to_cpu_specific_opset.hpp         |  4 +
 3 files changed, 99 insertions(+)
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp

diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp
new file mode 100644
index 000000000000..2606a6f53987
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.cpp
@@ -0,0 +1,76 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "reshape_fc_fusion.hpp"
+#include "transformations/cpu_opset/common/op/fully_connected.hpp"
+#include <numeric>
+#include <openvino/opsets/opset1.hpp>
+#include "openvino/core/rt_info.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
+
+#include "itt.hpp"
+
+ov::intel_cpu::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() {  // Builds the Reshape -> FullyConnected pattern and registers the fusing callback.
+    MATCHER_SCOPE(ReshapeFullyConnectedFusion);  // presumably defines `matcher_name` used for the Matcher below - confirm in itt.hpp
+    auto m_reshape = ov::pass::pattern::wrap_type<ov::opset1::Reshape>({ov::pass::pattern::any_input(ov::pass::pattern::has_static_shape()),
+                                                                          ov::pass::pattern::any_input()},
+                                                                         ov::pass::pattern::has_static_shape());
+    ov::OutputVector fcInputs = {m_reshape, ov::pass::pattern::any_input()};  // input 0: the Reshape above, input 1: any producer
+    auto fc = ov::pass::pattern::wrap_type<ov::intel_cpu::FullyConnectedNode>(fcInputs, ov::pass::pattern::has_static_shape());
+    // Callback: returns false (graph untouched) when a guard fails, true after the FC has been replaced.
+    ov::matcher_pass_callback callback = [](ov::pass::pattern::Matcher &m) {
+        auto fc = std::dynamic_pointer_cast<ov::intel_cpu::FullyConnectedNode>(m.get_match_root());
+        if (!fc)
+            return false;
+        auto reshape = std::dynamic_pointer_cast<ov::opset1::Reshape>(fc->get_input_node_shared_ptr(0));
+        if (!reshape)
+            return false;
+
+        // Only handle a Reshape that turns a 4D tensor into a 2D one, or a no-op Reshape (identical, non-empty shapes)
+        auto shape_in = reshape->input_value(0).get_shape();
+        auto shape_out = reshape->get_shape();
+        if (!((shape_in.size() == 4 && reshape->get_shape().size() == 2) || (shape_in == shape_out && !shape_in.empty()))) {
+            return false;
+        }
+
+        // Dim 0 must be preserved by the Reshape, and the product of the remaining input dims (C*H*W) must equal Weights[O, C*H*W] dim 1
+        auto shape_w = fc->input_value(1).get_shape();
+        if (shape_in[0] != shape_out[0] || std::accumulate(shape_in.begin() + 1, shape_in.end(), size_t{1}, std::multiplies<size_t>()) != shape_w[1]) {
+            return false;
+        }
+
+        ov::NodeVector new_ops;  // nodes created by this callback; they receive the runtime info copied below
+        auto weightInput = fc->input(1).get_source_output();
+        ov::Shape newWeightsShape;
+        const auto outShape = fc->get_shape();
+        if (shape_in.size() == 3) {  // 3D data input: weights shape is {outShape[2], shape_in[2]}
+            newWeightsShape = ov::Shape({outShape[2], shape_in[2]});
+        } else {  // otherwise: {outShape[1], shape_in[1], ..., shape_in[rank - 1]}
+            newWeightsShape.push_back(outShape[1]);
+            for (size_t i = 1; i < shape_in.size(); i++)
+                newWeightsShape.push_back(shape_in[i]);
+        }
+        // A Reshape is inserted on the weights only when their current shape differs from the one computed above.
+        if (newWeightsShape != weightInput.get_shape()) {
+            auto newShape = std::make_shared<ov::opset1::Constant>(ov::element::i64, ov::Shape{newWeightsShape.size()}, newWeightsShape);
+            weightInput = std::make_shared<ov::opset1::Reshape>(weightInput, newShape, true);
+            new_ops.push_back(weightInput.get_node_shared_ptr());
+        }
+        // The new FC reads the Reshape's own input, so the Reshape is bypassed on the data path.
+        std::shared_ptr<ov::Node> new_fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(
+                                                                        reshape->input_value(0),
+                                                                        weightInput,
+                                                                        ov::Rank(outShape.size()),
+                                                                        fc->output(0).get_element_type());
+        new_ops.push_back(new_fc);
+        new_fc->set_friendly_name(fc->get_friendly_name());  // the replacement keeps the original FC's friendly name
+        ov::copy_runtime_info({reshape, fc}, new_ops);
+        ov::replace_node(fc, new_fc);
+        return true;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(fc, matcher_name);  // the FC pattern node is the match root
+    register_matcher(m, callback);
+}
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp
new file mode 100644
index 000000000000..8bf7026ab198
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/reshape_fc_fusion.hpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+class ReshapeFullyConnectedFusion : public ov::pass::MatcherPass {  // matcher pass; its pattern and callback are registered in the constructor
+public:
+    OPENVINO_RTTI("ReshapeFullyConnectedFusion", "0");
+    ReshapeFullyConnectedFusion();  // defined in reshape_fc_fusion.cpp
+};
+
+}   // namespace intel_cpu
+}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index e334d11babe0..f0f5c3e44d0d 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -5,6 +5,7 @@
 #include "openvino/pass/constant_folding.hpp"
 #include "openvino/op/fake_quantize.hpp"
 #include "openvino/pass/manager.hpp"
+#include "common/pass/reshape_fc_fusion.hpp"
 #include "common/pass/align_matmul_input_ranks.hpp"
 #include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "common/pass/convert_broadcast_to_tiles.hpp"
@@ -41,6 +42,9 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model> &nGraphFunc) {
     CPU_REGISTER_PASS_COMMON(manager, ConvertToLeakyRelu);
     CPU_REGISTER_PASS_COMMON(manager, ConvertToSwishCPU);
     CPU_REGISTER_PASS_COMMON(manager, OptimizeSequenceTransposes);
+    if (!ov::op::util::has_op_with_type<ov::op::v0::FakeQuantize>(nGraphFunc)) {
+        CPU_REGISTER_PASS_COMMON(manager, ReshapeFullyConnectedFusion);
+    }
     // after transformation "MoveEltwiseUpThroughDataMov" there can be reshaped sequences that should be eliminated or fused
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ReshapeSequenceFusion);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);