From 0959856a8cfaad774c2160b0b8377e0fd6343445 Mon Sep 17 00:00:00 2001
From: Nikolai Shchegolev <nikolay.shchegolev@intel.com>
Date: Fri, 3 Feb 2023 14:54:49 +0400
Subject: [PATCH] [CPU] I64 native support.

---
 .../src/transformations/convert_precision.cpp |   12 +-
 .../include/ngraph/runtime/reference/mvn.hpp  |    1 -
 .../ngraph/runtime/reference/reduce_l1.hpp    |    7 +-
 .../ngraph/runtime/reference/reduce_l2.hpp    |    2 +-
 src/core/src/op/reduce_l1.cpp                 |    2 +
 src/core/src/op/reduce_l2.cpp                 |    4 +
 .../interface/ie_internal_plugin_config.hpp   |    5 +
 src/plugins/intel_cpu/src/config.cpp          |   11 +-
 src/plugins/intel_cpu/src/config.h            |    1 +
 .../intel_cpu/src/dnnl_extension_utils.cpp    |   56 +-
 .../src/emitters/x64/jit_eltwise_emitters.cpp | 1638 +++++++-----
 .../src/emitters/x64/jit_eltwise_emitters.hpp |  202 +-
 .../src/emitters/x64/jit_emitter.cpp          |   19 +-
 .../src/emitters/x64/jit_emitter.hpp          |   54 +-
 .../emitters/x64/jit_snippets_emitters.cpp    |    4 +
 src/plugins/intel_cpu/src/graph.cpp           |   25 +-
 src/plugins/intel_cpu/src/graph_optimizer.cpp |    1 -
 src/plugins/intel_cpu/src/node.cpp            |   16 +-
 src/plugins/intel_cpu/src/node.h              |   16 +-
 src/plugins/intel_cpu/src/nodes/broadcast.cpp |  111 +-
 src/plugins/intel_cpu/src/nodes/broadcast.h   |    8 +-
 .../src/nodes/common/cpu_convert.cpp          |    7 +-
 .../src/nodes/common/tile_broadcast_utils.cpp |   14 +-
 src/plugins/intel_cpu/src/nodes/concat.cpp    |   47 +-
 src/plugins/intel_cpu/src/nodes/concat.h      |    8 +-
 src/plugins/intel_cpu/src/nodes/convert.cpp   |   45 +-
 src/plugins/intel_cpu/src/nodes/convert.h     |    6 +-
 src/plugins/intel_cpu/src/nodes/cum_sum.cpp   |   20 +-
 src/plugins/intel_cpu/src/nodes/cum_sum.h     |    5 +-
 src/plugins/intel_cpu/src/nodes/def_conv.cpp  |   11 +-
 src/plugins/intel_cpu/src/nodes/eltwise.cpp   |  414 ++-
 src/plugins/intel_cpu/src/nodes/eltwise.h     |    7 +-
 .../executors/common/ref_opt_transpose.cpp    |    3 +-
 src/plugins/intel_cpu/src/nodes/eye.cpp       |    3 +-
 src/plugins/intel_cpu/src/nodes/gather.cpp    |  139 +-
 src/plugins/intel_cpu/src/nodes/gather.h      |   16 +-
 src/plugins/intel_cpu/src/nodes/gather_nd.cpp |  168 +-
 src/plugins/intel_cpu/src/nodes/gather_nd.h   |    8 +-
 .../intel_cpu/src/nodes/grid_sample.cpp       |    4 +-
 .../intel_cpu/src/nodes/grid_sample.hpp       |    6 +-
 src/plugins/intel_cpu/src/nodes/input.cpp     |   12 +-
 src/plugins/intel_cpu/src/nodes/input.h       |    6 +-
 .../nodes/kernels/x64/gather_uni_kernel.cpp   |    3 +
 .../nodes/kernels/x64/gather_uni_kernel.hpp   |    2 +
 .../src/nodes/kernels/x64/grid_sample.cpp     |   21 +-
 .../src/nodes/kernels/x64/grid_sample.hpp     |   21 +-
 .../src/nodes/kernels/x64/jit_kernel_base.cpp | 1072 ++++++--
 .../src/nodes/kernels/x64/jit_kernel_base.hpp |  284 ++-
 .../src/nodes/kernels/x64/reduce.cpp          | 1915 ++++++++++++++
 .../src/nodes/kernels/x64/reduce.hpp          |  246 ++
 .../src/nodes/kernels/x64/registers_pool.hpp  |    5 +-
 .../intel_cpu/src/nodes/mathematics.cpp       |   60 +-
 src/plugins/intel_cpu/src/nodes/mathematics.h |    6 +-
 src/plugins/intel_cpu/src/nodes/non_zero.cpp  |   34 +-
 src/plugins/intel_cpu/src/nodes/non_zero.h    |    5 +-
 src/plugins/intel_cpu/src/nodes/one_hot.cpp   |  101 +-
 src/plugins/intel_cpu/src/nodes/one_hot.h     |   16 +-
 src/plugins/intel_cpu/src/nodes/pooling.cpp   |    4 +-
 src/plugins/intel_cpu/src/nodes/range.cpp     |   14 +-
 src/plugins/intel_cpu/src/nodes/range.h       |    2 +-
 src/plugins/intel_cpu/src/nodes/reduce.cpp    | 2239 +++--------------
 src/plugins/intel_cpu/src/nodes/reduce.h      |  117 +-
 src/plugins/intel_cpu/src/nodes/reference.cpp |    9 +-
 src/plugins/intel_cpu/src/nodes/reference.h   |    4 +-
 src/plugins/intel_cpu/src/nodes/reorder.cpp   |    4 +-
 src/plugins/intel_cpu/src/nodes/reorder.h     |    4 +-
 src/plugins/intel_cpu/src/nodes/reshape.cpp   |   82 +-
 src/plugins/intel_cpu/src/nodes/reshape.h     |   16 +-
 src/plugins/intel_cpu/src/nodes/rnn.cpp       |   10 +-
 .../intel_cpu/src/nodes/scatter_update.cpp    |   28 +-
 .../intel_cpu/src/nodes/scatter_update.h      |    4 +-
 src/plugins/intel_cpu/src/nodes/shapeof.cpp   |   55 +-
 src/plugins/intel_cpu/src/nodes/shapeof.h     |    5 +-
 .../intel_cpu/src/nodes/shuffle_channels.cpp  |    5 +-
 src/plugins/intel_cpu/src/nodes/split.cpp     |   60 +-
 src/plugins/intel_cpu/src/nodes/split.h       |    4 +-
 .../intel_cpu/src/nodes/strided_slice.cpp     |   18 +-
 .../intel_cpu/src/nodes/strided_slice.h       |    2 +-
 src/plugins/intel_cpu/src/nodes/subgraph.cpp  |   46 +-
 src/plugins/intel_cpu/src/nodes/subgraph.h    |   11 +-
 .../intel_cpu/src/nodes/tensoriterator.cpp    |    2 +-
 src/plugins/intel_cpu/src/nodes/tile.cpp      |   33 +-
 src/plugins/intel_cpu/src/nodes/tile.h        |    4 +-
 src/plugins/intel_cpu/src/nodes/topk.cpp      |  126 +-
 src/plugins/intel_cpu/src/nodes/topk.h        |   15 +-
 src/plugins/intel_cpu/src/nodes/transpose.cpp |   56 +-
 src/plugins/intel_cpu/src/nodes/transpose.h   |    5 +-
 src/plugins/intel_cpu/src/nodes/unique.cpp    |  193 +-
 src/plugins/intel_cpu/src/nodes/unique.hpp    |    9 +-
 src/plugins/intel_cpu/src/plugin.cpp          |   26 +-
 .../convert_to_cpu_specific_opset.hpp         |   12 +-
 .../x64/pass/convert_precision_i64_i32.cpp    |  155 ++
 .../x64/pass/convert_precision_i64_i32.hpp    |   21 +
 .../transformation_pipeline.cpp               |   40 +-
 .../transformations/transformation_pipeline.h |    2 +-
 src/plugins/intel_cpu/src/utils/blob_dump.cpp |   12 +
 src/plugins/intel_cpu/src/utils/cpu_utils.hpp |   11 +-
 .../single_layer_tests/comparison.cpp         |   48 +-
 .../single_layer_tests/concat.cpp             |    7 +-
 .../single_layer_tests/eltwise.cpp            |    2 +-
 .../single_layer_tests/minimum_maximum.cpp    |   51 -
 .../non_max_suppression.cpp                   |   15 +
 .../single_layer_tests/range.cpp              |    3 +-
 .../single_layer_tests/reduce_ops.cpp         |    4 +-
 .../single_layer_tests/reshape.cpp            |    3 +-
 .../single_layer_tests/scatter_ND_update.cpp  |    1 +
 .../scatter_elements_update.cpp               |    1 +
 .../single_layer_tests/scatter_update.cpp     |    2 +-
 .../single_layer_tests/select.cpp             |    4 +-
 .../single_layer_tests/squeeze_unsqueeze.cpp  |    3 +-
 .../single_layer_tests/tile.cpp               |    3 +-
 .../skip_tests_config.cpp                     |    7 +-
 .../single_layer_tests/broadcast.cpp          |  123 +-
 .../single_layer_tests/classes/activation.cpp |   68 +-
 .../single_layer_tests/classes/activation.hpp |   16 +-
 .../single_layer_tests/classes/conversion.cpp |   93 +-
 .../single_layer_tests/classes/conversion.hpp |   34 +-
 .../single_layer_tests/classes/eltwise.cpp    |   48 +-
 .../single_layer_tests/classes/reduce.cpp     |  106 +-
 .../single_layer_tests/classes/reduce.hpp     |   37 +-
 .../single_layer_tests/classes/transpose.cpp  |   43 +-
 .../single_layer_tests/classes/transpose.hpp  |   30 +-
 .../functional/single_layer_tests/concat.cpp  |  133 +-
 .../functional/single_layer_tests/cum_sum.cpp |  121 +-
 .../functional/single_layer_tests/gather.cpp  |  190 +-
 .../single_layer_tests/gather_nd.cpp          |  106 +-
 .../instances/common/activation.cpp           |   22 +-
 .../instances/common/conversion.cpp           |    8 +-
 .../instances/common/reduce.cpp               |   59 +-
 .../instances/common/transpose.cpp            |   20 +-
 .../instances/x64/activation.cpp              |  107 +-
 .../instances/x64/conversion.cpp              |   61 +-
 .../instances/x64/eltwise.cpp                 |   51 +
 .../instances/x64/reduce.cpp                  |  453 +++-
 .../instances/x64/transpose.cpp               |   93 +-
 .../single_layer_tests/minimum_maximum.cpp    |  170 ++
 .../non_max_suppression.cpp                   |   53 +-
 .../functional/single_layer_tests/one_hot.cpp |  169 +-
 .../single_layer_tests/scatter_ND_update.cpp  |    1 +
 .../scatter_elements_update.cpp               |    1 +
 .../single_layer_tests/scatter_update.cpp     |   59 +-
 .../functional/single_layer_tests/split.cpp   |  169 +-
 .../single_layer_tests/strided_slice.cpp      |  101 +-
 .../functional/single_layer_tests/tile.cpp    |  115 +-
 .../functional/single_layer_tests/topk.cpp    |   49 +-
 .../functional/single_layer_tests/unique.cpp  |   52 +-
 .../functional/test_utils/cpu_test_utils.cpp  |   76 +-
 .../functional/test_utils/cpu_test_utils.hpp  |   17 +-
 src/plugins/intel_cpu/thirdparty/onednn       |    2 +-
 src/plugins/template/backend/ops/reduce.cpp   |   74 +
 .../template/backend/opset_int_tbl.hpp        |    2 +
 .../src/non_max_suppression.cpp               |   10 +-
 152 files changed, 8732 insertions(+), 4964 deletions(-)
 create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.cpp
 create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.hpp
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.cpp
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp
 delete mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp
 create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/minimum_maximum.cpp
 create mode 100644 src/plugins/template/backend/ops/reduce.cpp

diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index b3b474c3b989ed..88fcc0e899e0ea 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -871,6 +871,14 @@ inline int32_t convert_value<uint32_t, int32_t>(uint32_t val) {
     return static_cast<int32_t>(val);
 }
 
+template <>
+inline int64_t convert_value<uint64_t, int64_t>(uint64_t val) {
+    if (val >= static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
+        return std::numeric_limits<int64_t>::max();
+    }
+    return static_cast<int64_t>(val);
+}
+
 namespace {
 template <ov::element::Type_t PREC_FROM, ov::element::Type_t PREC_TO>
 std::shared_ptr<ngraph::Node> change_constant_precision(std::shared_ptr<opset4::Constant>& constant) {
@@ -1110,7 +1118,9 @@ bool fuse_type_to_constant(const std::shared_ptr<ngraph::Node>& node,
     const auto& to = it->second;
     if (auto constant = ov::as_type_ptr<opset4::Constant>(node)) {
         std::shared_ptr<ngraph::Node> new_const;
-        if (from == ov::element::u64 && to == ov::element::i32) {
+        if (from == ov::element::u64 && to == ov::element::i64) {
+            new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i64>(constant);
+        } else if (from == ov::element::u64 && to == ov::element::i32) {
             new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i32>(constant);
         } else if (from == ov::element::i64 && to == ov::element::i32) {
             new_const = change_constant_precision<ov::element::Type_t::i64, ov::element::Type_t::i32>(constant);
diff --git a/src/core/reference/include/ngraph/runtime/reference/mvn.hpp b/src/core/reference/include/ngraph/runtime/reference/mvn.hpp
index 7ffc557b185cc7..89ddd2d27c5484 100644
--- a/src/core/reference/include/ngraph/runtime/reference/mvn.hpp
+++ b/src/core/reference/include/ngraph/runtime/reference/mvn.hpp
@@ -12,7 +12,6 @@
 #include <ngraph/runtime/reference/multiply.hpp>
 #include <ngraph/runtime/reference/sqrt.hpp>
 #include <ngraph/runtime/reference/subtract.hpp>
-#include <ngraph/runtime/reference/sum.hpp>
 #include <ngraph/shape.hpp>
 
 namespace ngraph {
diff --git a/src/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp b/src/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp
index 37477aa7e727f7..9ae28cf3e712f9 100644
--- a/src/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp
+++ b/src/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp
@@ -32,7 +32,12 @@ void reduce_l1(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduc
         const size_t out_idx =
             std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
 
-        out[out_idx] = out[out_idx] + std::abs(arg[in_idx]);
+        // Workaround: std::abs is not defined for every supported data type, so negate manually.
+        auto val = arg[in_idx];
+        if (val < T(0)) {
+            val *= T(-1);
+        }
+        out[out_idx] = out[out_idx] + val;
     }
     OPENVINO_SUPPRESS_DEPRECATED_END
 }
diff --git a/src/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp b/src/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp
index c338f340be8958..21918c9f5f010e 100644
--- a/src/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp
+++ b/src/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp
@@ -35,7 +35,7 @@ void reduce_l2(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduc
         out[out_idx] = out[out_idx] + arg[in_idx] * arg[in_idx];
     }
     std::transform(out, out + shape_size(out_shape), out, [](T elem) {
-        return sqrt(elem);
+        return static_cast<T>(std::sqrt(static_cast<double>(elem)));
     });
     OPENVINO_SUPPRESS_DEPRECATED_END
 }
diff --git a/src/core/src/op/reduce_l1.cpp b/src/core/src/op/reduce_l1.cpp
index 74d522a47f869d..8f6a88f0347363 100644
--- a/src/core/src/op/reduce_l1.cpp
+++ b/src/core/src/op/reduce_l1.cpp
@@ -43,6 +43,7 @@ bool evaluate_sum(const HostTensorPtr& arg, const HostTensorPtr& out, const Axis
     switch (arg->get_element_type()) {
         NGRAPH_TYPE_CASE(evaluate_reducel1_sum, i32, arg, out, axes, keep_dims);
         NGRAPH_TYPE_CASE(evaluate_reducel1_sum, i64, arg, out, axes, keep_dims);
+        NGRAPH_TYPE_CASE(evaluate_reducel1_sum, u64, arg, out, axes, keep_dims);
         NGRAPH_TYPE_CASE(evaluate_reducel1_sum, bf16, arg, out, axes, keep_dims);
         NGRAPH_TYPE_CASE(evaluate_reducel1_sum, f16, arg, out, axes, keep_dims);
         NGRAPH_TYPE_CASE(evaluate_reducel1_sum, f32, arg, out, axes, keep_dims);
@@ -73,6 +74,7 @@ bool op::v4::ReduceL1::has_evaluate() const {
     switch (get_input_element_type(0)) {
     case ngraph::element::i32:
     case ngraph::element::i64:
+    case ngraph::element::u64:
     case ngraph::element::bf16:
     case ngraph::element::f16:
     case ngraph::element::f32:
diff --git a/src/core/src/op/reduce_l2.cpp b/src/core/src/op/reduce_l2.cpp
index a56160415de1df..1f5e33c1c81154 100644
--- a/src/core/src/op/reduce_l2.cpp
+++ b/src/core/src/op/reduce_l2.cpp
@@ -44,6 +44,8 @@ bool evaluate_reduce_l2(const HostTensorPtr& arg, const HostTensorPtr& out, cons
         NGRAPH_TYPE_CASE(evaluate_reduce_l2, bf16, arg, out, axes, keep_dims);
         NGRAPH_TYPE_CASE(evaluate_reduce_l2, f16, arg, out, axes, keep_dims);
         NGRAPH_TYPE_CASE(evaluate_reduce_l2, f32, arg, out, axes, keep_dims);
+        NGRAPH_TYPE_CASE(evaluate_reduce_l2, i64, arg, out, axes, keep_dims);
+        NGRAPH_TYPE_CASE(evaluate_reduce_l2, u64, arg, out, axes, keep_dims);
     default:
         rc = false;
         break;
@@ -72,6 +74,8 @@ bool op::v4::ReduceL2::has_evaluate() const {
     case ngraph::element::bf16:
     case ngraph::element::f16:
     case ngraph::element::f32:
+    case ngraph::element::i64:
+    case ngraph::element::u64:
         return true;
     default:
         break;
diff --git a/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp b/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp
index eeac793acc7dcc..4af3d785d0fd41 100644
--- a/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp
+++ b/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp
@@ -110,6 +110,11 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);
 
+/**
+ * @brief Enables inference with the INT64 data type in the CPU plugin when it is present in the original model.
+ */
+DECLARE_CONFIG_KEY(CPU_NATIVE_I64);
+
 }  // namespace PluginConfigInternalParams
 
 }  // namespace InferenceEngine
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index a1a4eac265b3f7..245ac784619297 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -230,6 +230,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
                 IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name()
                     << ". Supported values: PERFORMANCE, ACCURACY";
             }
+        } else if (key == PluginConfigInternalParams::KEY_CPU_NATIVE_I64) {
+            if (val == PluginConfigParams::YES) {
+                enableNativeI64 = true;
+            } else if (val == PluginConfigParams::NO) {
+                enableNativeI64 = false;
+            } else {
+                IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << val
+                                    << ". Expected only YES or NO values.";
+            }
         } else {
             IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
         }
@@ -314,4 +323,4 @@ void Config::updateProperties() {
 }
 
 }  // namespace intel_cpu
-}   // namespace ov
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 4be16563c8991c..4e74e086252780 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -57,6 +57,7 @@ struct Config {
     // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives
     size_t rtCacheCapacity = 0ul;
 #endif
+    bool enableNativeI64 = false;
     InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
     InferenceEngine::PerfHintsConfig  perfHintsConfig;
     bool enableCpuPinning = true;
diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
index 1cef0551d1eb08..0146c0cfa7b9af 100644
--- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -4,45 +4,43 @@
 
 #include "dnnl_extension_utils.h"
 
-#include "utils/general_utils.h"
 #include <oneapi/dnnl/dnnl.hpp>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
-#include "onednn/iml_type_mapper.h"
-#include <common/primitive_desc.hpp>
 #include <common/primitive_desc_iface.hpp>
 
-#include <vector>
-
 using namespace dnnl;
 
 namespace ov {
 namespace intel_cpu {
 
-uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
+uint8_t DnnlExtensionUtils::sizeOfDataType(memory::data_type dataType) {
     switch (dataType) {
-    case dnnl::memory::data_type::f32:
-        return 4;
-    case dnnl::memory::data_type::s32:
+    case memory::data_type::f64:
+    case memory::data_type::s64:
+        return 8;
+    case memory::data_type::f32:
+    case memory::data_type::s32:
         return 4;
-    case dnnl::memory::data_type::bf16:
+    case memory::data_type::bf16:
+    case memory::data_type::f16:
         return 2;
-    case dnnl::memory::data_type::s8:
-        return 1;
-    case dnnl::memory::data_type::u8:
+    case memory::data_type::s8:
+    case memory::data_type::u8:
+    case memory::data_type::bin:
         return 1;
-    case dnnl::memory::data_type::bin:
-        return 1;
-    case dnnl::memory::data_type::f16:
-        return 2;
-    case dnnl::memory::data_type::undef:
+    case memory::data_type::undef:
         return 0;
     default:
-        IE_THROW() << "Unsupported data type.";
+        IE_THROW() << "Unsupported data type: " << DataTypeToIEPrecision(dataType);
     }
 }
 
 memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
     switch (prec) {
+        case InferenceEngine::Precision::FP64:
+            return memory::data_type::f64;
+        case InferenceEngine::Precision::I64:
+            return memory::data_type::s64;
         case InferenceEngine::Precision::FP32:
             return memory::data_type::f32;
         case InferenceEngine::Precision::I32:
@@ -68,6 +66,10 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin
 
 InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
     switch (dataType) {
+        case memory::data_type::f64:
+            return InferenceEngine::Precision::FP64;
+        case memory::data_type::s64:
+            return InferenceEngine::Precision::I64;
         case memory::data_type::f32:
             return InferenceEngine::Precision::FP32;
         case memory::data_type::s32:
@@ -90,11 +92,11 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
     }
 }
 
-Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
+Dim DnnlExtensionUtils::convertToDim(const memory::dim &dim) {
     return dim == DNNL_RUNTIME_DIM_VAL ?  Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
 }
-dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
-    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<dnnl::memory::dim>(dim);
+memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
+    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<memory::dim>(dim);
 }
 
 VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {
@@ -133,19 +135,19 @@ memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const dnnl::memory::desc &desc) {
+DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const memory::desc &desc) {
     return makeDescriptor(desc.get());
 }
 
 DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t desc) {
-    if (desc->format_kind == dnnl::impl::format_kind_t::dnnl_blocked) {
+    if (desc->format_kind == impl::format_kind_t::dnnl_blocked) {
         return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
     } else {
         return std::shared_ptr<DnnlMemoryDesc>(new DnnlMemoryDesc(desc));
     }
 }
 
-size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) {
+size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const memory::desc& desc) {
     auto tmpDesc = desc;
 
     const auto offset0 = tmpDesc.get()->offset0;
@@ -167,8 +169,8 @@ std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(con
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const dnnl::query& what, int idx) {
-    auto query = dnnl::convert_to_c(what);
+DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const query& what, int idx) {
+    auto query = convert_to_c(what);
     const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx);
 
     if (!cdesc)
diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp
index 0ba374b68b93be..02bf09290e37c1 100644
--- a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp
@@ -36,9 +36,9 @@ InferenceEngine::Precision get_arithmetic_binary_exec_precision(const std::share
 
 /// ADD ///
 jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
+jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_add_emitter::get_inputs_num() const { return 2; }
 
@@ -50,42 +50,34 @@ void jit_add_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const st
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_add_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    auto uni_vadd = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1); break;
-            case Precision::I32:  h->uni_vpaddd(vmm_dst, vmm_src0, vmm_src1); break;
-            default: assert(!"unsupported precision");
-        }
-    };
-
-    if (isa == x64::sse41) {
-        h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vadd(vmm_dst, vmm_dst, vmm_src1);
-    } else {
-        uni_vadd(vmm_dst, vmm_src0, vmm_src1);
+    switch (exec_prc_) {
+        case Precision::FP32: h->uni_vaddps(vmm_dst, vmm_src_0, vmm_src_1); break;
+        case Precision::I32:  h->uni_vpaddd(vmm_dst, vmm_src_0, vmm_src_1); break;
+        case Precision::I64:  h->uni_vpaddq(vmm_dst, vmm_src_0, vmm_src_1); break;
+        default: IE_THROW() << "jit_add_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
-std::set<std::vector<element::Type>> jit_add_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+std::set<std::vector<element::Type>> jit_add_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
 }
 
 /// MUL_ADD ///
 jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
+jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_mul_add_emitter::get_inputs_num() const { return 3; }
 
@@ -97,86 +89,139 @@ void jit_mul_add_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, cons
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_mul_add_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_mul_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
-    Vmm vmm_src2 = Vmm(in_vec_idxs[2]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_2 = Vmm(in_vec_idxs[2]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    auto uni_vfmadd231_xmm = [this](Xmm vmm_dst, Xmm vmm_src0, Xmm vmm_src1, Xmm vmm_src2) {
-        h->uni_vmovups(vmm_dst, vmm_src0);
+    auto uni_madd_xmm = [this](const Xmm &vmm_dst, const Xmm &vmm_src_0, const Xmm &vmm_src_1, const Xmm &vmm_src_2) {
         switch (exec_prc_) {
             case Precision::FP32: {
-                h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
-                h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
+                if (vmm_dst.getIdx() == vmm_src_1.getIdx()) {
+                    h->uni_vmulps(vmm_dst, vmm_src_1, vmm_src_0);
+                    h->uni_vaddps(vmm_dst, vmm_dst, vmm_src_2);
+                } else if (vmm_dst.getIdx() == vmm_src_2.getIdx()) {
+                    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                    h->uni_vmulps(vmm_aux_0, vmm_src_0, vmm_src_1);
+                    h->uni_vaddps(vmm_dst, vmm_dst, vmm_aux_0);
+                } else {
+                    h->uni_vmulps(vmm_dst, vmm_src_0, vmm_src_1);
+                    h->uni_vaddps(vmm_dst, vmm_dst, vmm_src_2);
+                }
             } break;
             case Precision::I32: {
-                h->uni_vpmulld(vmm_dst, vmm_dst, vmm_src1);
-                h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
+                if (vmm_dst.getIdx() == vmm_src_1.getIdx()) {
+                    h->uni_vpmulld(vmm_dst, vmm_src_1, vmm_src_0);
+                    h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src_2);
+                } else if (vmm_dst.getIdx() == vmm_src_2.getIdx()) {
+                    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                    h->uni_vpmulld(vmm_aux_0, vmm_src_0, vmm_src_1);
+                    h->uni_vpaddd(vmm_dst, vmm_dst, vmm_aux_0);
+                } else {
+                    h->uni_vpmulld(vmm_dst, vmm_src_0, vmm_src_1);
+                    h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src_2);
+                }
+            } break;
+            case Precision::I64: {
+                Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
+                // There is no multiply int64 instruction on AVX2 and SSE41, thus the WA is used.
+                // vmm_src_0 = ab; vmm_src_1 = cd;
+                h->uni_vpsrlq(vmm_aux_0, vmm_src_0, 32);
+                h->uni_vpmuludq(vmm_aux_0, vmm_aux_0, vmm_src_1); // a * d
+                h->uni_vpsrlq(vmm_aux_1, vmm_src_1, 32);
+                h->uni_vpmuludq(vmm_aux_1, vmm_aux_1, vmm_src_0); // b * c
+                h->uni_vpaddq(vmm_aux_1, vmm_aux_1, vmm_aux_0);   // a * d + b * c
+                h->uni_vpsllq(vmm_aux_1, vmm_aux_1, 32);
+                h->uni_vpmuludq(vmm_aux_0, vmm_src_0, vmm_src_1); // b * d
+                h->uni_vpaddq(vmm_aux_0, vmm_aux_0, vmm_aux_1);   // (a * d + b * c) << 32 + b * d
+
+                h->uni_vpaddq(vmm_dst, vmm_src_2, vmm_aux_0);
             } break;
-            default: assert(!"unsupported precision");
+            default: IE_THROW() << "jit_mul_add_emitter doesn't support precision '" << exec_prc_ << "'";
         }
     };
 
-    auto uni_vfmadd231_vmm = [this, vmm_aux0](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1, Vmm vmm_src2) {
+    auto uni_madd_vmm = [this](const Vmm &vmm_dst, const Vmm &vmm_src_0, const Vmm &vmm_src_1, const Vmm &vmm_src_2) {
         switch (exec_prc_) {
             case Precision::FP32: {
-                Vmm vmm_mul0;
-                if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
-                    h->uni_vmovups(vmm_aux0, vmm_src0);
-                    vmm_mul0 = vmm_aux0;
+                if (vmm_dst.getIdx() == vmm_src_0.getIdx()) {
+                    h->uni_vfmadd132ps(vmm_src_0, vmm_src_2, vmm_src_1);
+                } else if (vmm_dst.getIdx() == vmm_src_1.getIdx()) {
+                    h->uni_vfmadd132ps(vmm_src_1, vmm_src_2, vmm_src_0);
+                } else if (vmm_dst.getIdx() == vmm_src_2.getIdx()) {
+                    h->uni_vfmadd231ps(vmm_src_2, vmm_src_0, vmm_src_1);
                 } else {
-                    vmm_mul0 = vmm_src0;
+                    h->uni_vmovups(vmm_dst, vmm_src_2);
+                    h->uni_vfmadd231ps(vmm_dst, vmm_src_0, vmm_src_1);
                 }
-
-                Vmm vmm_mul1;
-                if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
-                    h->uni_vmovups(vmm_aux0, vmm_src1);
-                    vmm_mul1 = vmm_aux0;
+            } break;
+            case Precision::I32: {
+                if (vmm_dst.getIdx() == vmm_src_2.getIdx()) {
+                    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                    h->uni_vpmulld(vmm_aux_0, vmm_src_0, vmm_src_1);
+                    h->uni_vpaddd(vmm_dst, vmm_dst, vmm_aux_0);
                 } else {
-                    vmm_mul1 = vmm_src1;
+                    h->uni_vpmulld(vmm_dst, vmm_src_0, vmm_src_1);
+                    h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src_2);
                 }
-
-                if (vmm_dst.getIdx() != vmm_src2.getIdx())
-                    h->uni_vmovups(vmm_dst, vmm_src2);
-
-                h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
             } break;
-            case Precision::I32: {
-                h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1);
-                h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
+            case Precision::I64: {
+                if (isa == x64::avx512_core) {
+                    h->vpmullq(vmm_dst, vmm_src_0, vmm_src_1);
+                    h->uni_vpaddq(vmm_dst, vmm_dst, vmm_src_2);
+                } else {
+                    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
+                    // There is no multiply int64 instruction on AVX2 and SSE41, thus the WA is used.
+                    // vmm_src_0 = ab; vmm_src_1 = cd;
+                    h->uni_vpsrlq(vmm_aux_0, vmm_src_0, 32);
+                    h->uni_vpmuludq(vmm_aux_0, vmm_aux_0, vmm_src_1); // a * d
+                    h->uni_vpsrlq(vmm_aux_1, vmm_src_1, 32);
+                    h->uni_vpmuludq(vmm_aux_1, vmm_aux_1, vmm_src_0); // b * c
+                    h->uni_vpaddq(vmm_aux_1, vmm_aux_1, vmm_aux_0);   // a * d + b * c
+                    h->uni_vpsllq(vmm_aux_1, vmm_aux_1, 32);
+                    h->uni_vpmuludq(vmm_aux_0, vmm_src_0, vmm_src_1); // b * d
+                    h->uni_vpaddq(vmm_aux_0, vmm_aux_0, vmm_aux_1);   // (a * d + b * c) << 32 + b * d
+
+                    h->uni_vpaddq(vmm_dst, vmm_aux_0, vmm_src_2);
+                }
             } break;
-            default: assert(!"unsupported precision");
+            default: IE_THROW() << "jit_mul_add_emitter doesn't support precision '" << exec_prc_ << "'";
         }
     };
 
     if (isa == x64::sse41) {
-        uni_vfmadd231_xmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
+        uni_madd_xmm(vmm_dst, vmm_src_0, vmm_src_1, vmm_src_2);
     } else {
-        uni_vfmadd231_vmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
+        uni_madd_vmm(vmm_dst, vmm_src_0, vmm_src_1, vmm_src_2);
     }
 }
 
-size_t jit_mul_add_emitter::aux_vecs_count() const {
-    return 1;
+std::set<std::vector<element::Type>> jit_mul_add_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32, element::f32}, {element::i32, element::i32, element::i32}, {element::i64, element::i64, element::i64}};
 }
 
-std::set<std::vector<element::Type>> jit_mul_add_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32, element::f32}, {element::i32, element::i32, element::i32}};
+size_t jit_mul_add_emitter::aux_vecs_count() const {
+    if (host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64) { // Must follow the ISA that emit_impl() dispatches on.
+        return 2; // Two scratch registers for the emulated 64-bit multiply.
+    } else {
+        return 1; // FP32/I32 paths read aux_vec_idxs[0] when dst aliases the addend (src_2).
+    }
 }
 
 /// SUB ///
 jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
+jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_subtract_emitter::get_inputs_num() const { return 2; }
 
@@ -188,42 +233,34 @@ void jit_subtract_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, con
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_subtract_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_subtract_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    auto uni_vsub = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1); break;
-            case Precision::I32:  h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1); break;
-            default: assert(!"unsupported precision");
-        }
-    };
-
-    if (isa == x64::sse41) {
-        h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vsub(vmm_dst, vmm_dst, vmm_src1);
-    } else {
-        uni_vsub(vmm_dst, vmm_src0, vmm_src1);
+    switch (exec_prc_) {
+        case Precision::FP32: h->uni_vsubps(vmm_dst, vmm_src_0, vmm_src_1); break;
+        case Precision::I32:  h->uni_vpsubd(vmm_dst, vmm_src_0, vmm_src_1); break;
+        case Precision::I64:  h->uni_vpsubq(vmm_dst, vmm_src_0, vmm_src_1); break;
+        default: IE_THROW() << "jit_subtract_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
-std::set<std::vector<element::Type>> jit_subtract_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+std::set<std::vector<element::Type>> jit_subtract_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
 }
 
 /// MULTIPLY ///
 jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
+jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_multiply_emitter::get_inputs_num() const { return 2; }
 
@@ -235,44 +272,82 @@ void jit_multiply_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, con
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_multiply_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
-void jit_multiply_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
+void jit_multiply_emitter::emit_isa(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
-    Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    auto uni_vmul = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1); break;
-            case Precision::I32:  h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1); break;
-            default: assert(!"unsupported precision");
-        }
-    };
-
-    if (isa == x64::sse41) {
-        h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vmul(vmm_dst, vmm_dst, vmm_src1);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Operand op_src_1;
+    if (in_vec_idxs.size() > 1) {
+        op_src_1 = Vmm(in_vec_idxs[1]);
+    } else if (aux_gpr_idxs.size() > 0) {
+        op_src_1 = h->ptr[Reg64(aux_gpr_idxs[0])];
     } else {
-        uni_vmul(vmm_dst, vmm_src0, vmm_src1);
+        IE_THROW() << "jit_multiply_emitter has invalid inputs number.";
+    }
+    Vmm vmm_dst = Vmm(out_vec_idxs[0]);
+
+    switch (exec_prc_) {
+        case Precision::FP32: h->uni_vmulps(vmm_dst, vmm_src_0, op_src_1); break;
+        case Precision::I32:  h->uni_vpmulld(vmm_dst, vmm_src_0, op_src_1); break;
+        case Precision::I64: {
+            if (isa == x64::avx512_core) {
+                h->vpmullq(vmm_dst, vmm_src_0, op_src_1);
+            } else {
+                if (aux_vec_idxs.size() < 2) {
+                    IE_THROW() << "jit_multiply_emitter has invalid number of aux vectors.";
+                }
+                auto vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                auto vmm_aux_1 = Vmm(aux_vec_idxs[1]);
+                // There is no multiply int64 instruction on AVX2 and SSE41, thus the WA is used.
+                // Represent inputs as vmm_src_0 -> AB and op_src_1 -> CD
+                h->uni_vpsrlq(vmm_aux_0, vmm_src_0, 32);
+                h->uni_vpmuludq(vmm_aux_0, vmm_aux_0, op_src_1);      // A * D
+                if (!op_src_1.isMEM() && vmm_src_0.getIdx() == op_src_1.getIdx()) { // Optimization for the case of src ^ 2
+                    h->uni_vpaddq(vmm_aux_1, vmm_aux_0, vmm_aux_0);   // A * B + A * B
+                } else {
+                    h->uni_vpsrlq(vmm_aux_1, op_src_1, 32);
+                    h->uni_vpmuludq(vmm_aux_1, vmm_aux_1, vmm_src_0); // B * C
+                    h->uni_vpaddq(vmm_aux_1, vmm_aux_1, vmm_aux_0);   // A * D + B * C
+                }
+                h->uni_vpsllq(vmm_aux_1, vmm_aux_1, 32);
+                h->uni_vpmuludq(vmm_aux_0, vmm_src_0, op_src_1);      // B * D
+                h->uni_vpaddq(vmm_dst, vmm_aux_0, vmm_aux_1);         // (A * D + B * C) << 32 + B * D
+            }
+        } break;
+        default: IE_THROW() << "jit_multiply_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
-std::set<std::vector<element::Type>> jit_multiply_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+std::set<std::vector<element::Type>> jit_multiply_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
+}
+
+size_t jit_multiply_emitter::aux_vecs_count() const {
+    if (exec_prc_ == Precision::I64 && host_isa_ != x64::avx512_core) { // Same condition emit_isa() uses to pick the emulated multiply.
+        return 2; // Scratch registers for the partial 32x32 products.
+    } else {
+        return 0;
+    }
+}
 
 /// DIVIDE ///
-jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
+        : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {
+    prepare_table();
+}
+jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
+    prepare_table();
+}
 
-size_t jit_divide_emitter::get_inputs_num() const { return 2; }
+size_t jit_divide_emitter::get_inputs_num() const {
+    return 2;
+}
 
 void jit_divide_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     if (host_isa_ == x64::sse41) {
@@ -282,64 +357,103 @@ void jit_divide_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_divide_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_divide_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    auto uni_vdiv = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: {
-                h->uni_vdivps(vmm_dst, vmm_src0, vmm_src1);
-                break;
-            }
-            case Precision::I32: {
-                Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-
-                // The opset doesn't contain vector instruction for integer divide operation
-                // As WA we emulate its behavior via fp divide followed by rounding to zero
-                h->uni_vcvtdq2ps(vmm_dst, vmm_src0);
-                h->uni_vcvtdq2ps(vmm_aux0, vmm_src1);
-                h->uni_vdivps(vmm_dst, vmm_dst, vmm_aux0);
-                h->uni_vroundps(vmm_dst, vmm_dst, 3); // rounding to zero
-                h->uni_vcvtps2dq(vmm_dst, vmm_dst);
-                break;
-            }
-            default: assert(!"unsupported precision");
+    // The opset doesn't contain vector instruction for integer divide operation
+    // As WA we emulate its behavior via fp divide followed by rounding to zero
+    switch (exec_prc_) {
+        case Precision::FP32: {
+            h->uni_vdivps(vmm_dst, vmm_src_0, vmm_src_1);
+            break;
         }
-    };
+        case Precision::I32: {
+            Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
 
-    if (isa == x64::sse41) {
-        h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vdiv(vmm_dst, vmm_dst, vmm_src1);
-    } else {
-        uni_vdiv(vmm_dst, vmm_src0, vmm_src1);
+            h->uni_vcvtdq2ps(vmm_dst, vmm_src_0);
+            if (second_is_float) {
+                h->uni_vdivps(vmm_dst, vmm_dst, vmm_src_1);
+            } else {
+                h->uni_vcvtdq2ps(vmm_aux_0, vmm_src_1);
+                h->uni_vdivps(vmm_dst, vmm_dst, vmm_aux_0);
+            }
+            h->uni_vroundps(vmm_dst, vmm_dst, 3); // rounding to zero
+            h->uni_vcvtps2dq(vmm_dst, vmm_dst);
+        } break;
+        case Precision::I64: {
+            if (isa == x64::avx512_core) {
+                Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+
+                h->vcvtqq2pd(vmm_dst, vmm_src_0);
+                if (second_is_float) {
+                    h->uni_vdivpd(vmm_dst, vmm_dst, vmm_src_1);
+                } else {
+                    h->vcvtqq2pd(vmm_aux_0, vmm_src_1);
+                    h->uni_vdivpd(vmm_dst, vmm_dst, vmm_aux_0);
+                }
+                h->uni_vroundpd(vmm_dst, vmm_dst, 3); // rounding to zero
+                h->vcvtpd2qq(vmm_dst, vmm_dst);
+            } else {
+                Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
+                h->uni_vmovups(vmm_aux_1, table_val_64("dMask"));
+
+                h->uni_vpaddq(vmm_dst,  vmm_src_0, vmm_aux_1);
+                h->uni_vsubpd(vmm_dst,  vmm_dst,  vmm_aux_1);
+
+                if (second_is_float) {
+                    h->uni_vdivpd(vmm_dst, vmm_dst, vmm_src_1);
+                } else {
+                    h->uni_vpaddq(vmm_aux_0, vmm_src_1, vmm_aux_1);
+                    h->uni_vsubpd(vmm_aux_0, vmm_aux_0, vmm_aux_1);
+
+                    h->uni_vdivpd(vmm_dst, vmm_dst, vmm_aux_0);
+                }
+                h->uni_vroundpd(vmm_dst, vmm_dst, 3); // rounding to zero
+
+                h->uni_vaddpd(vmm_dst, vmm_dst, vmm_aux_1);
+                h->uni_vpsubq(vmm_dst, vmm_dst, vmm_aux_1);
+            } break;
+        }
+        default: IE_THROW() << "jit_divide_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
-std::set<std::vector<element::Type>> jit_divide_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+std::set<std::vector<element::Type>> jit_divide_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
 }
 
 size_t jit_divide_emitter::aux_vecs_count() const {
-    return exec_prc_ == Precision::I32 ? 1 : 0;
+    if (host_isa_ == x64::avx512_core) { // Keyed on host_isa_, like emit_isa() and register_table_entries().
+        return (exec_prc_ == Precision::I32 || exec_prc_ == Precision::I64) ? 1 : 0;
+    } else {
+        return exec_prc_ == Precision::I32 ? 1 : exec_prc_ == Precision::I64 ? 2 : 0; // I64 additionally holds the dMask constant.
+    }
+}
+
+void jit_divide_emitter::register_table_entries() {
+    if (host_isa_ != x64::avx512_core) {
+        push_arg_entry_of_64("dMask",  0x433800002150d000, true); // Read by the non-AVX512 I64 path in emit_isa() via table_val_64("dMask").
+    }
 }
 
 /// FLOOR ///
-jit_floor_emitter::jit_floor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {}
-jit_floor_emitter::jit_floor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+jit_floor_emitter::jit_floor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, node, exec_prc) {}
+jit_floor_emitter::jit_floor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_floor_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_floor_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_floor_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -351,7 +465,7 @@ void jit_floor_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_floor_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
@@ -364,14 +478,14 @@ void jit_floor_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const s
 }
 
 /// CEILING ///
-jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
+jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {}
-jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
     : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_ceiling_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_ceiling_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_ceiling_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -384,7 +498,7 @@ void jit_ceiling_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_ceiling_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
@@ -397,15 +511,23 @@ void jit_ceiling_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const
 }
 
 /// FLOOR_MOD ///
-jit_floor_mod_emitter::jit_floor_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {}
-jit_floor_mod_emitter::jit_floor_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+jit_floor_mod_emitter::jit_floor_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
+    prepare_table();
+}
+jit_floor_mod_emitter::jit_floor_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
+    prepare_table();
+}
 
 size_t jit_floor_mod_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}};
+std::set<std::vector<element::Type>> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    if (x64::mayiuse(x64::avx512_core)) {
+        return {{element::f32, element::f32}, {element::f64, element::f64}};
+    } else {
+        return {{element::f32, element::f32}, {element::i64, element::i64}};
+    }
 }
 
 void jit_floor_mod_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
@@ -416,49 +538,120 @@ void jit_floor_mod_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, co
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_floor_mod_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_floor_mod_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
 
-    if (isa == x64::sse41) {
-        if (vmm_dst.getIdx() != vmm_src0.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vmovups(vmm_aux0, vmm_src0);
-        h->uni_vdivps(vmm_aux0, vmm_aux0, vmm_src1);
-        h->uni_vroundps(vmm_aux0, vmm_aux0, 1); // rounding down
-        h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1);
-        h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux0);
-    } else {
-        if (vmm_dst.getIdx() != vmm_src0.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vdivps(vmm_aux0, vmm_src0, vmm_src1);
-        h->uni_vroundps(vmm_aux0, vmm_aux0, 1); // rounding down
-        h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1);
-        h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux0);
+    switch (exec_prc_) {
+        case Precision::FP32: {
+            if (isa == x64::sse41) {
+                if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+                    h->uni_vmovups(vmm_dst, vmm_src_0);
+                }
+                h->uni_vdivps(vmm_aux_0, vmm_src_0, vmm_src_1);
+                h->uni_vroundps(vmm_aux_0, vmm_aux_0, 1); // rounding down
+                h->uni_vmulps(vmm_aux_0, vmm_aux_0, vmm_src_1);
+                h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux_0);
+            } else {
+                if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+                    h->uni_vdivps(vmm_dst, vmm_src_0, vmm_src_1);
+                    h->uni_vroundps(vmm_dst, vmm_dst, 1); // rounding down
+                    h->vfnmadd132ps(vmm_dst, vmm_src_0, vmm_src_1);
+                } else {
+                    h->uni_vdivps(vmm_aux_0, vmm_src_0, vmm_src_1);
+                    h->uni_vroundps(vmm_aux_0, vmm_aux_0, 1); // rounding down
+                    h->vfnmadd231ps(vmm_dst, vmm_aux_0, vmm_src_1);
+                }
+            }
+        } break;
+        case Precision::I64: {
+            Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
+
+            if (isa == x64::avx512_core) {
+                h->vcvtqq2pd(vmm_aux_0, vmm_src_0);
+                h->vcvtqq2pd(vmm_aux_1, vmm_src_1);
+
+                h->uni_vdivpd(vmm_dst, vmm_aux_0, vmm_aux_1);
+                h->uni_vroundpd(vmm_dst, vmm_dst, 1); // rounding down
+                h->vfnmadd132pd(vmm_dst, vmm_aux_0, vmm_aux_1);
+
+                h->vcvtpd2qq(vmm_dst, vmm_dst);
+            } else {
+                Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]);
+                h->uni_vmovups(vmm_aux2, table_val_64("dMask"));
+
+                h->uni_vpaddq(vmm_aux_0, vmm_src_0, vmm_aux2);
+                h->uni_vsubpd(vmm_aux_0, vmm_aux_0, vmm_aux2);
+                h->uni_vpaddq(vmm_aux_1, vmm_src_1, vmm_aux2);
+                h->uni_vsubpd(vmm_aux_1, vmm_aux_1, vmm_aux2);
+
+                if (isa == x64::sse41) {
+                    h->uni_vdivpd(vmm_dst, vmm_aux_0, vmm_aux_1);
+                    h->uni_vroundpd(vmm_dst, vmm_dst, 1); // rounding down
+                    h->uni_vmulpd(vmm_aux_1, vmm_aux_1, vmm_dst);
+                    h->uni_vsubpd(vmm_dst, vmm_aux_0, vmm_aux_1);
+                } else {
+                    h->uni_vdivpd(vmm_dst, vmm_aux_0, vmm_aux_1);
+                    h->uni_vroundpd(vmm_dst, vmm_dst, 1); // rounding down
+                    h->vfnmadd132pd(vmm_dst, vmm_aux_0, vmm_aux_1);
+                }
+
+                h->uni_vaddpd(vmm_dst, vmm_dst, vmm_aux2);
+                h->uni_vpsubq(vmm_dst, vmm_dst, vmm_aux2);
+            }
+        } break;
+        case Precision::FP64: {
+            if (isa == x64::sse41) {
+                if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+                    h->uni_vmovups(vmm_dst, vmm_src_0);
+                }
+                h->uni_vdivpd(vmm_aux_0, vmm_src_0, vmm_src_1);
+                h->uni_vroundpd(vmm_aux_0, vmm_aux_0, 1); // rounding down
+                h->uni_vmulpd(vmm_aux_0, vmm_aux_0, vmm_src_1);
+                h->uni_vsubpd(vmm_dst, vmm_dst, vmm_aux_0);
+            } else {
+                if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+                    h->uni_vdivpd(vmm_dst, vmm_src_0, vmm_src_1);
+                    h->uni_vroundpd(vmm_dst, vmm_dst, 1); // rounding down
+                    h->vfnmadd132pd(vmm_dst, vmm_src_0, vmm_src_1);
+                } else {
+                    h->uni_vdivpd(vmm_aux_0, vmm_src_0, vmm_src_1);
+                    h->uni_vroundpd(vmm_aux_0, vmm_aux_0, 1); // rounding down
+                    h->vfnmadd231pd(vmm_dst, vmm_aux_0, vmm_src_1);
+                }
+            }
+        } break;
+        default: IE_THROW() << "jit_floor_mod_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
 size_t jit_floor_mod_emitter::aux_vecs_count() const {
-    return 1;
+    return exec_prc_ == Precision::I64 ? (host_isa_ == x64::avx512_core ? 2 : 3) : 1; // AVX512 I64 reads aux 0-1, emulated I64 reads aux 0-2.
+}
+
+void jit_floor_mod_emitter::register_table_entries() {
+    if (host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64) {
+        push_arg_entry_of_64("dMask",  0x433800002150d000, true); // Constant read by emit_isa() through table_val_64("dMask").
+    }
+}
 
 /// MOD ///
-jit_mod_emitter::jit_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {}
-jit_mod_emitter::jit_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+jit_mod_emitter::jit_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, node, exec_prc) {}
+jit_mod_emitter::jit_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_mod_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_mod_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_mod_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -470,33 +663,33 @@ void jit_mod_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const st
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_mod_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_mod_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]); // dividend
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]); // divisor
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]); // scratch register holding the truncated quotient
 
     if (isa == x64::sse41) {
-        if (vmm_dst.getIdx() != vmm_src0.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vmovups(vmm_aux0, vmm_src0);
-        h->uni_vdivps(vmm_aux0, vmm_aux0, vmm_src1);
-        h->uni_vroundps(vmm_aux0, vmm_aux0, 3); // truncate
-        h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1);
-        h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux0);
+        if (vmm_dst.getIdx() != vmm_src_0.getIdx())
+            h->uni_vmovups(vmm_dst, vmm_src_0);
+        h->uni_vmovups(vmm_aux_0, vmm_src_0);
+        h->uni_vdivps(vmm_aux_0, vmm_aux_0, vmm_src_1); // aux = src_0 / src_1
+        h->uni_vroundps(vmm_aux_0, vmm_aux_0, 3); // truncate
+        h->uni_vmulps(vmm_aux_0, vmm_aux_0, vmm_src_1); // aux = trunc(src_0 / src_1) * src_1
+        h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux_0); // dst = src_0 - aux
     } else {
-        if (vmm_dst.getIdx() != vmm_src0.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vdivps(vmm_aux0, vmm_src0, vmm_src1);
-        h->uni_vroundps(vmm_aux0, vmm_aux0, 3); // truncate
-        h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1);
-        h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux0);
+        if (vmm_dst.getIdx() != vmm_src_0.getIdx())
+            h->uni_vmovups(vmm_dst, vmm_src_0);
+        h->uni_vdivps(vmm_aux_0, vmm_src_0, vmm_src_1); // aux = src_0 / src_1
+        h->uni_vroundps(vmm_aux_0, vmm_aux_0, 3); // truncate
+        h->uni_vmulps(vmm_aux_0, vmm_aux_0, vmm_src_1); // aux = trunc(src_0 / src_1) * src_1
+        h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux_0); // dst = src_0 - aux
     }
 }
 
@@ -506,9 +699,9 @@ size_t jit_mod_emitter::aux_vecs_count() const {
 
 /// MAXIMUM ///
 jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
+jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_maximum_emitter::get_inputs_num() const { return 2; }
 
@@ -520,43 +713,71 @@ void jit_maximum_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, cons
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_maximum_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_maximum_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
-    Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-
-    auto uni_vmax = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: h->uni_vmaxps(vmm_dst, vmm_src0, vmm_src1); break;
-            case Precision::I32:  h->uni_vpmaxsd(vmm_dst, vmm_src0, vmm_src1); break;
-            default: assert(!"unsupported precision");
-        }
-    };
 
-    if (isa == x64::sse41) {
-        if (vmm_src0.getIdx() != vmm_dst.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vmax(vmm_dst, vmm_dst, vmm_src1);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Operand op_src_1; // second operand: either a vector register or a memory reference
+    if (in_vec_idxs.size() > 1) {
+        op_src_1 = Vmm(in_vec_idxs[1]);
+    } else if (aux_gpr_idxs.size() > 0) {
+        op_src_1 = h->ptr[Reg64(aux_gpr_idxs[0])]; // second input is read from the address held in the first aux GPR
     } else {
-        uni_vmax(vmm_dst, vmm_src0, vmm_src1);
+        IE_THROW() << "jit_maximum_emitter has invalid inputs number.";
+    }
+    Vmm vmm_dst = Vmm(out_vec_idxs[0]);
+
+    switch (exec_prc_) {
+        case Precision::FP32: h->uni_vmaxps(vmm_dst, vmm_src_0, op_src_1); break;
+        case Precision::I32:  h->uni_vpmaxsd(vmm_dst, vmm_src_0, op_src_1); break;
+        case Precision::I64: {
+            if (isa == x64::avx512_core) {
+                h->vpmaxsq(vmm_dst, vmm_src_0, op_src_1);
+            } else {
+                if (aux_vec_idxs.size() < 1) {
+                    IE_THROW() << "jit_maximum_emitter has invalid number of aux vectors.";
+                }
+                auto vmm_aux = Vmm(aux_vec_idxs[0]); // the max is assembled from a compare mask here, using one scratch vector
+                if (isa == x64::avx2) {
+                    h->vpcmpgtq(vmm_aux, vmm_src_0, op_src_1); // aux = mask of lanes where src_0 > src_1
+                    h->vandpd(vmm_dst, vmm_src_0, vmm_aux); // dst = mask & src_0
+                    h->vandnpd(vmm_aux, vmm_aux, op_src_1); // aux = ~mask & src_1
+                    h->vorpd(vmm_dst, vmm_dst, vmm_aux); // dst = merge of both selections
+                } else {
+                    h->movups(vmm_aux, vmm_src_0);
+                    h->pcmpgtq(vmm_aux, op_src_1); // aux = mask of lanes where src_0 > src_1
+                    h->andpd(vmm_aux, vmm_src_0); // aux = mask & src_0, taken while src_0 is still intact
+                    if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+                        h->movups(vmm_dst, vmm_src_0);
+                    }
+                    h->pcmpgtq(vmm_dst, op_src_1); // dst = mask (overwrites src_0 when dst aliases it)
+                    h->andnpd(vmm_dst, op_src_1); // dst = ~mask & src_1
+                    h->orpd(vmm_dst, vmm_aux); // dst = merge of both selections
+                }
+            }
+        } break;
+        default: IE_THROW() << "jit_maximum_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
-std::set<std::vector<element::Type>> jit_maximum_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+std::set<std::vector<element::Type>> jit_maximum_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
+}
+
+size_t jit_maximum_emitter::aux_vecs_count() const {
+    return (host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64) ? 1 : 0; // only the non-AVX-512 I64 path uses a scratch vector
 }
 
 /// MINIMUM ///
 jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node)
-: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
-jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {}
+jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+    : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_minimum_emitter::get_inputs_num() const { return 2; }
 
@@ -568,44 +789,75 @@ void jit_minimum_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, cons
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_minimum_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_minimum_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
-    Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-
-    auto uni_vmin = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: h->uni_vminps(vmm_dst, vmm_src0, vmm_src1); break;
-            case Precision::I32:  h->uni_vpminsd(vmm_dst, vmm_src0, vmm_src1); break;
-            default: assert(!"unsupported precision");
-        }
-    };
 
-    if (isa == x64::sse41) {
-        if (vmm_src0.getIdx() != vmm_dst.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vmin(vmm_dst, vmm_dst, vmm_src1);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Operand op_src_1; // second operand: either a vector register or a memory reference
+    if (in_vec_idxs.size() > 1) {
+        op_src_1 = Vmm(in_vec_idxs[1]);
+    } else if (aux_gpr_idxs.size() > 0) {
+        op_src_1 = h->ptr[Reg64(aux_gpr_idxs[0])]; // second input is read from the address held in the first aux GPR
     } else {
-        uni_vmin(vmm_dst, vmm_src0, vmm_src1);
+        IE_THROW() << "jit_minimum_emitter has invalid inputs number.";
     }
+    Vmm vmm_dst = Vmm(out_vec_idxs[0]);
+
+    switch (exec_prc_) {
+        case Precision::FP32: h->uni_vminps(vmm_dst, vmm_src_0, op_src_1); break;
+        case Precision::I32:  h->uni_vpminsd(vmm_dst, vmm_src_0, op_src_1); break;
+        case Precision::I64: {
+            if (isa == x64::avx512_core) {
+                h->vpminsq(vmm_dst, vmm_src_0, op_src_1);
+            } else {
+                if (aux_vec_idxs.size() < 1) {
+                    IE_THROW() << "jit_minimum_emitter has invalid number of aux vectors.";
+                }
+                auto vmm_aux = Vmm(aux_vec_idxs[0]); // the min is assembled from a compare mask here, using one scratch vector
+                if (isa == x64::avx2) {
+                    h->vpcmpgtq(vmm_aux, vmm_src_0, op_src_1); // aux = mask of lanes where src_0 > src_1
+                    h->vandnpd(vmm_dst, vmm_aux, vmm_src_0); // dst = ~mask & src_0
+                    h->vandpd(vmm_aux, vmm_aux, op_src_1); // aux = mask & src_1
+                    h->vorpd(vmm_dst, vmm_dst, vmm_aux); // dst = merge of both selections
+                } else {
+                    h->movups(vmm_aux, vmm_src_0);
+                    h->pcmpgtq(vmm_aux, op_src_1); // aux = mask of lanes where src_0 > src_1
+                    h->andnpd(vmm_aux, vmm_src_0); // aux = ~mask & src_0; must be taken before dst (may alias src_0) is overwritten
+                    if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+                        h->movups(vmm_dst, vmm_src_0);
+                    }
+                    h->pcmpgtq(vmm_dst, op_src_1); // dst = mask (overwrites src_0 when dst aliases it)
+                    h->andpd(vmm_dst, op_src_1); // dst = mask & src_1
+                    h->orpd(vmm_dst, vmm_aux); // dst = merge of both selections
+                }
+            }
+        } break;
+        default: IE_THROW() << "jit_minimum_emitter doesn't support precision '" << exec_prc_ << "'";
+    }
+}
+
+std::set<std::vector<element::Type>> jit_minimum_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
 }
 
-std::set<std::vector<element::Type>> jit_minimum_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+size_t jit_minimum_emitter::aux_vecs_count() const {
+    return (host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64) ? 1 : 0; // only the non-AVX-512 I64 path uses a scratch vector
 }
 
 /// SQUARED_DIFFERENCE ///
-jit_squared_difference_emitter::jit_squared_difference_emitter(
-    x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {}
-jit_squared_difference_emitter::jit_squared_difference_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+jit_squared_difference_emitter::jit_squared_difference_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
+    prepare_table();
+}
+jit_squared_difference_emitter::jit_squared_difference_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
+    prepare_table();
+}
 
 size_t jit_squared_difference_emitter::get_inputs_num() const { return 2; }
 
@@ -617,54 +869,66 @@ void jit_squared_difference_emitter::emit_impl(const std::vector<size_t> &in_vec
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_squared_difference_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_squared_difference_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    auto uni_vsqdiff = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
-        switch (exec_prc_) {
-            case Precision::FP32: {
-                h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
-                h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
-            } break;
-            case Precision::I32: {
-                h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1);
-                h->uni_vpmulld(vmm_dst, vmm_dst, vmm_dst);
-            } break;
-            default: assert(!"unsupported precision");
-        }
-    };
-
-    if (isa == x64::sse41) {
-        if (vmm_src0.getIdx() != vmm_dst.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
-        uni_vsqdiff(vmm_dst, vmm_dst, vmm_src1);
-    } else {
-        uni_vsqdiff(vmm_dst, vmm_src0, vmm_src1);
+    switch (exec_prc_) {
+        case Precision::FP32: {
+            h->uni_vsubps(vmm_dst, vmm_src_0, vmm_src_1);
+            h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
+        } break;
+        case Precision::I32: {
+            h->uni_vpsubd(vmm_dst, vmm_src_0, vmm_src_1);
+            h->uni_vpmulld(vmm_dst, vmm_dst, vmm_dst);
+        } break;
+        case Precision::I64: {
+            if (isa == x64::avx512_core) {
+                h->uni_vpsubq(vmm_dst, vmm_src_0, vmm_src_1);
+                h->vpmullq(vmm_dst, vmm_dst, vmm_dst);
+            } else {
+                h->uni_vpsubq(vmm_dst, vmm_src_0, vmm_src_1);
+
+                Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                // There is no multiply int64 instruction on AVX2 and SSE41, thus the WA is used.
+                // vmm_dst = d = hi:lo (32-bit halves); d * d mod 2^64 = lo * lo + ((2 * hi * lo) << 32).
+                h->uni_vpsrlq(vmm_aux_0, vmm_dst, 32); // aux = hi
+                h->uni_vpmuludq(vmm_aux_0, vmm_aux_0, vmm_dst); // aux = hi * lo
+                h->uni_vpaddq(vmm_aux_0, vmm_aux_0, vmm_aux_0);  // aux = 2 * hi * lo
+                h->uni_vpsllq(vmm_aux_0, vmm_aux_0, 32); // aux = (2 * hi * lo) << 32
+                h->uni_vpmuludq(vmm_dst, vmm_dst, vmm_dst);   // dst = lo * lo
+                h->uni_vpaddq(vmm_dst, vmm_dst, vmm_aux_0);    // dst = lo * lo + ((2 * hi * lo) << 32)
+            }
+        } break;
+        default: IE_THROW() << "jit_squared_difference_emitter doesn't support precision '" << exec_prc_ << "'";
     }
 }
 
-std::set<std::vector<element::Type>> jit_squared_difference_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}, {element::i32, element::i32}};
+std::set<std::vector<element::Type>> jit_squared_difference_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
+}
+
+size_t jit_squared_difference_emitter::aux_vecs_count() const {
+    return (host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64) ? 1 : 0; // keyed on host_isa_, the same value emit_impl dispatches on
 }
 
 /// POWER_DYNAMIC ///
 jit_power_dynamic_emitter::jit_power_dynamic_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
-                                                     Precision exec_prc)
+                                                     const Precision& exec_prc)
     : jit_emitter(host, host_isa, node, exec_prc) {}
-jit_power_dynamic_emitter::jit_power_dynamic_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_power_dynamic_emitter::jit_power_dynamic_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
     : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_power_dynamic_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_power_dynamic_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_power_dynamic_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -676,23 +940,23 @@ void jit_power_dynamic_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_power_dynamic_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_power_dynamic_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
     Xmm xmm0 = Xmm(0), xmm1 = Xmm(1);
 
     // caller obligation to save gprs as callee may use them
     size_t gpr_size = 8;
-    Xbyak::Operand gprs_to_save[] = {h->r8, h->r9, h->r10, h->r11, h->rax,
-                                     h->rcx, h->rdx, h->rdi, h->rsi, h->rbp, h->rbx};
+    Operand gprs_to_save[] = {h->r8, h->r9, h->r10, h->r11, h->rax,
+                              h->rcx, h->rdx, h->rdi, h->rsi, h->rbp, h->rbx};
     size_t n_gprs_to_save = sizeof(gprs_to_save) / sizeof(gprs_to_save[0]);
 
     h->sub(h->rsp, n_gprs_to_save * gpr_size);
@@ -721,8 +985,8 @@ void jit_power_dynamic_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs,
     h->sub(h->rsp, (get_max_vecs_count() + 2) * get_vec_length());
     for (size_t i = 2; i < get_max_vecs_count() + 2; ++i)
         h->uni_vmovups(h->ptr[h->rsp + i * get_vec_length()], Vmm(i - 2));
-    h->uni_vmovups(h->ptr[h->rsp + 0 * get_vec_length()], vmm_src0); // src
-    h->uni_vmovups(h->ptr[h->rsp + 1 * get_vec_length()], vmm_src1); // beta
+    h->uni_vmovups(h->ptr[h->rsp + 0 * get_vec_length()], vmm_src_0); // src
+    h->uni_vmovups(h->ptr[h->rsp + 1 * get_vec_length()], vmm_src_1); // beta
 
     // save function address in gpr to pass in in call instruction
     h->mov(h->rbp, reinterpret_cast<uintptr_t>(powf));
@@ -768,19 +1032,19 @@ void jit_power_dynamic_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs,
 
 
 /// EQUAL ///
-jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {
+jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_equal_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_equal_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}};
+std::set<std::vector<element::Type>> jit_equal_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
 }
 
 void jit_equal_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
@@ -791,58 +1055,80 @@ void jit_equal_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_equal_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_equal_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
 
-    if (isa == x64::sse41) {
-        h->movups(vmm_aux0, vmm_src0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_eq_oq);
-        h->movups(vmm_aux1, table_val("one"));
-        h->pxor(vmm_dst, vmm_dst);
-        h->blendvps(vmm_dst, vmm_aux1);
-    } else if (isa == x64::avx2) {
-        h->vcmpeqps(vmm_aux0, vmm_src0, vmm_src1);
-        h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->uni_vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux0);
+    // TODO: Actually the Result is bool in U8 representation. 0x01 or 0xFF - is there a difference for real models?
+    // Remove all vpsrld instructions if there is no difference.
+    if (isa == x64::sse41 || isa == x64::avx2) {
+        Vmm vmm_src0_t = vmm_src_0; // first compare operand; redirected to dst on SSE4.1 below
+        if (isa == x64::sse41 && vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+            h->uni_vmovups(vmm_dst, vmm_src_0);
+            vmm_src0_t = vmm_dst;
+        }
+        switch (exec_prc_) {
+            case Precision::FP32:
+                h->uni_vcmpps(vmm_dst, vmm_src0_t, vmm_src_1, _cmp_eq_oq); // dst = all-ones mask in equal lanes
+                h->uni_vandps(vmm_dst, vmm_dst, table_val("oneF")); // mask & 1.0f -> 1.0f in equal lanes, 0 elsewhere
+                break;
+            case Precision::I32:
+                h->uni_vpcmpeqd(vmm_dst, vmm_src0_t, vmm_src_1); // dst = all-ones mask in equal lanes
+                h->uni_vpsrld(vmm_dst, vmm_dst, 31); // logical shift turns the mask into integer 1 / 0
+                break;
+            case Precision::I64:
+                h->uni_vpcmpeqq(vmm_dst, vmm_src0_t, vmm_src_1); // dst = all-ones mask in equal lanes
+                h->uni_vpsrlq(vmm_dst, vmm_dst, 63); // logical shift turns the mask into integer 1 / 0
+                break;
+            default: IE_THROW() << "jit_equal_emitter doesn't support precision '" << exec_prc_ << "'";
+        }
     } else {
-        h->vcmpps(k_mask, vmm_src0, vmm_src1, _cmp_eq_oq);
-        h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("one"));
+        switch (exec_prc_) {
+            case Precision::FP32:
+                h->vcmpps(k_mask, vmm_src_0, vmm_src_1, _cmp_eq_oq);
+                h->uni_vmovups(vmm_dst | k_mask | h->T_z, table_val("oneF")); // zero-masked load: 1.0f in equal lanes, 0 elsewhere
+                break;
+            case Precision::I32:
+                h->vpcmpeqd(k_mask, vmm_src_0, vmm_src_1);
+                h->vpmovm2d(vmm_dst, k_mask); // expand mask bits to all-ones / all-zero dword lanes
+                h->uni_vpsrld(vmm_dst, vmm_dst, 31); // all-ones -> 1
+                break;
+            case Precision::I64:
+                h->vpcmpeqq(k_mask, vmm_src_0, vmm_src_1);
+                h->vpmovm2q(vmm_dst, k_mask); // expand mask bits to all-ones / all-zero qword lanes
+                h->vpsrlq(vmm_dst, vmm_dst, 63); // all-ones -> 1
+                break;
+            default: IE_THROW() << "jit_equal_emitter doesn't support precision '" << exec_prc_ << "'";
+        }
     }
 }
 
 void jit_equal_emitter::register_table_entries() {
-    push_arg_entry_of("zero", 0x00000000, true);
-    push_arg_entry_of("one", CONST_1_F, true);
-}
-
-size_t jit_equal_emitter::aux_vecs_count() const {
-    return 2;
+    if (exec_prc_ == Precision::FP32) {
+        push_arg_entry_of("oneF", CONST_1_F, true); // "oneF" is read only by the FP32 compare paths of emit_isa
+    }
 }
 
 /// NOT_EQUAL ///
-jit_not_equal_emitter::jit_not_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_not_equal_emitter::jit_not_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_not_equal_emitter::jit_not_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_not_equal_emitter::jit_not_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_not_equal_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_not_equal_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_not_equal_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -854,31 +1140,31 @@ void jit_not_equal_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, co
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_not_equal_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_not_equal_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
 
     if (isa == x64::sse41) {
-        h->movups(vmm_aux0, vmm_src0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_eq_oq);
+        h->movups(vmm_aux_0, vmm_src_0);
+        h->cmpps(vmm_aux_0, vmm_src_1, _cmp_eq_oq);
         h->movups(vmm_dst, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);
+        h->blendvps(vmm_dst, vmm_aux_1);
     } else if (isa == x64::avx2) {
-        h->vcmpeqps(vmm_aux0, vmm_src0, vmm_src1);
+        h->vcmpeqps(vmm_aux_0, vmm_src_0, vmm_src_1);
         h->uni_vmovups(vmm_dst, table_val("one"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux_0);
     } else {
-        h->vcmpps(k_mask, vmm_src0, vmm_src1, _cmp_eq_oq);
+        h->vcmpps(k_mask, vmm_src_0, vmm_src_1, _cmp_eq_oq);
         h->uni_vmovups(vmm_dst, table_val("one"));
         h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("zero"));
     }
@@ -894,19 +1180,19 @@ size_t jit_not_equal_emitter::aux_vecs_count() const {
 }
 
 /// GREATER ///
-jit_greater_emitter::jit_greater_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_greater_emitter::jit_greater_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_greater_emitter::jit_greater_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {
+jit_greater_emitter::jit_greater_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_greater_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_greater_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}};
+std::set<std::vector<element::Type>> jit_greater_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};
 }
 
 void jit_greater_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
@@ -917,59 +1203,79 @@ void jit_greater_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, cons
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_greater_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_greater_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
 
-    if (isa == x64::sse41) {
-        h->movups(vmm_aux0, vmm_src0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_gt_os);
-        h->movups(vmm_aux1, table_val("one"));
-        h->pxor(vmm_dst, vmm_dst);
-        h->blendvps(vmm_dst, vmm_aux1);
-    } else if (isa == x64::avx2) {
-        h->vcmpgtps(vmm_aux0, vmm_src0, vmm_src1);
-        h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux0);
+    if (isa == x64::sse41 || isa == x64::avx2) {
+        Vmm vmm_src0_t = vmm_src_0; // first compare operand; redirected to dst on SSE4.1 below
+        if (isa == x64::sse41 && vmm_dst.getIdx() != vmm_src_0.getIdx()) {
+            h->uni_vmovups(vmm_dst, vmm_src_0);
+            vmm_src0_t = vmm_dst;
+        }
+        switch (exec_prc_) {
+            case Precision::FP32:
+                h->uni_vcmpps(vmm_dst, vmm_src0_t, vmm_src_1, _cmp_gt_os); // dst = all-ones mask where src_0 > src_1
+                h->uni_vandps(vmm_dst, vmm_dst, table_val("oneF")); // mask & 1.0f -> 1.0f in matching lanes, 0 elsewhere
+                break;
+            case Precision::I32:
+                h->uni_vpcmpgtd(vmm_dst, vmm_src0_t, vmm_src_1); // dst = all-ones mask where src_0 > src_1
+                h->uni_vpsrld(vmm_dst, vmm_dst, 31); // logical shift turns the mask into integer 1 / 0
+                break;
+            case Precision::I64:
+                h->uni_vpcmpgtq(vmm_dst, vmm_src0_t, vmm_src_1); // dst = all-ones mask where src_0 > src_1
+                h->uni_vpsrlq(vmm_dst, vmm_dst, 63); // logical shift turns the mask into integer 1 / 0
+                break;
+            default: IE_THROW() << "jit_greater_emitter doesn't support precision '" << exec_prc_ << "'";
+        }
     } else {
-        h->vcmpps(k_mask, vmm_src0, vmm_src1, _cmp_gt_os);
-        h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("one"));
+        switch (exec_prc_) {
+            case Precision::FP32:
+                h->vcmpps(k_mask, vmm_src_0, vmm_src_1, _cmp_gt_os);
+                h->uni_vmovups(vmm_dst | k_mask | h->T_z, table_val("oneF")); // zero-masked load: 1.0f in matching lanes, 0 elsewhere
+                break;
+            case Precision::I32:
+                h->vpcmpgtd(k_mask, vmm_src_0, vmm_src_1);
+                h->vpmovm2d(vmm_dst, k_mask); // expand mask bits to all-ones / all-zero dword lanes
+                h->uni_vpsrld(vmm_dst, vmm_dst, 31); // all-ones -> 1
+                break;
+            case Precision::I64:
+                h->vpcmpgtq(k_mask, vmm_src_0, vmm_src_1);
+                h->vpmovm2q(vmm_dst, k_mask); // expand mask bits to all-ones / all-zero qword lanes
+                h->vpsrlq(vmm_dst, vmm_dst, 63); // all-ones -> 1
+                break;
+            default: IE_THROW() << "jit_greater_emitter doesn't support precision '" << exec_prc_ << "'";
+        }
     }
 }
 
 void jit_greater_emitter::register_table_entries() {
-    push_arg_entry_of("zero", 0x00000000, true);
-    push_arg_entry_of("one", CONST_1_F, true);
-}
-
-size_t jit_greater_emitter::aux_vecs_count() const {
-    return 2;
+    if (exec_prc_ == Precision::FP32) {
+        push_arg_entry_of("oneF", CONST_1_F, true); // "oneF" is read only by the FP32 compare paths of emit_isa
+    }
 }
 
 /// GREATER_EQUAL ///
 jit_greater_equal_emitter::jit_greater_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
-                                                     Precision exec_prc)
+                                                     const Precision& exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_greater_equal_emitter::jit_greater_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_greater_equal_emitter::jit_greater_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_greater_equal_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_greater_equal_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_greater_equal_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -981,31 +1287,31 @@ void jit_greater_equal_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_greater_equal_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_greater_equal_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
 
     if (isa == x64::sse41) {
-        h->movups(vmm_aux0, vmm_src0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_ge_os);
-        h->movups(vmm_aux1, table_val("one"));
+        h->movups(vmm_aux_0, vmm_src_0);
+        h->cmpps(vmm_aux_0, vmm_src_1, _cmp_ge_os);
+        h->movups(vmm_aux_1, table_val("one"));
         h->pxor(vmm_dst, vmm_dst);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->blendvps(vmm_dst, vmm_aux_1);
     } else if (isa == x64::avx2) {
-        h->vcmpgeps(vmm_aux0, vmm_src0, vmm_src1);
+        h->vcmpgeps(vmm_aux_0, vmm_src_0, vmm_src_1);
         h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux_0);
     } else {
-        h->vcmpps(k_mask, vmm_src0, vmm_src1, _cmp_ge_os);
+        h->vcmpps(k_mask, vmm_src_0, vmm_src_1, _cmp_ge_os);
         h->uni_vmovups(vmm_dst, table_val("zero"));
         h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("one"));
     }
@@ -1021,19 +1327,19 @@ size_t jit_greater_equal_emitter::aux_vecs_count() const {
 }
 
 /// LESS ///
-jit_less_emitter::jit_less_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_less_emitter::jit_less_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_less_emitter::jit_less_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {
+jit_less_emitter::jit_less_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_less_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_less_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32, element::f32}};
+std::set<std::vector<element::Type>> jit_less_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return {{element::f32, element::f32}, {element::i32, element::i32}, {element::i64, element::i64}};  // no mixed-type pairs are listed
 }
 
 void jit_less_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
@@ -1044,58 +1350,78 @@ void jit_less_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const s
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_less_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_less_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);  // left operand of (src_0 < src_1)
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);  // right operand of (src_0 < src_1)
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
 
-    if (isa == x64::sse41) {
-        h->movups(vmm_aux0, vmm_src0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_lt_os);
-        h->movups(vmm_aux1, table_val("one"));
-        h->pxor(vmm_dst, vmm_dst);
-        h->blendvps(vmm_dst, vmm_aux1);
-    } else if (isa == x64::avx2) {
-        h->vcmpltps(vmm_aux0, vmm_src0, vmm_src1);
-        h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux0);
+    if (isa == x64::sse41 || isa == x64::avx2) {  // vector-mask path: the compare result is written straight into vmm_dst
+        Vmm vmm_src0_t = vmm_src_0;  // register the compare will actually read src_0 from
+        if (isa == x64::sse41 && vmm_dst.getIdx() != vmm_src_0.getIdx()) {  // two-operand SSE forms overwrite their first source
+            h->uni_vmovups(vmm_dst, vmm_src_0);  // NOTE(review): overwrites src_1 if vmm_dst aliases it - confirm callers never do
+            vmm_src0_t = vmm_dst;  // from here on src_0 is read from vmm_dst
+        }
+        switch (exec_prc_) {
+            case Precision::FP32:
+                h->uni_vcmpps(vmm_dst, vmm_src0_t, vmm_src_1, _cmp_lt_os);  // all-ones lanes where src_0 < src_1, zero lanes elsewhere
+                h->uni_vandps(vmm_dst, vmm_dst, table_val("oneF"));  // mask & "oneF": true lanes become "oneF", false lanes stay 0
+                break;
+            case Precision::I32:  // NOTE(review): on sse41 vmm_src0_t is vmm_dst itself; confirm uni_vpcmpgtd copes with dst == last source
+                h->uni_vpcmpgtd(vmm_dst, vmm_src_1, vmm_src0_t);  // src_0 < src_1 computed as src_1 > src_0 (signed dwords)
+                h->uni_vpsrld(vmm_dst, vmm_dst, 31);  // logical shift turns an all-ones lane into integer 1
+                break;
+            case Precision::I64:  // NOTE(review): same aliasing question; legacy PCMPGTQ is an SSE4.2 instruction - confirm for sse41 hosts
+                h->uni_vpcmpgtq(vmm_dst, vmm_src_1, vmm_src0_t);  // src_0 < src_1 computed as src_1 > src_0 (signed qwords)
+                h->uni_vpsrlq(vmm_dst, vmm_dst, 63);  // logical shift turns an all-ones lane into integer 1
+                break;
+            default: IE_THROW() << "jit_less_emitter doesn't support precision '" << exec_prc_ << "'";
+        }
     } else {
-        h->vcmpps(k_mask, vmm_src0, vmm_src1, _cmp_lt_os);
-        h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("one"));
+        switch (exec_prc_) {  // remaining ISA (Zmm): compares write the opmask register k_mask instead of a vector
+            case Precision::FP32:
+                h->vcmpps(k_mask, vmm_src_0, vmm_src_1, _cmp_lt_os);  // k_mask bit set where src_0 < src_1
+                h->uni_vmovups(vmm_dst | k_mask | h->T_z, table_val("oneF"));  // zero-masked load: "oneF" where k_mask is set, 0 elsewhere
+                break;
+            case Precision::I32:
+                h->vpcmpgtd(k_mask, vmm_src_1, vmm_src_0);  // operands swapped: src_1 > src_0
+                h->vpmovm2d(vmm_dst, k_mask);  // expand every mask bit into an all-ones / all-zeros dword
+                h->uni_vpsrld(vmm_dst, vmm_dst, 31);  // all-ones dword -> integer 1
+                break;
+            case Precision::I64:
+                h->vpcmpgtq(k_mask, vmm_src_1, vmm_src_0);  // operands swapped: src_1 > src_0
+                h->vpmovm2q(vmm_dst, k_mask);  // expand every mask bit into an all-ones / all-zeros qword
+                h->vpsrlq(vmm_dst, vmm_dst, 63);  // all-ones qword -> integer 1
+                break;
+            default: IE_THROW() << "jit_less_emitter doesn't support precision '" << exec_prc_ << "'";
+        }
     }
 }
 
 void jit_less_emitter::register_table_entries() {
-    push_arg_entry_of("zero", 0x00000000, true);
-    push_arg_entry_of("one", CONST_1_F, true);
-}
-
-size_t jit_less_emitter::aux_vecs_count() const {
-    return 2;
+    if (exec_prc_ == Precision::FP32) {  // only the FP32 cases of emit_isa read table_val("oneF"); integer cases use shifts
+        push_arg_entry_of("oneF", CONST_1_F, true);  // replaces the former "zero"/"one" pair with a single FP32 constant
+    }
 }
 
 /// LESS_EQUAL ///
-jit_less_equal_emitter::jit_less_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_less_equal_emitter::jit_less_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_less_equal_emitter::jit_less_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_less_equal_emitter::jit_less_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_less_equal_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_less_equal_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_less_equal_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -1107,32 +1433,32 @@ void jit_less_equal_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, c
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_less_equal_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_less_equal_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
 
     if (isa == x64::sse41) {
-        h->movups(vmm_aux0, vmm_src0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_le_os);
-        h->movups(vmm_aux1, table_val("one"));
+        h->movups(vmm_aux_0, vmm_src_0);
+        h->cmpps(vmm_aux_0, vmm_src_1, _cmp_le_os);
+        h->movups(vmm_aux_1, table_val("one"));
         h->pxor(vmm_dst, vmm_dst);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->blendvps(vmm_dst, vmm_aux_1);
     } else if (isa == x64::avx2) {
-        h->vcmpleps(vmm_aux0, vmm_src0, vmm_src1);
+        h->vcmpleps(vmm_aux_0, vmm_src_0, vmm_src_1);
         h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux_0);
     } else {
-        h->vcmpps(k_mask, vmm_src0, vmm_src1, _cmp_le_os);
+        h->vcmpps(k_mask, vmm_src_0, vmm_src_1, _cmp_le_os);
         h->uni_vmovups(vmm_dst, table_val("zero"));
         h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("one"));
     }
@@ -1148,18 +1474,18 @@ size_t jit_less_equal_emitter::aux_vecs_count() const {
 }
 
 /// LOGICAL_AND ///
-jit_logical_and_emitter::jit_logical_and_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_logical_and_emitter::jit_logical_and_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_logical_and_emitter::jit_logical_and_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_logical_and_emitter::jit_logical_and_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_logical_and_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_logical_and_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_logical_and_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -1171,53 +1497,53 @@ void jit_logical_and_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_logical_and_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_logical_and_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);  // first logical operand
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);  // second logical operand
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);  // scratch: compare mask (SSE/AVX2) or normalized second operand (AVX2/Zmm)
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);  // scratch: zero source (SSE) or second compare mask (AVX2)
     Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]);
 
     if (isa == x64::sse41) {
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src0, _cmp_eq_oq);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // aux_0 = 0
+        h->cmpps(vmm_aux_0, vmm_src_0, _cmp_eq_oq);  // aux_0 = all-ones in lanes where src_0 == 0
         h->movups(vmm_dst, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);  // aux_1 = 0, the value blended into masked lanes
+        h->blendvps(vmm_dst, vmm_aux_1);  // SSE4.1 blendvps reads its mask implicitly from xmm0; presumably vmm_aux_0 is xmm0 - confirm
 
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_eq_oq);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // reset aux_0 before building the mask for the second operand
+        h->cmpps(vmm_aux_0, vmm_src_1, _cmp_eq_oq);  // aux_0 = all-ones in lanes where src_1 == 0
         h->movups(vmm_aux2, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_aux2, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);  // aux_1 = 0 again
+        h->blendvps(vmm_aux2, vmm_aux_1);  // aux2 = 0 in masked lanes, "one" elsewhere (same implicit-xmm0 assumption)
 
         h->uni_vandps(vmm_dst, vmm_dst, vmm_aux2);
     } else if (isa == x64::avx2) {
-        h->vcmpeqps(vmm_aux0, vmm_src0, table_val("zero"));
+        h->vcmpeqps(vmm_aux_0, vmm_src_0, table_val("zero"));  // aux_0 = mask of lanes where src_0 == "zero"
         h->uni_vmovups(vmm_dst, table_val("one"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux_0);  // dst = "zero" in masked lanes, "one" elsewhere
 
-        h->vcmpeqps(vmm_aux1, vmm_src1, table_val("zero"));
-        h->uni_vmovups(vmm_aux0, table_val("one"));
-        h->vblendvps(vmm_aux0, vmm_aux0, table_val("zero"), vmm_aux1);
+        h->vcmpeqps(vmm_aux_1, vmm_src_1, table_val("zero"));  // aux_1 = mask of lanes where src_1 == "zero"
+        h->uni_vmovups(vmm_aux_0, table_val("one"));  // aux_0 is free again: start from "one" in every lane
+        h->vblendvps(vmm_aux_0, vmm_aux_0, table_val("zero"), vmm_aux_1);  // aux_0 = "zero" in masked lanes, "one" elsewhere
 
-        h->uni_vandps(vmm_dst, vmm_dst, vmm_aux0);
+        h->uni_vandps(vmm_dst, vmm_dst, vmm_aux_0);  // bitwise AND of the two normalized operands
     } else {
-        h->vcmpps(k_mask, vmm_src0, table_val("zero"), _cmp_eq_oq);
-        h->uni_vmovups(vmm_aux0, table_val("one"));
-        h->vblendmps(vmm_dst | k_mask, vmm_aux0, table_val("zero"));
+        h->vcmpps(k_mask, vmm_src_0, table_val("zero"), _cmp_eq_oq);  // k_mask = lanes where src_0 == "zero"
+        h->uni_vmovups(vmm_aux_0, table_val("one"));  // aux_0 = "one" in every lane
+        h->vblendmps(vmm_dst | k_mask, vmm_aux_0, table_val("zero"));  // dst = "zero" where k_mask is set, aux_0 ("one") elsewhere
 
-        h->vcmpps(k_mask, vmm_src1, table_val("zero"), _cmp_eq_oq);
-        h->vblendmps(vmm_aux0 | k_mask, vmm_aux0, table_val("zero"));
+        h->vcmpps(k_mask, vmm_src_1, table_val("zero"), _cmp_eq_oq);  // k_mask = lanes where src_1 == "zero"
+        h->vblendmps(vmm_aux_0 | k_mask, vmm_aux_0, table_val("zero"));  // aux_0 = "zero" where k_mask is set, "one" elsewhere
 
-        h->uni_vandps(vmm_dst, vmm_dst, vmm_aux0);
+        h->uni_vandps(vmm_dst, vmm_dst, vmm_aux_0);  // bitwise AND of the two normalized operands
     }
 }
 
@@ -1232,18 +1558,18 @@ size_t jit_logical_and_emitter::aux_vecs_count() const {
 
 
 /// LOGICAL_OR ///
-jit_logical_or_emitter::jit_logical_or_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_logical_or_emitter::jit_logical_or_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_logical_or_emitter::jit_logical_or_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_logical_or_emitter::jit_logical_or_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_logical_or_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -1255,53 +1581,53 @@ void jit_logical_or_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, c
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_logical_or_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_logical_or_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);  // first logical operand
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);  // second logical operand
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);  // scratch: compare mask (SSE/AVX2) or normalized second operand (AVX2/Zmm)
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);  // scratch: zero source (SSE) or second compare mask (AVX2)
     Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]);
 
     if (isa == x64::sse41) {
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src0, _cmp_eq_oq);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // aux_0 = 0
+        h->cmpps(vmm_aux_0, vmm_src_0, _cmp_eq_oq);  // aux_0 = all-ones in lanes where src_0 == 0
         h->movups(vmm_dst, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);  // aux_1 = 0, the value blended into masked lanes
+        h->blendvps(vmm_dst, vmm_aux_1);  // SSE4.1 blendvps reads its mask implicitly from xmm0; presumably vmm_aux_0 is xmm0 - confirm
 
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_eq_oq);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // reset aux_0 before building the mask for the second operand
+        h->cmpps(vmm_aux_0, vmm_src_1, _cmp_eq_oq);  // aux_0 = all-ones in lanes where src_1 == 0
         h->movups(vmm_aux2, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_aux2, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);  // aux_1 = 0 again
+        h->blendvps(vmm_aux2, vmm_aux_1);  // aux2 = 0 in masked lanes, "one" elsewhere (same implicit-xmm0 assumption)
 
         h->uni_vorps(vmm_dst, vmm_dst, vmm_aux2);
     } else if (isa == x64::avx2) {
-        h->vcmpeqps(vmm_aux0, vmm_src0, table_val("zero"));
+        h->vcmpeqps(vmm_aux_0, vmm_src_0, table_val("zero"));  // aux_0 = mask of lanes where src_0 == "zero"
         h->uni_vmovups(vmm_dst, table_val("one"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux_0);  // dst = "zero" in masked lanes, "one" elsewhere
 
-        h->vcmpeqps(vmm_aux1, vmm_src1, table_val("zero"));
-        h->uni_vmovups(vmm_aux0, table_val("one"));
-        h->vblendvps(vmm_aux0, vmm_aux0, table_val("zero"), vmm_aux1);
+        h->vcmpeqps(vmm_aux_1, vmm_src_1, table_val("zero"));  // aux_1 = mask of lanes where src_1 == "zero"
+        h->uni_vmovups(vmm_aux_0, table_val("one"));  // aux_0 is free again: start from "one" in every lane
+        h->vblendvps(vmm_aux_0, vmm_aux_0, table_val("zero"), vmm_aux_1);  // aux_0 = "zero" in masked lanes, "one" elsewhere
 
-        h->uni_vorps(vmm_dst, vmm_dst, vmm_aux0);
+        h->uni_vorps(vmm_dst, vmm_dst, vmm_aux_0);  // bitwise OR of the two normalized operands
     } else {
-        h->vcmpps(k_mask, vmm_src0, table_val("zero"), _cmp_eq_oq);
-        h->uni_vmovups(vmm_aux0, table_val("one"));
-        h->vblendmps(vmm_dst | k_mask, vmm_aux0, table_val("zero"));
+        h->vcmpps(k_mask, vmm_src_0, table_val("zero"), _cmp_eq_oq);  // k_mask = lanes where src_0 == "zero"
+        h->uni_vmovups(vmm_aux_0, table_val("one"));  // aux_0 = "one" in every lane
+        h->vblendmps(vmm_dst | k_mask, vmm_aux_0, table_val("zero"));  // dst = "zero" where k_mask is set, aux_0 ("one") elsewhere
 
-        h->vcmpps(k_mask, vmm_src1, table_val("zero"), _cmp_eq_oq);
-        h->vblendmps(vmm_aux0 | k_mask, vmm_aux0, table_val("zero"));
+        h->vcmpps(k_mask, vmm_src_1, table_val("zero"), _cmp_eq_oq);  // k_mask = lanes where src_1 == "zero"
+        h->vblendmps(vmm_aux_0 | k_mask, vmm_aux_0, table_val("zero"));  // aux_0 = "zero" where k_mask is set, "one" elsewhere
 
-        h->uni_vorps(vmm_dst, vmm_dst, vmm_aux0);
+        h->uni_vorps(vmm_dst, vmm_dst, vmm_aux_0);  // bitwise OR of the two normalized operands
     }
 }
 
@@ -1315,18 +1641,18 @@ size_t jit_logical_or_emitter::aux_vecs_count() const {
 }
 
 /// LOGICAL_XOR ///
-jit_logical_xor_emitter::jit_logical_xor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_logical_xor_emitter::jit_logical_xor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_logical_xor_emitter::jit_logical_xor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_logical_xor_emitter::jit_logical_xor_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_logical_xor_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_logical_xor_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_logical_xor_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -1338,53 +1664,53 @@ void jit_logical_xor_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_logical_xor_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_logical_xor_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);  // first logical operand
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);  // second logical operand
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);  // scratch: compare mask (SSE/AVX2) or normalized second operand (AVX2/Zmm)
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);  // scratch: zero source (SSE) or second compare mask (AVX2)
     Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]);
 
     if (isa == x64::sse41) {
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src0, _cmp_eq_oq);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // aux_0 = 0
+        h->cmpps(vmm_aux_0, vmm_src_0, _cmp_eq_oq);  // aux_0 = all-ones in lanes where src_0 == 0
         h->movups(vmm_dst, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);  // aux_1 = 0, the value blended into masked lanes
+        h->blendvps(vmm_dst, vmm_aux_1);  // SSE4.1 blendvps reads its mask implicitly from xmm0; presumably vmm_aux_0 is xmm0 - confirm
 
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src1, _cmp_eq_oq);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // reset aux_0 before building the mask for the second operand
+        h->cmpps(vmm_aux_0, vmm_src_1, _cmp_eq_oq);  // aux_0 = all-ones in lanes where src_1 == 0
         h->movups(vmm_aux2, table_val("one"));
-        h->pxor(vmm_aux1, vmm_aux1);
-        h->blendvps(vmm_aux2, vmm_aux1);
+        h->pxor(vmm_aux_1, vmm_aux_1);  // aux_1 = 0 again
+        h->blendvps(vmm_aux2, vmm_aux_1);  // aux2 = 0 in masked lanes, "one" elsewhere (same implicit-xmm0 assumption)
 
         h->uni_vxorps(vmm_dst, vmm_dst, vmm_aux2);
     } else if (isa == x64::avx2) {
-        h->vcmpeqps(vmm_aux0, vmm_src0, table_val("zero"));
+        h->vcmpeqps(vmm_aux_0, vmm_src_0, table_val("zero"));  // aux_0 = mask of lanes where src_0 == "zero"
         h->uni_vmovups(vmm_dst, table_val("one"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("zero"), vmm_aux_0);  // dst = "zero" in masked lanes, "one" elsewhere
 
-        h->vcmpeqps(vmm_aux1, vmm_src1, table_val("zero"));
-        h->uni_vmovups(vmm_aux0, table_val("one"));
-        h->vblendvps(vmm_aux0, vmm_aux0, table_val("zero"), vmm_aux1);
+        h->vcmpeqps(vmm_aux_1, vmm_src_1, table_val("zero"));  // aux_1 = mask of lanes where src_1 == "zero"
+        h->uni_vmovups(vmm_aux_0, table_val("one"));  // aux_0 is free again: start from "one" in every lane
+        h->vblendvps(vmm_aux_0, vmm_aux_0, table_val("zero"), vmm_aux_1);  // aux_0 = "zero" in masked lanes, "one" elsewhere
 
-        h->uni_vxorps(vmm_dst, vmm_dst, vmm_aux0);
+        h->uni_vxorps(vmm_dst, vmm_dst, vmm_aux_0);  // bitwise XOR of the two normalized operands
     } else {
-        h->vcmpps(k_mask, vmm_src0, table_val("zero"), _cmp_eq_oq);
-        h->uni_vmovups(vmm_aux0, table_val("one"));
-        h->vblendmps(vmm_dst | k_mask, vmm_aux0, table_val("zero"));
+        h->vcmpps(k_mask, vmm_src_0, table_val("zero"), _cmp_eq_oq);  // k_mask = lanes where src_0 == "zero"
+        h->uni_vmovups(vmm_aux_0, table_val("one"));  // aux_0 = "one" in every lane
+        h->vblendmps(vmm_dst | k_mask, vmm_aux_0, table_val("zero"));  // dst = "zero" where k_mask is set, aux_0 ("one") elsewhere
 
-        h->vcmpps(k_mask, vmm_src1, table_val("zero"), _cmp_eq_oq);
-        h->vblendmps(vmm_aux0 | k_mask, vmm_aux0, table_val("zero"));
+        h->vcmpps(k_mask, vmm_src_1, table_val("zero"), _cmp_eq_oq);  // k_mask = lanes where src_1 == "zero"
+        h->vblendmps(vmm_aux_0 | k_mask, vmm_aux_0, table_val("zero"));  // aux_0 = "zero" where k_mask is set, "one" elsewhere
 
-        h->uni_vxorps(vmm_dst, vmm_dst, vmm_aux0);
+        h->uni_vxorps(vmm_dst, vmm_dst, vmm_aux_0);  // bitwise XOR of the two normalized operands
     }
 }
 
@@ -1398,18 +1724,18 @@ size_t jit_logical_xor_emitter::aux_vecs_count() const {
 }
 
 /// LOGICAL_NOT ///
-jit_logical_not_emitter::jit_logical_not_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_logical_not_emitter::jit_logical_not_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_logical_not_emitter::jit_logical_not_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_logical_not_emitter::jit_logical_not_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_logical_not_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_logical_not_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_logical_not_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -1421,30 +1747,30 @@ void jit_logical_not_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_logical_not_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_logical_not_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
 
     if (isa == x64::sse41) {
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src0, _cmp_eq_oq);
-        h->movups(vmm_aux1, table_val("one"));
+        h->pxor(vmm_aux_0, vmm_aux_0);
+        h->cmpps(vmm_aux_0, vmm_src_0, _cmp_eq_oq);
+        h->movups(vmm_aux_1, table_val("one"));
         h->pxor(vmm_dst, vmm_dst);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->blendvps(vmm_dst, vmm_aux_1);
     } else if (isa == x64::avx2) {
-        h->vcmpeqps(vmm_aux0, vmm_src0, table_val("zero"));
+        h->vcmpeqps(vmm_aux_0, vmm_src_0, table_val("zero"));
         h->uni_vmovups(vmm_dst, table_val("zero"));
-        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux0);
+        h->vblendvps(vmm_dst, vmm_dst, table_val("one"), vmm_aux_0);
     } else {
-        h->vcmpps(k_mask, vmm_src0, table_val("zero"), _cmp_eq_oq);
+        h->vcmpps(k_mask, vmm_src_0, table_val("zero"), _cmp_eq_oq);
         h->uni_vmovups(vmm_dst, table_val("zero"));
         h->vblendmps(vmm_dst | k_mask, vmm_dst, table_val("one"));
     }
@@ -1460,8 +1786,8 @@ size_t jit_logical_not_emitter::aux_vecs_count() const {
 }
 
 /// POWER_STATIC ///
-jit_power_static_emitter::jit_power_static_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_power_static_emitter::jit_power_static_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     auto powerStaticNode = ov::as_type_ptr<ov::snippets::op::PowerStatic>(node);
     if (powerStaticNode == nullptr) {
         IE_THROW() << "Can't cast to snippets::op::PowerStatic";
@@ -1476,14 +1802,14 @@ jit_power_static_emitter::jit_power_static_emitter(x64::jit_generator *host, x64
 
 jit_power_static_emitter::jit_power_static_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa,
                                                    float inpPower, float inpScale, float inpShift,
-                                                   Precision exec_prc)
+                                                   const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc), power(inpPower), scale(inpScale), shift(inpShift) {
     prepare_table();
 }
 
 size_t jit_power_static_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_power_static_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_power_static_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -1495,38 +1821,38 @@ void jit_power_static_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_power_static_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_power_static_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
 
     Xmm xmm0 = Xmm(0), xmm1 = Xmm(1);
 
     if (scale != 1.f || shift != 0.f) {
         if (isa == x64::sse41) {
-            h->uni_vmovups(vmm_aux0, table_val("scale"));
-            h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src0);
+            h->uni_vmovups(vmm_aux_0, table_val("scale"));
+            h->uni_vmulps(vmm_aux_0, vmm_aux_0, vmm_src_0);
             h->uni_vmovups(vmm_dst, table_val("shift"));
-            h->uni_vaddps(vmm_dst, vmm_dst, vmm_aux0);
+            h->uni_vaddps(vmm_dst, vmm_dst, vmm_aux_0);
         } else {
-            if (vmm_dst.getIdx() != vmm_src0.getIdx()) {
+            if (vmm_dst.getIdx() != vmm_src_0.getIdx()) {
                 h->uni_vmovups(vmm_dst, table_val("shift"));
-                h->uni_vfmadd231ps(vmm_dst, vmm_src0, table_val("scale"));
+                h->uni_vfmadd231ps(vmm_dst, vmm_src_0, table_val("scale"));
             } else {
-                h->uni_vmovups(vmm_aux0, table_val("shift"));
-                h->uni_vfmadd231ps(vmm_aux0, vmm_src0, table_val("scale"));
-                h->uni_vmovups(vmm_dst, vmm_aux0);
+                h->uni_vmovups(vmm_aux_0, table_val("shift"));
+                h->uni_vfmadd231ps(vmm_aux_0, vmm_src_0, table_val("scale"));
+                h->uni_vmovups(vmm_dst, vmm_aux_0);
             }
         }
     } else {
-        if (vmm_dst.getIdx() != vmm_src0.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src0);
+        if (vmm_dst.getIdx() != vmm_src_0.getIdx())
+            h->uni_vmovups(vmm_dst, vmm_src_0);
     }
 
     if (power == 1.f) {
@@ -1534,37 +1860,37 @@ void jit_power_static_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs,
         h->uni_vsqrtps(vmm_dst, vmm_dst);
 
         if (power < 0.f) {
-            h->uni_vmovups(vmm_aux0, table_val("one"));
+            h->uni_vmovups(vmm_aux_0, table_val("one"));
             if (isa == x64::sse41) {
-                h->uni_vdivps(vmm_aux0, vmm_aux0, vmm_dst);
-                h->uni_vmovups(vmm_dst, vmm_aux0);
+                h->uni_vdivps(vmm_aux_0, vmm_aux_0, vmm_dst);
+                h->uni_vmovups(vmm_dst, vmm_aux_0);
             } else {
-                h->uni_vdivps(vmm_dst, vmm_aux0, vmm_dst);
+                h->uni_vdivps(vmm_dst, vmm_aux_0, vmm_dst);
             }
         }
     } else if (std::floor(power) == power && power != 0) {
         int ipower = std::abs(static_cast<int>(power));
-        h->uni_vmovups(vmm_aux0, vmm_dst);
+        h->uni_vmovups(vmm_aux_0, vmm_dst);
         for (int i = 1; i < ipower; i++) {
-            h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux0);
+            h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux_0);
         }
 
         if (power < 0.f) {
-            h->uni_vmovups(vmm_aux0, table_val("one"));
+            h->uni_vmovups(vmm_aux_0, table_val("one"));
             if (isa == x64::sse41) {
-                h->uni_vdivps(vmm_aux0, vmm_aux0, vmm_dst);
-                h->uni_vmovups(vmm_dst, vmm_aux0);
+                h->uni_vdivps(vmm_aux_0, vmm_aux_0, vmm_dst);
+                h->uni_vmovups(vmm_dst, vmm_aux_0);
             } else {
-                h->uni_vdivps(vmm_dst, vmm_aux0, vmm_dst);
+                h->uni_vdivps(vmm_dst, vmm_aux_0, vmm_dst);
             }
         }
     } else {
-        h->uni_vmovups(vmm_aux0, table_val("power"));
+        h->uni_vmovups(vmm_aux_0, table_val("power"));
 
         // caller obligation to save gprs as callee may use them
         size_t gpr_size = 8;
-        Xbyak::Operand gprs_to_save[] = {h->r8, h->r9, h->r10, h->r11, h->rax,
-                                         h->rcx, h->rdx, h->rdi, h->rsi, h->rbp, h->rbx};
+        Operand gprs_to_save[] = {h->r8, h->r9, h->r10, h->r11, h->rax,
+                                  h->rcx, h->rdx, h->rdi, h->rsi, h->rbp, h->rbx};
         size_t n_gprs_to_save = sizeof(gprs_to_save) / sizeof(gprs_to_save[0]);
 
         h->sub(h->rsp, n_gprs_to_save * gpr_size);
@@ -1594,7 +1920,7 @@ void jit_power_static_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs,
         for (size_t i = 2; i < get_max_vecs_count() + 2; ++i)
             h->uni_vmovups(h->ptr[h->rsp + i * get_vec_length()], Vmm(i - 2));
         h->uni_vmovups(h->ptr[h->rsp + 0 * get_vec_length()], vmm_dst); // src
-        h->uni_vmovups(h->ptr[h->rsp + 1 * get_vec_length()], vmm_aux0); // beta
+        h->uni_vmovups(h->ptr[h->rsp + 1 * get_vec_length()], vmm_aux_0); // beta
 
         // save function address in gpr to pass in in call instruction
         h->mov(h->rbp, reinterpret_cast<uintptr_t>(powf));
@@ -1651,17 +1977,17 @@ size_t jit_power_static_emitter::aux_vecs_count() const {
 }
 
 /// PRELU ///
-jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
+jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 size_t jit_prelu_emitter::get_inputs_num() const { return 2; }
 
-std::set<std::vector<element::Type>> jit_prelu_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_prelu_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32}};
 }
 
@@ -1673,38 +1999,38 @@ void jit_prelu_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_prelu_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_prelu_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);  // value that is tested against zero
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[1]);  // multiplier applied to the lanes where src_0 is below zero
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);  // scratch: zero / comparison mask / product, depending on the ISA path
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);  // scratch: product (sse41) or comparison mask (avx2)
 
     if (isa == x64::sse41) {
-        h->pxor(vmm_aux0, vmm_aux0);
-        h->cmpps(vmm_aux0, vmm_src0, _cmp_gt_os);
-        h->movups(vmm_aux1, vmm_src1);
-        h->mulps(vmm_aux1, vmm_src0);
-        if (vmm_src0.getIdx() != vmm_dst.getIdx())
-            h->movups(vmm_dst, vmm_src0);
-        h->blendvps(vmm_dst, vmm_aux1);
+        h->pxor(vmm_aux_0, vmm_aux_0);  // aux_0 = 0
+        h->cmpps(vmm_aux_0, vmm_src_0, _cmp_gt_os);  // aux_0 = lane mask of (0 > src_0), going by the predicate name
+        h->movups(vmm_aux_1, vmm_src_1);
+        h->mulps(vmm_aux_1, vmm_src_0);  // aux_1 = src_1 * src_0
+        if (vmm_src_0.getIdx() != vmm_dst.getIdx())
+            h->movups(vmm_dst, vmm_src_0);
+        h->blendvps(vmm_dst, vmm_aux_1);  // NOTE(review): SSE4.1 blendvps takes its mask from xmm0; assumes aux_0 is xmm0 - confirm
     } else if (isa == x64::avx2) {
-        h->vmulps(vmm_aux0, vmm_src0, vmm_src1);
-        h->vxorps(vmm_aux1, vmm_aux1, vmm_aux1);
-        h->vcmpgtps(vmm_aux1, vmm_src0, vmm_aux1);
-        h->vblendvps(vmm_dst, vmm_aux0, vmm_src0, vmm_aux1);
+        h->vmulps(vmm_aux_0, vmm_src_0, vmm_src_1);  // aux_0 = src_0 * src_1
+        h->vxorps(vmm_aux_1, vmm_aux_1, vmm_aux_1);  // aux_1 = 0
+        h->vcmpgtps(vmm_aux_1, vmm_src_0, vmm_aux_1);  // aux_1 = lane mask of (src_0 > 0)
+        h->vblendvps(vmm_dst, vmm_aux_0, vmm_src_0, vmm_aux_1);  // dst = mask ? src_0 : aux_0
     } else if (isa == x64::avx512_core) {
-        h->vxorpd(vmm_aux0, vmm_aux0, vmm_aux0);
-        if (vmm_src0.getIdx() != vmm_dst.getIdx())
-            h->vmovups(vmm_dst, vmm_src0);
-        h->vcmpps(k_mask, vmm_src0, vmm_aux0, _cmp_lt_os);
-        h->vmulps(vmm_dst | k_mask, vmm_src0, vmm_src1);
+        h->vxorpd(vmm_aux_0, vmm_aux_0, vmm_aux_0);  // aux_0 = 0
+        if (vmm_src_0.getIdx() != vmm_dst.getIdx())
+            h->vmovups(vmm_dst, vmm_src_0);
+        h->vcmpps(k_mask, vmm_src_0, vmm_aux_0, _cmp_lt_os);  // k_mask = lanes where src_0 < 0, going by the predicate name
+        h->vmulps(vmm_dst | k_mask, vmm_src_0, vmm_src_1);  // only the k_mask lanes of dst are overwritten with src_0 * src_1
     }
 }
 
@@ -1713,15 +2039,19 @@ size_t jit_prelu_emitter::aux_vecs_count() const {
 }
 
 /// SQRT ///
-jit_sqrt_emitter::jit_sqrt_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {}
-jit_sqrt_emitter::jit_sqrt_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
-: jit_emitter(host, host_isa, exec_prc) {}
+jit_sqrt_emitter::jit_sqrt_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, node, exec_prc) {
+    prepare_table();
+}
+jit_sqrt_emitter::jit_sqrt_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
+        : jit_emitter(host, host_isa, exec_prc) {
+    prepare_table();
+}
 
 size_t jit_sqrt_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_sqrt_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
-    return {{element::f32}};
+std::set<std::vector<element::Type>> jit_sqrt_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+    return { {element::f32}, {element::i64} };  // two single-input signatures: f32 or i64; `node` is not inspected
 }
 
 void jit_sqrt_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
@@ -1732,26 +2062,65 @@ void jit_sqrt_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, const s
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_sqrt_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
 template <x64::cpu_isa_t isa>
 void jit_sqrt_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
-    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[0]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-     h->uni_vsqrtps(vmm_dst, vmm_src0);
+    switch (exec_prc_) {  // the emitted sequence depends on the execution precision
+        case Precision::FP32: h->uni_vsqrtps(vmm_dst, vmm_src_0); break;
+        case Precision::I64: {  // i64 -> f64, sqrt in double, optional explicit rounding, f64 -> i64
+            if (isa == x64::avx512_core) {
+                h->vcvtqq2pd(vmm_dst, vmm_src_0);
+                h->uni_vsqrtpd(vmm_dst, vmm_dst);
+                if (rounding_type != RoundType::nearest) {  // "nearest" is left to the conversion back to i64
+                    h->uni_vroundpd(vmm_dst, vmm_dst, rounding_type);
+                }
+                h->vcvtpd2qq(vmm_dst, vmm_dst);
+            } else {  // conversions are emulated with the "dMask" table constant instead of vcvtqq2pd / vcvtpd2qq
+                Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+                h->uni_vmovups(vmm_aux_0, table_val_64("dMask"));
+
+                h->uni_vpaddq(vmm_dst,  vmm_src_0, vmm_aux_0);  // integer add of the constant's bit pattern ...
+                h->uni_vsubpd(vmm_dst,  vmm_dst,  vmm_aux_0);  // ... then a double subtract of the same constant
+
+                h->uni_vsqrtpd(vmm_dst, vmm_dst);
+                if (rounding_type != RoundType::nearest) {
+                    h->uni_vroundpd(vmm_dst, vmm_dst, rounding_type);
+                }
+
+                h->uni_vaddpd(vmm_dst, vmm_dst, vmm_aux_0);  // reverse direction: double add of the constant ...
+                h->uni_vpsubq(vmm_dst, vmm_dst, vmm_aux_0);  // ... then integer subtract. NOTE(review): presumably exact only for limited |x| - confirm
+            }
+        } break;
+        default: IE_THROW() << "jit_sqrt_emitter doesn't support precision '" << exec_prc_ << "'";
+    }
+}
+
+size_t jit_sqrt_emitter::aux_vecs_count() const {
+    // Only the non-AVX512 I64 path loads "dMask" into an aux vector; every other path needs none, so return 0 explicitly.
+    const bool needs_mask_vec = host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64;
+    return needs_mask_vec ? 1 : 0;
+}
+
+void jit_sqrt_emitter::register_table_entries() {
+    if (host_isa_ != x64::avx512_core && exec_prc_ == Precision::I64) {  // same condition under which emit_isa reads "dMask"
+        push_arg_entry_of_64("dMask",  0x433800002150d000, true);  // NOTE(review): 1.5*2^52 is 0x4338000000000000; confirm the non-zero low bits
+    }
 }
 
 /// Negate ///
-jit_negative_emitter::jit_negative_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
+jit_negative_emitter::jit_negative_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {}
 
 size_t jit_negative_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_negative_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_negative_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -1763,7 +2132,7 @@ void jit_negative_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, con
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_negative_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
@@ -1777,19 +2146,19 @@ void jit_negative_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
 }
 
 /// ERF ///
-jit_erf_emitter::jit_erf_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_erf_emitter::jit_erf_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
-jit_erf_emitter::jit_erf_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
+jit_erf_emitter::jit_erf_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
 
 size_t jit_erf_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_erf_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_erf_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -1803,7 +2172,7 @@ void jit_erf_emitter::emit_impl(
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_erf_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
@@ -1814,14 +2183,13 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
     Vmm vmm_mask = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
-    Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]);
+    Vmm vmm_aux_0 = Vmm(aux_vec_idxs[0]);
+    Vmm vmm_aux_1 = Vmm(aux_vec_idxs[1]);
     Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]);
     Vmm vmm_aux3 = Vmm(aux_vec_idxs[3]);
     Vmm vmm_aux4 = Vmm(aux_vec_idxs[4]);
 
-    auto compute_cmp_mask = [&](const Vmm &vmm_src,
-        const Xbyak::Operand &compare_operand, int cmp_predicate) {
+    auto compute_cmp_mask = [&](const Vmm &vmm_src, const Operand &compare_operand, int cmp_predicate) {
         if (host_isa_ == x64::avx512_core) {
             h->vcmpps(k_mask, vmm_src, compare_operand, cmp_predicate);
         } else {
@@ -1829,7 +2197,7 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
         }
     };
 
-    auto blend_with_mask = [&](const Vmm &vmm_dst, const Xbyak::Operand &src) {
+    auto blend_with_mask = [&](const Vmm &vmm_dst, const Operand &src) {
         if (host_isa_ == x64::avx512_core) {
             h->vblendmps(vmm_dst | k_mask, vmm_dst, src);
         } else {
@@ -1843,7 +2211,7 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
 
         h->uni_vminps(vmm_src, vmm_src, table_val("exp_ln_flt_max_f"));
         h->uni_vmaxps(vmm_src, vmm_src, table_val("exp_ln_flt_min_f"));
-        h->uni_vmovups(vmm_aux1, vmm_src);
+        h->uni_vmovups(vmm_aux_1, vmm_src);
 
         // calculate exp(x)
         // fx = x * log2ef + 0.5
@@ -1858,7 +2226,7 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
         h->uni_vmovups(vmm_src, vmm_aux2);
 
         // x = x - fx * ln2
-        h->uni_vfnmadd231ps(vmm_aux1, vmm_aux2, table_val("ln2f"));
+        h->uni_vfnmadd231ps(vmm_aux_1, vmm_aux2, table_val("ln2f"));
 
         // compute 2^n
         h->uni_vcvtps2dq(vmm_aux2, vmm_src);
@@ -1873,11 +2241,11 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
 
         // compute polynomial
         h->uni_vmovups(vmm_src, table_val("ex_pol5"));
-        h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol4"));
-        h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol3"));
-        h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol2"));
-        h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol1"));
-        h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("one"));
+        h->uni_vfmadd213ps(vmm_src, vmm_aux_1, table_val("ex_pol4"));
+        h->uni_vfmadd213ps(vmm_src, vmm_aux_1, table_val("ex_pol3"));
+        h->uni_vfmadd213ps(vmm_src, vmm_aux_1, table_val("ex_pol2"));
+        h->uni_vfmadd213ps(vmm_src, vmm_aux_1, table_val("ex_pol1"));
+        h->uni_vfmadd213ps(vmm_src, vmm_aux_1, table_val("one"));
         // y = y * 2^n
         h->uni_vmulps(vmm_src, vmm_src, vmm_aux2);
     };
@@ -1899,17 +2267,17 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
     h->uni_vxorps(vmm_src, vmm_src, table_val("sign_mask"));
 
     // get sign
-    h->uni_vmovups(vmm_aux0, vmm_aux3);
-    h->uni_vandps(vmm_aux0, vmm_aux0, table_val("sign_mask"));
+    h->uni_vmovups(vmm_aux_0, vmm_aux3);
+    h->uni_vandps(vmm_aux_0, vmm_aux_0, table_val("sign_mask"));
 
     // abs(x)
-    h->uni_vmovups(vmm_aux1, vmm_aux3);
+    h->uni_vmovups(vmm_aux_1, vmm_aux3);
     // compute abs(x) = _mm_and_ps(x, 01111..111));
-    abs_compute_vector_fwd(vmm_aux1);
+    abs_compute_vector_fwd(vmm_aux_1);
 
     // t = 1 / (p*x + 1)
     h->uni_vmovups(vmm_aux2, table_val("approx_const"));
-    h->uni_vfmadd213ps(vmm_aux2, vmm_aux1, table_val("one"));
+    h->uni_vfmadd213ps(vmm_aux2, vmm_aux_1, table_val("one"));
     h->uni_vmovups(vmm_aux4, table_val("one"));
     h->uni_vdivps(vmm_aux4, vmm_aux4, vmm_aux2);
 
@@ -1917,15 +2285,15 @@ void jit_erf_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
     h->uni_vmulps(vmm_src, vmm_src, vmm_aux4);
 
     // compute polynomialial r
-    h->uni_vmovups(vmm_aux1, table_val("erf_pol5"));
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol4"));
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol3"));
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol2"));
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol1"));
+    h->uni_vmovups(vmm_aux_1, table_val("erf_pol5"));
+    h->uni_vfmadd213ps(vmm_aux_1, vmm_aux4, table_val("erf_pol4"));
+    h->uni_vfmadd213ps(vmm_aux_1, vmm_aux4, table_val("erf_pol3"));
+    h->uni_vfmadd213ps(vmm_aux_1, vmm_aux4, table_val("erf_pol2"));
+    h->uni_vfmadd213ps(vmm_aux_1, vmm_aux4, table_val("erf_pol1"));
 
     // erf = sign * (1 - r * t * exp(-x*x))
-    h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("one"));
-    h->uni_vxorps(vmm_dst, vmm_src, vmm_aux0);
+    h->uni_vfmadd213ps(vmm_src, vmm_aux_1, table_val("one"));
+    h->uni_vxorps(vmm_dst, vmm_src, vmm_aux_0);
 }
 
 void jit_erf_emitter::register_table_entries() {
@@ -1962,18 +2330,18 @@ size_t jit_erf_emitter::aux_vecs_count() const {
 }
 
 /// SOFT SIGN ///
-jit_soft_sign_emitter::jit_soft_sign_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, Precision exec_prc)
-: jit_emitter(host, host_isa, node, exec_prc) {
+jit_soft_sign_emitter::jit_soft_sign_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node,
+        const Precision& exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {
     prepare_table();
 }
-jit_soft_sign_emitter::jit_soft_sign_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_soft_sign_emitter::jit_soft_sign_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
 : jit_emitter(host, host_isa, exec_prc) {
     prepare_table();
 }
 
 size_t jit_soft_sign_emitter::get_inputs_num() const { return 1; }
 
-std::set<std::vector<element::Type>> jit_soft_sign_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_soft_sign_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32}};
 }
 
@@ -1985,7 +2353,7 @@ void jit_soft_sign_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs, co
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_soft_sign_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
@@ -2181,14 +2549,14 @@ void jit_is_nan_emitter::register_table_entries() {
 }
 
 /// SELECT ///
-jit_select_emitter::jit_select_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
+jit_select_emitter::jit_select_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& node, const Precision& exec_prc)
         : jit_emitter(host, host_isa, node, exec_prc) {}
-jit_select_emitter::jit_select_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc)
+jit_select_emitter::jit_select_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const Precision& exec_prc)
         : jit_emitter(host, host_isa, exec_prc) {}
 
 size_t jit_select_emitter::get_inputs_num() const { return 3; }
 
-std::set<std::vector<element::Type>> jit_select_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
+std::set<std::vector<element::Type>> jit_select_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
     return {{element::f32, element::f32, element::f32}};
 }
 
@@ -2209,16 +2577,16 @@ void jit_select_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const
     } else if (host_isa_ == x64::avx512_core) {
         emit_isa<x64::avx512_core>(in_vec_idxs, out_vec_idxs);
     } else {
-        assert(!"unsupported isa");
+        IE_THROW() << "jit_select_emitter doesn't support ISA '" << host_isa_ << "'";
     }
 }
 
-template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+template <x64::cpu_isa_t isa>
 void jit_select_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type;
     Vmm vmm_cond = Vmm(in_vec_idxs[0]);
-    Vmm vmm_src0 = Vmm(in_vec_idxs[1]);
-    Vmm vmm_src1 = Vmm(in_vec_idxs[2]);
+    Vmm vmm_src_0 = Vmm(in_vec_idxs[1]);
+    Vmm vmm_src_1 = Vmm(in_vec_idxs[2]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
     if (isa == x64::sse41) {
@@ -2229,18 +2597,18 @@ void jit_select_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const
         if (vmm_mask.getIdx() != vmm_cond.getIdx()) {
             h->uni_vmovups(vmm_mask, vmm_cond);
         }
-        if (vmm_src1.getIdx() != vmm_dst.getIdx()) {
-            h->uni_vmovups(vmm_dst, vmm_src1);
+        if (vmm_src_1.getIdx() != vmm_dst.getIdx()) {
+            h->uni_vmovups(vmm_dst, vmm_src_1);
         }
-        h->uni_vblendvps(vmm_dst, vmm_dst, vmm_src0, vmm_mask);
+        h->uni_vblendvps(vmm_dst, vmm_dst, vmm_src_0, vmm_mask);
     } else if (isa == x64::avx2) {
         Vmm vmm_zero = Vmm(aux_vec_idxs[0]);
         h->uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
         h->uni_vcmpps(vmm_cond, vmm_cond, vmm_zero, 0x4);
-        h->uni_vblendvps(vmm_dst, vmm_src1, vmm_src0, vmm_cond);
+        h->uni_vblendvps(vmm_dst, vmm_src_1, vmm_src_0, vmm_cond);
     } else {
         h->vptestmd(k_mask, vmm_cond, vmm_cond);
-        h->vblendmps(vmm_dst | k_mask, vmm_src1, vmm_src0);
+        h->vblendmps(vmm_dst | k_mask, vmm_src_1, vmm_src_0);
     }
 }
 }   // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp
index 5c00e4584b4274..858e6ee5dd8edd 100644
--- a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp
+++ b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp
@@ -12,11 +12,11 @@ namespace intel_cpu {
 class jit_add_emitter : public jit_emitter {
 public:
     jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -28,11 +28,11 @@ class jit_add_emitter : public jit_emitter {
 class jit_mul_add_emitter : public jit_emitter {
 public:
     jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                        const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -47,11 +47,11 @@ class jit_mul_add_emitter : public jit_emitter {
 class jit_subtract_emitter : public jit_emitter {
 public:
     jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                         InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                         const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -64,11 +64,13 @@ class jit_subtract_emitter : public jit_emitter {
 class jit_multiply_emitter : public jit_emitter {
 public:
     jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                         InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                         const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
+
+    size_t aux_vecs_count() const override;
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -81,30 +83,34 @@ class jit_multiply_emitter : public jit_emitter {
 class jit_divide_emitter : public jit_emitter {
 public:
     jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                       InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
-    jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                       InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                       const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
+    jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
+
+    bool second_is_float = false;
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
 
     template <dnnl::impl::cpu::x64::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
+
     size_t aux_vecs_count() const override;
+
+    void register_table_entries() override;
 };
 
 class jit_floor_emitter : public jit_emitter {
 public:
     jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -116,12 +122,12 @@ class jit_floor_emitter : public jit_emitter {
 class jit_ceiling_emitter : public jit_emitter {
 public:
     jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -133,12 +139,12 @@ class jit_ceiling_emitter : public jit_emitter {
 class jit_floor_mod_emitter : public jit_emitter {
 public:
     jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                          const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                          const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -146,18 +152,20 @@ class jit_floor_mod_emitter : public jit_emitter {
     template <dnnl::impl::cpu::x64::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
     size_t aux_vecs_count() const override;
+
+    void register_table_entries() override;
 };
 
 
 class jit_mod_emitter : public jit_emitter {
 public:
     jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -171,65 +179,70 @@ class jit_mod_emitter : public jit_emitter {
 class jit_maximum_emitter : public jit_emitter {
 public:
     jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                        const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
 
     template <dnnl::impl::cpu::x64::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
+
+    size_t aux_vecs_count() const override;
 };
 
 
 class jit_minimum_emitter : public jit_emitter {
 public:
     jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                        const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
 
     template <dnnl::impl::cpu::x64::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
+
+    size_t aux_vecs_count() const override;
 };
 
 
 class jit_squared_difference_emitter : public jit_emitter {
 public:
     jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                                   InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
-    jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                                   const std::shared_ptr<ov::Node>& n,
-                                   InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                                   const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
+    jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
+                                   const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
 
     template <dnnl::impl::cpu::x64::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
+
+    size_t aux_vecs_count() const override;
 };
 
 
 class jit_power_dynamic_emitter : public jit_emitter {
 public:
     jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                              InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                              const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                              InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                              const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -242,12 +255,12 @@ class jit_power_dynamic_emitter : public jit_emitter {
 class jit_equal_emitter : public jit_emitter {
 public:
     jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -256,19 +269,18 @@ class jit_equal_emitter : public jit_emitter {
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
 
     void register_table_entries() override;
-    size_t aux_vecs_count() const override;
 };
 
 
 class jit_not_equal_emitter : public jit_emitter {
 public:
     jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                          const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                          const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -284,12 +296,12 @@ class jit_not_equal_emitter : public jit_emitter {
 class jit_greater_emitter : public jit_emitter {
 public:
     jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                        const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                        const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -298,19 +310,18 @@ class jit_greater_emitter : public jit_emitter {
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
 
     void register_table_entries() override;
-    size_t aux_vecs_count() const override;
 };
 
 
 class jit_greater_equal_emitter : public jit_emitter {
 public:
     jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                              InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                              const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                              InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                              const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -326,12 +337,12 @@ class jit_greater_equal_emitter : public jit_emitter {
 class jit_less_emitter : public jit_emitter {
 public:
     jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                     InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                     const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                     InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                     const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -340,20 +351,19 @@ class jit_less_emitter : public jit_emitter {
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
 
     void register_table_entries() override;
-    size_t aux_vecs_count() const override;
 };
 
 
 class jit_less_equal_emitter : public jit_emitter {
 public:
     jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                           InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                           const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                           InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                           const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -369,12 +379,12 @@ class jit_less_equal_emitter : public jit_emitter {
 class jit_logical_and_emitter : public jit_emitter {
 public:
     jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                            const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                            const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -390,12 +400,12 @@ class jit_logical_and_emitter : public jit_emitter {
 class jit_logical_or_emitter : public jit_emitter {
 public:
     jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                           InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                           const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                           InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                           const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -411,12 +421,12 @@ class jit_logical_or_emitter : public jit_emitter {
 class jit_logical_xor_emitter : public jit_emitter {
 public:
     jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                            const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                            const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -431,12 +441,12 @@ class jit_logical_xor_emitter : public jit_emitter {
 class jit_logical_not_emitter : public jit_emitter {
 public:
     jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                            const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                            const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -452,12 +462,12 @@ class jit_power_static_emitter : public jit_emitter {
 public:
     jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
                              float inpPower, float inpScale, float inpShift,
-                             InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                             const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                             InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                             const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 
 private:
@@ -477,12 +487,12 @@ class jit_power_static_emitter : public jit_emitter {
 class jit_prelu_emitter : public jit_emitter {
 public:
     jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                      const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -496,27 +506,33 @@ class jit_prelu_emitter : public jit_emitter {
 class jit_sqrt_emitter : public jit_emitter {
 public:
     jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
+
+    RoundType rounding_type = RoundType::nearest;
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
 
     template <dnnl::impl::cpu::x64::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
+
+    size_t aux_vecs_count() const override;
+
+    void register_table_entries() override;
 };
 
 class jit_negative_emitter : public jit_emitter {
 public:
     jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const override;
@@ -528,13 +544,13 @@ class jit_negative_emitter : public jit_emitter {
 class jit_erf_emitter : public jit_emitter {
 public:
     jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+        const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                    const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(
@@ -551,12 +567,12 @@ class jit_erf_emitter : public jit_emitter {
 class jit_soft_sign_emitter : public jit_emitter {
 public:
     jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                          const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
     jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
-                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                          const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
@@ -579,7 +595,7 @@ class jit_is_finite_emitter : public jit_emitter {
     }
 
     size_t get_inputs_num() const override { return 1; };
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr) {
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr) {
         return {{element::f32}};
     }
 
@@ -607,7 +623,7 @@ class jit_is_inf_emitter : public jit_emitter {
     }
 
     size_t get_inputs_num() const override { return 1; };
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr) {
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr) {
         return {{element::f32}};
     }
 
@@ -637,7 +653,7 @@ class jit_is_nan_emitter : public jit_emitter {
     }
 
     size_t get_inputs_num() const override { return 1; }
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr) {
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr) {
         return {{element::f32}};
     }
 
@@ -655,12 +671,12 @@ class jit_is_nan_emitter : public jit_emitter {
 class jit_select_emitter : public jit_emitter {
 public:
     jit_select_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
-                       InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
-    jit_select_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
-                       InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
+                       const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
+    jit_select_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
+                       const InferenceEngine::Precision &exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
     size_t aux_vecs_count() const override;
 
 private:
diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp
index f727f8d9d1d7a5..fa18f576dcc470 100644
--- a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp
+++ b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp
@@ -52,7 +52,7 @@ emitter_in_out_map jit_emitter::get_in_out_type() const {
 
 size_t jit_emitter::aux_gprs_count() const {
     // We need one gpr to load table address
-    return entry_map_.empty() ? 0 : 1;
+    return entry_map_.empty() && entry_map_64.empty() ? 0 : 1;
 }
 
 std::set<std::vector<element::Type>> jit_emitter::get_supported_precisions(const std::shared_ptr<ngraph::Node>& node) {
@@ -133,7 +133,7 @@ void jit_emitter::emitter_preamble(const std::vector<size_t> &in_idxs, const std
     if (aux_gpr_idxs.size() < aux_gprs_count())
         IE_THROW() << "Failed to allocate required number of general-purpose registers";
 
-    if (!entry_map_.empty()) {
+    if (!entry_map_.empty() || !entry_map_64.empty()) {
         // last aux_gpr_idx is for p_table, we can use aux_gpr_idxs from idx 0 for other purpose
         p_table = Reg64(aux_gpr_idxs[aux_gprs_count() - 1]);
         aux_gpr_idxs.erase(aux_gpr_idxs.end() - 1);
@@ -149,8 +149,9 @@ void jit_emitter::emitter_preamble(const std::vector<size_t> &in_idxs, const std
         push_vec(h->ptr[h->rsp + i * get_vec_length()], preserved_vec_idxs[i]);
     }
 
-    if (!entry_map_.empty())
+    if (!entry_map_.empty() || !entry_map_64.empty()) {
         load_table_addr();
+    }
 }
 
 
@@ -187,6 +188,13 @@ void jit_emitter::emit_data() const {
         for (size_t d = 0; d < len; d += sizeof(table_entry_val_t))
             h->dd(te.val);
     }
+    for (const auto &entry : entry_map_64) {
+        const auto &te = entry.second; // mapped entry: offset, 64-bit value, broadcast flag
+        const auto len = te.bcast ? get_vec_length() : sizeof(table_entry_val_t_64);
+        for (size_t d = 0; d < len; d += sizeof(table_entry_val_t_64)) {
+            h->dq(te.val); // one 64-bit word per step until len bytes are written
+        }
+    }
 }
 
 void jit_emitter::prepare_table() {
@@ -202,6 +210,11 @@ void jit_emitter::prepare_table() {
         te.off = off;
         off += te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
     }
+    for (auto &entry : entry_map_64) {
+        auto &te = entry.second; // 64-bit entries continue at the running byte offset
+        te.off = off;
+        off += te.bcast ? get_vec_length() : sizeof(table_entry_val_t_64);
+    }
 }
 
 void jit_emitter::emit_code(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp
index d2e3a33b914406..7b42a0dcae0b95 100644
--- a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp
+++ b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp
@@ -36,7 +36,7 @@ class jit_emitter : public ov::snippets::Emitter {
         k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well
     }
 
-    jit_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
+    jit_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
                 InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_vec)
         : Emitter(n), h(host), host_isa_(host_isa), exec_prc_(exec_prc), l_table (new Xbyak::Label()), in_out_type_(in_out_type) {
         k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well
@@ -55,7 +55,14 @@ class jit_emitter : public ov::snippets::Emitter {
      * Precisions are ordered, the first bigger bitness precision with the same type will be selected.
      * Empty collection means the emitter supports any input precisions.
      */
-    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ngraph::Node>& node = nullptr);
+    static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
+
+    enum RoundType { // NOTE(review): presumably a rounding-mode selector for derived emitters; users are not shown here - confirm
+        nearest = 0,
+        floor,
+        ceil,
+        truncation
+    };
 
 protected:
     virtual size_t aux_gprs_count() const;
@@ -75,6 +82,7 @@ class jit_emitter : public ov::snippets::Emitter {
 
     // we accept only 32bit hexadecimal table values to avoid any rounding
     using table_entry_val_t = uint32_t;
+    using table_entry_val_t_64 = uint64_t;
     using table_entry_offset_t = size_t; // offsets are in bytes wrt p_table
     using table_entry_bcast_t = bool; // true => bcast value
 
@@ -82,11 +90,20 @@ class jit_emitter : public ov::snippets::Emitter {
         table_entry_val_t val;
         table_entry_bcast_t bcast;
     };
+    struct table_entry_t_64 { // 64-bit counterpart of table_entry_t
+        table_entry_val_t_64 val; // raw 64-bit table value
+        table_entry_bcast_t bcast; // true => bcast value
+    };
     struct mapped_table_entry_t {
         table_entry_offset_t off;
         table_entry_val_t val;
         table_entry_bcast_t bcast;
     };
+    struct mapped_table_entry_t_64 { // 64-bit counterpart of mapped_table_entry_t
+        table_entry_offset_t off; // offset in bytes wrt p_table
+        table_entry_val_t_64 val; // raw 64-bit table value
+        table_entry_bcast_t bcast; // true => bcast value
+    };
 
     mutable Xbyak::Reg64 p_table;
     mutable std::shared_ptr<Xbyak::Label> l_table;
@@ -118,16 +135,29 @@ class jit_emitter : public ov::snippets::Emitter {
         return h->ptr[p_table + off];
     }
 
+    Xbyak::Address table_val_64(std::string key, size_t key_off_val_shift = 0) const {
+        const size_t byte_off = table_off_64(key, key_off_val_shift); // bytes from p_table to the requested 64-bit entry
+        return h->ptr[p_table + byte_off];
+    }
+
     using table_t = std::multimap<std::string, table_entry_t>;
+    using table_t_64 = std::multimap<std::string, table_entry_t_64>;
     using mapped_table_t = std::multimap<std::string, mapped_table_entry_t>;
+    using mapped_table_t_64 = std::multimap<std::string, mapped_table_entry_t_64>;
 
     mapped_table_t entry_map_;
+    mapped_table_t_64 entry_map_64;
 
     void push_arg_entry_of(const std::string key, const table_entry_val_t val, const bool broadcast) {
         mapped_table_entry_t te {0, val, broadcast};
         entry_map_.insert(std::make_pair(key, te));
     }
 
+    void push_arg_entry_of_64(const std::string key, const table_entry_val_t_64 val, const bool broadcast) {
+        // The offset is stored as 0 here; prepare_table() assigns the real one.
+        entry_map_64.emplace(key, mapped_table_entry_t_64{0, val, broadcast});
+    }
+
     void push_entries_of(const table_t &t) {
         for (auto it = t.begin(); it != t.end(); it++) {
             auto key = (*it).first;
@@ -136,6 +166,14 @@ class jit_emitter : public ov::snippets::Emitter {
         }
     }
 
+    void push_entries_of(const table_t_64 &t) {
+        // Register every (key, value, broadcast) triple of the 64-bit table, in map order.
+        for (const auto &kv : t) {
+            const auto &src = kv.second;
+            push_arg_entry_of_64(kv.first, src.val, src.bcast);
+        }
+    }
+
 private:
     mutable std::vector<size_t> preserved_vec_idxs;
     mutable std::vector<size_t> preserved_gpr_idxs;
@@ -153,6 +191,18 @@ class jit_emitter : public ov::snippets::Emitter {
         const auto scale = te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
         return te.off + key_off_val_shift * scale;
     }
+
+    size_t table_off_64(const std::string& key, size_t key_off_val_shift = 0) const {
+        // Returns the byte offset (wrt p_table) of the 64-bit entry stored under `key`,
+        // advanced by key_off_val_shift entries. Assumption: all table entries sharing
+        // the same key also share their broadcast property. TODO: enforce through data structure
+        const auto it = entry_map_64.find(key); // search an entry for a key
+        assert(it != entry_map_64.end());
+        const auto &te = (*it).second;
+        const auto scale = te.bcast ? get_vec_length() : sizeof(table_entry_val_t_64);
+        return te.off + key_off_val_shift * scale;
+    }
+
     virtual void validate_arguments(const std::vector<size_t>&, const std::vector<size_t>&) const {}
 };
 
diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp
index 1d5cb7946eecba..2837219529917d 100644
--- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp
@@ -479,6 +479,10 @@ ScalarEmitter::ScalarEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
             value = ov::as_type_ptr<ov::op::v0::Constant>(n)->cast_vector<int32_t>()[0];
             break;
         }
+        case element::i64: {
+            value = ov::as_type_ptr<ov::op::v0::Constant>(n)->cast_vector<int64_t>()[0];
+            break;
+        }
         case element::f32: {
             value = dnnl::impl::cpu::x64::float2int(ov::as_type_ptr<ov::op::v0::Constant>(n)->cast_vector<float>()[0]);
             break;
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index b65b582e4384b2..cf38459a4da930 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -45,7 +45,6 @@
 #include "memory_desc/cpu_memory_desc_utils.h"
 
 #include <openvino/core/model.hpp>
-#include <openvino/core/node.hpp>
 #include <openvino/op/ops.hpp>
 #include <transformations/utils/utils.hpp>
 #include <low_precision/low_precision.hpp>
@@ -306,7 +305,10 @@ void Graph::Replicate(const CNNNetwork &network) {
     // change precision for input/output nodes to avoid extra data conversion when set input/output blobs
     // also we need to change input/output precisions for consumers/producers to avoid inserting reorder
     for (auto &input : inputNodesMap) {
-        const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
+        auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
+        if (!getConfig().enableNativeI64 && precToSet == Precision::I64) {
+            precToSet = Precision::I32;
+        }
         input.second->setOriginalOutputPrecisionAtPort(0, precToSet);
         const auto childEdges = input.second->getChildEdgesAtPort(0);
         for (size_t i = 0; i < childEdges.size(); i++) {
@@ -319,7 +321,10 @@ void Graph::Replicate(const CNNNetwork &network) {
     }
 
     for (auto &output : outputNodesMap) {
-        const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
+        auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
+        if (!getConfig().enableNativeI64 && precToSet == Precision::I64) {
+            precToSet = Precision::I32;
+        }
         output.second->setOriginalInputPrecisionAtPort(0, precToSet);
         const auto parentEdges = output.second->getParentEdgesAtPort(0);
         for (size_t i = 0; i < parentEdges.size(); i++) {
@@ -977,7 +982,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::
 
         // todo: make sure 'name' exists in this map...
         if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
-            if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
+            if (inTensorDesc.getPrecision() == Precision::FP32) {
                 _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
                                                           inTensorDesc.getLayout());
             } else {
@@ -1424,16 +1429,16 @@ void Graph::SortTopologically() {
     }
 }
 
-void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
+void Graph::GetPerfData(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
     unsigned i = 0;
-    std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const NodePtr&)>
-            getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
-        InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
+    std::function<void(std::map<std::string, InferenceEngineProfileInfo> &, const NodePtr&)>
+            getPerfMapFor = [&](std::map<std::string, InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
+        InferenceEngineProfileInfo &pc = perfMap[node->getName()];
         pc.execution_index = i++;
         // TODO: Why time counter is signed?
         pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg();
-        pc.status = pc.cpu_uSec > 0 ? InferenceEngine::InferenceEngineProfileInfo::EXECUTED
-                                    : InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
+        pc.status = pc.cpu_uSec > 0 ? InferenceEngineProfileInfo::EXECUTED
+                                    : InferenceEngineProfileInfo::NOT_RUN;
         std::string pdType = node->getPrimitiveDescriptorType();
         size_t typeLen = sizeof(pc.exec_type) / sizeof(pc.exec_type[0]);
         pdType.copy(pc.exec_type, typeLen, 0);
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index 8952b09ea6f9af..f52779ad33d1e7 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -18,7 +18,6 @@
 #include "nodes/mvn.h"
 #include "nodes/transpose.h"
 #include "nodes/interpolate.h"
-#include "nodes/reduce.h"
 #include "nodes/input.h"
 #include "nodes/rnn.h"
 #include "nodes/common/cpu_convert.h"
diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index cdd343c126277c..685daa25ead8c4 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -38,7 +38,6 @@
 #include "nodes/memory.hpp"
 #include "nodes/mvn.h"
 #include "nodes/normalize.h"
-#include "nodes/reduce.h"
 #include "nodes/tensoriterator.h"
 #include "nodes/scatter_update.h"
 #include "nodes/interpolate.h"
@@ -52,7 +51,7 @@
 
 #include "nodes/common/cpu_memcpy.h"
 #include "utils/rt_info/memory_formats_attribute.hpp"
-#include <ngraph/opsets/opset1.hpp>
+#include <openvino/opsets/opset1.hpp>
 
 #include <dnnl_types.h>
 #include <dnnl_debug.h>
@@ -80,7 +79,7 @@ Node::NodesFactory & Node::factory() {
     return factoryInstance;
 }
 
-Node::Node(const std::shared_ptr<ngraph::Node>& op,
+Node::Node(const std::shared_ptr<ov::Node>& op,
            const GraphContext::CPtr ctx,
            const ShapeInferFactory& shapeInferFactory)
     : selectedPrimitiveDescriptorIndex(-1),
@@ -95,8 +94,6 @@ Node::Node(const std::shared_ptr<ngraph::Node>& op,
       typeStr(op->get_type_name()),
       type(TypeFromName(op->get_type_name())),
       profiling(op->get_friendly_name()) {
-    const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();
-
     for (size_t i = 0; i < op->get_input_size(); i++) {
         const auto &shape = op->get_input_partial_shape(i);
         if (shape.rank().is_dynamic()) {
@@ -104,11 +101,11 @@ Node::Node(const std::shared_ptr<ngraph::Node>& op,
         }
 
         bool isScalar = shape.rank().get_length() == 0;
-        inputShapes.emplace_back(isScalar ? ngraph::PartialShape{1} : shape);
+        inputShapes.emplace_back(isScalar ? ov::PartialShape{1} : shape);
         originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i)));
     }
 
-    if (typeStr != "Result" && typeStr != "Assign") {
+    if (type != Type::Output && type != Type::MemoryOutput) {
         if (op->get_output_size() == 0) {
             IE_THROW() << "Node with type '" << typeStr << "' and name '" << name << "' does not have any outputs.";
         }
@@ -119,11 +116,10 @@ Node::Node(const std::shared_ptr<ngraph::Node>& op,
             }
 
             bool isScalar = shape.rank().get_length() == 0;
-            outputShapes.emplace_back(isScalar ? ngraph::PartialShape{1} : shape);
+            outputShapes.emplace_back(isScalar ? ov::PartialShape{1} : shape);
             originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i)));
         }
     }
-
     isDynamic = std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape){ return shape.isDynamic(); }) ||
                 std::any_of(outputShapes.begin(), outputShapes.end(), [](const Shape& shape){ return shape.isDynamic(); });
 
@@ -1274,7 +1270,7 @@ InferenceEngine::Precision Node::getRuntimePrecision() const {
     return runtimePrecision;
 }
 
-Node* Node::NodesFactory::create(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) {
+Node* Node::NodesFactory::create(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context) {
     // getExceptionDescWithoutStatus removes redundant information from the exception message. For instance, the NotImplemented
     // exception is generated in the form: full_path_to_src_file:line_number [ NOT_IMPLEMENTED ] reason.
     // An example for gather node:
diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h
index 4cfe9c7d708660..cf49b6e1ebf0b2 100644
--- a/src/plugins/intel_cpu/src/node.h
+++ b/src/plugins/intel_cpu/src/node.h
@@ -22,8 +22,6 @@
 #include "dnnl_scratch_pad.h"
 #include <openvino/itt.hpp>
 #include "utils/ngraph_utils.hpp"
-#include <ngraph/ops.hpp>
-#include <ngraph/node.hpp>
 #include <ie_precision.hpp>
 #include <nodes/common/blocked_desc_creator.h>
 #include "cpu_types.h"
@@ -41,6 +39,8 @@
 #include "nodes/executors/mvn_list.hpp"
 #include "nodes/executors/executor.hpp"
 
+#define THROW_CPU_NODE_ERR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
+
 namespace ov {
 namespace intel_cpu {
 
@@ -436,13 +436,13 @@ class Node {
         return originalOutputPrecisions;
     }
 
-    InferenceEngine::Precision getOriginalInputPrecisionAtPort(size_t port) const {
+    const InferenceEngine::Precision &getOriginalInputPrecisionAtPort(size_t port) const {
         if (originalInputPrecisions.size() <= port) {
             IE_THROW() << "Incorrect input port number for node " << getName();
         }
         return originalInputPrecisions[port];
     }
-    InferenceEngine::Precision getOriginalOutputPrecisionAtPort(size_t port) const {
+    const InferenceEngine::Precision &getOriginalOutputPrecisionAtPort(size_t port) const {
         if (originalOutputPrecisions.size() <= port) {
             IE_THROW() << "Incorrect output port number for node " << getName();
         }
@@ -584,7 +584,7 @@ class Node {
 
     std::string originalLayers;  // contains names of the original layers separated by comma
 
-    Node(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr ctx, const ShapeInferFactory& shapeInferFactory);
+    Node(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr ctx, const ShapeInferFactory& shapeInferFactory);
     Node(const std::string& type, const std::string& name, const GraphContext::CPtr ctx);
 
     int selectedPrimitiveDescriptorIndex = -1;
@@ -740,17 +740,17 @@ constexpr uint64_t PortMask(T... rest) {
 }
 
 class Node::NodesFactory : public openvino::cc::Factory<Type,
-                                            Node*(const std::shared_ptr<ngraph::Node>& op,
+                                            Node*(const std::shared_ptr<ov::Node>& op,
                                                   const GraphContext::CPtr)> {
 public:
     NodesFactory();
 
-    Node* create(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Node* create(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 };
 
 template<typename NodeType>
 struct NodeImpl : public NodeType {
-    NodeImpl(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    NodeImpl(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         : NodeType(op, context) {
         NodeType::perfCounters().template buildClassCounters<NodeType>(NameFromType(NodeType::getType()));
     }
diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.cpp b/src/plugins/intel_cpu/src/nodes/broadcast.cpp
index 2293e36850aada..26f889d9f04e71 100644
--- a/src/plugins/intel_cpu/src/nodes/broadcast.cpp
+++ b/src/plugins/intel_cpu/src/nodes/broadcast.cpp
@@ -2,17 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <cmath>
-#include <vector>
-#include <string>
-#include <dnnl_types.h>
-#include "ie_parallel.hpp"
-#include "utils/bfloat16.hpp"
-#include <selective_build.h>
 #include "broadcast.h"
-#include <nodes/common/blocked_desc_creator.h>
-#include <ngraph/opsets/opset1.hpp>
+
 #include "common/cpu_memcpy.h"
+#include "ie_parallel.hpp"
+#include <openvino/op/broadcast.hpp>
+#include <openvino/op/constant.hpp>
 
 using namespace InferenceEngine;
 
@@ -22,12 +17,12 @@ namespace node {
 
 bool Broadcast::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!ov::is_type<ov::op::v1::Broadcast>(op)) {
+        if (!ov::is_type<op::v1::Broadcast>(op)) {
             errorMessage = "Only Broadcast operations from opset1 are supported.";
             return false;
         }
-        if (!one_of(ov::as_type_ptr<const ov::op::v1::Broadcast>(op)->get_broadcast_spec().m_type,
-                ov::op::AutoBroadcastType::NUMPY, ov::op::AutoBroadcastType::EXPLICIT)) {
+        if (!one_of(ov::as_type_ptr<const op::v1::Broadcast>(op)->get_broadcast_spec().m_type,
+                op::AutoBroadcastType::NUMPY, op::AutoBroadcastType::EXPLICIT)) {
             errorMessage = "Only NUMPY and EXPLICIT broadcast types are supported.";
             return false;
         }
@@ -37,9 +32,9 @@ bool Broadcast::isSupportedOperation(const std::shared_ptr<const ov::Node>& op,
             return false;
         }
         if (!isDynamicNgraphNode(op) &&
-                (!ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX)) ||
+                (!ov::is_type<op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX)) ||
                  (op->get_input_size() > AXES_MAPPING_IDX &&
-                 !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))))) {
+                 !ov::is_type<op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))))) {
             errorMessage = "Only constant target shapes and axis mapping inputs are supported for static shapes.";
             return false;
         }
@@ -50,7 +45,7 @@ bool Broadcast::isSupportedOperation(const std::shared_ptr<const ov::Node>& op,
 }
 
 Broadcast::Broadcast(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
-    : Node(op, context, NgraphShapeInferFactory(op, PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX))) {
+        : Node(op, context, NgraphShapeInferFactory(op, PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
@@ -62,10 +57,10 @@ Broadcast::Broadcast(const std::shared_ptr<ov::Node>& op, const GraphContext::CP
     if (op->get_output_size() == 0)
         IE_THROW() << errorPrefix << "has no output edges.";
 
-    auto broadcastOp = ov::as_type_ptr<const ov::op::v1::Broadcast>(op);
-    if (broadcastOp->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::NUMPY) {
+    auto broadcastOp = ov::as_type_ptr<const op::v1::Broadcast>(op);
+    if (broadcastOp->get_broadcast_spec().m_type == op::AutoBroadcastType::NUMPY) {
         broadcastType = NUMPY;
-    } else if (broadcastOp->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::EXPLICIT) {
+    } else if (broadcastOp->get_broadcast_spec().m_type == op::AutoBroadcastType::EXPLICIT) {
         if (op->get_input_size() <= AXES_MAPPING_IDX)
             IE_THROW() << errorPrefix << " and EXPLICIT mode must have tree input edges: " << getParentEdges().size();
         broadcastType = EXPLICIT;
@@ -73,14 +68,16 @@ Broadcast::Broadcast(const std::shared_ptr<ov::Node>& op, const GraphContext::CP
         IE_THROW() << errorPrefix << "has unexpected broadcast type: " << broadcastOp->get_broadcast_spec().m_type;
     }
 
-    if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX))) {
+    if (auto shapeOp = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX))) {
         constMap[TARGET_SHAPE_IDX] = true;
-        targetShape = (ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX)))->get_vector<int32_t>();
+        targetShape = shapeOp->cast_vector<Dim>();
     }
-    if (broadcastType == EXPLICIT &&
-                ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))) {
-        constMap[AXES_MAPPING_IDX] = true;
-        axesMapping = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))->get_vector<int32_t>();
+
+    if (broadcastType == EXPLICIT) {
+        if (auto axesOp = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))) {
+            constMap[AXES_MAPPING_IDX] = true;
+            axesMapping = axesOp->cast_vector<Dim>();
+        }
     }
 }
 
@@ -117,13 +114,29 @@ bool Broadcast::needPrepareParams() const {
 void Broadcast::prepareParams() {
     if (!constMap[TARGET_SHAPE_IDX]) {
         const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory();
-        const int32_t* targetShapeData = reinterpret_cast<const int32_t *>(targetShapeMem.getData());
-        targetShape.assign(targetShapeData, targetShapeData + targetShapeMem.getStaticDims()[0]);
+        if (targetShapeMem.getDataType() == dnnl::memory::data_type::s64) {
+            const auto *targetShapeData = reinterpret_cast<const int64_t *>(targetShapeMem.getData());
+            targetShape.assign(targetShapeData, targetShapeData + targetShapeMem.getStaticDims()[0]);
+        } else if (targetShapeMem.getDataType() == dnnl::memory::data_type::s32) {
+            const auto *targetShapeData = reinterpret_cast<const int32_t *>(targetShapeMem.getData());
+            targetShape.assign(targetShapeData, targetShapeData + targetShapeMem.getStaticDims()[0]);
+        } else {
+            IE_THROW() << errorPrefix << " does not support precision '" << int(targetShapeMem.getDataType())
+                       << "' for the Target shape input.";
+        }
     }
     if (broadcastType == EXPLICIT && !constMap[AXES_MAPPING_IDX]) {
         const auto& axesMapMem = getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory();
-        const int32_t* axesMapData = reinterpret_cast<const int32_t *>(axesMapMem.getData());
-        axesMapping.assign(axesMapData, axesMapData + axesMapMem.getStaticDims()[0]);
+        if (axesMapMem.getDataType() == dnnl::memory::data_type::s64) {
+            const auto axesMapData = reinterpret_cast<const int64_t *>(axesMapMem.getData());
+            axesMapping.assign(axesMapData, axesMapData + axesMapMem.getStaticDims()[0]);
+        } else if (axesMapMem.getDataType() == dnnl::memory::data_type::s32) {
+            const auto axesMapData = reinterpret_cast<const int32_t *>(axesMapMem.getData());
+            axesMapping.assign(axesMapData, axesMapData + axesMapMem.getStaticDims()[0]);
+        } else {
+            IE_THROW() << errorPrefix << " does not support precision '" << int(axesMapMem.getDataType())
+                       << "' for the Axes mapping input.";
+        }
     }
 
     const auto& srcDims = getParentEdgesAtPort(INPUT_DATA_IDX)[0]->getMemory().getShape().getStaticDims();
@@ -162,22 +175,48 @@ bool Broadcast::needShapeInfer() const {
         if (targetShape.empty()) {
             return true;
         }
-        const int32_t* targetShapeData = reinterpret_cast<const int32_t *>(getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().getData());
-        for (size_t i = 0lu; i < targetShape.size(); i++) {
-            if (targetShape[i] != targetShapeData[i]) {
-                return true;
+        const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory();
+        if (targetShapeMem.getDataType() == dnnl::memory::data_type::s64) {
+            const auto *targetShapeData = reinterpret_cast<const int64_t *>(targetShapeMem.getData());
+            for (size_t i = 0lu; i < targetShape.size(); i++) {
+                if (targetShape[i] != targetShapeData[i]) {
+                    return true;
+                }
+            }
+        } else if (targetShapeMem.getDataType() == dnnl::memory::data_type::s32) {
+            const auto *targetShapeData = reinterpret_cast<const int32_t *>(targetShapeMem.getData());
+            for (size_t i = 0lu; i < targetShape.size(); i++) {
+                if (targetShape[i] != targetShapeData[i]) {
+                    return true;
+                }
             }
+        } else {
+            IE_THROW() << errorPrefix << " does not support precision '" << int(targetShapeMem.getDataType())
+                       << "' for the Target shape input.";
         }
     }
     if (broadcastType == EXPLICIT && !constMap[AXES_MAPPING_IDX]) {
         if (axesMapping.empty()) {
             return true;
         }
-        const int32_t* axesMappingData = reinterpret_cast<const int32_t *>(getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().getData());
-        for (size_t i = 0lu; i < axesMapping.size(); i++) {
-            if (axesMapping[i] != axesMappingData[i]) {
-                return true;
+        const auto& axesMapMem = getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory();
+        if (axesMapMem.getDataType() == dnnl::memory::data_type::s64) {
+            const auto *axesMappingData = reinterpret_cast<const int64_t *>(axesMapMem.getData());
+            for (size_t i = 0lu; i < axesMapping.size(); i++) {
+                if (axesMapping[i] != axesMappingData[i]) {
+                    return true;
+                }
+            }
+        } else if (axesMapMem.getDataType() == dnnl::memory::data_type::s32) {
+            const auto *axesMappingData = reinterpret_cast<const int32_t *>(axesMapMem.getData());
+            for (size_t i = 0lu; i < axesMapping.size(); i++) {
+                if (axesMapping[i] != axesMappingData[i]) {
+                    return true;
+                }
             }
+        } else {
+            IE_THROW() << errorPrefix << " does not support precision '" << int(axesMapMem.getDataType())
+                       << "' for the Axes mapping input.";
         }
     }
     needPrepareParamsVar = false;
diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.h b/src/plugins/intel_cpu/src/nodes/broadcast.h
index 4ab3201365e05f..34ac289eef2c85 100644
--- a/src/plugins/intel_cpu/src/nodes/broadcast.h
+++ b/src/plugins/intel_cpu/src/nodes/broadcast.h
@@ -6,10 +6,6 @@
 
 #include "common/tile_broadcast_utils.h"
 
-#include <memory>
-#include <string>
-#include <vector>
-
 namespace ov {
 namespace intel_cpu {
 namespace node {
@@ -45,8 +41,8 @@ class Broadcast : public Node, public TileBroadcastCommon {
     static constexpr size_t TARGET_SHAPE_IDX = 1;
     static constexpr size_t AXES_MAPPING_IDX = 2;
 
-    std::vector<int32_t> targetShape;
-    std::vector<int32_t> axesMapping;
+    VectorDims targetShape;
+    VectorDims axesMapping;
 
     std::string errorPrefix;
 };
diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
index d8322c709e2288..f4bd26dbfdafd2 100644
--- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
@@ -224,7 +224,12 @@ const std::tuple<U, U> & Range<T, U>::fit(const Precision & prec) {
                 IE_THROW() << "Unsupported precision";
         }
         std::get<0>(_range) = static_cast<U>(std::max(static_cast<double>(std::get<0>(_range)), lbound));
-        std::get<1>(_range) = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
+
+        auto v1 = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
+        if (v1 < U(0)) { // WA for conversion double->int64:  9.2233720368547758e+18 -> -9223372036854775808
+            v1 -= U(1);
+        }
+        std::get<1>(_range) = v1;
     } else {
         int64_t lbound;
         uint64_t ubound;
diff --git a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp
index e7921c24abd8e0..9f3c031714b4fa 100644
--- a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp
@@ -92,8 +92,12 @@ bool TileBroadcastCommon::canBeExecutedInNSPCLayout(VectorDims srcBlockedDims, V
 
 std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const Node *node) {
     std::vector<NodeDesc> supportedPrimitiveDescriptors;
-    auto precision = node->getOriginalInputPrecisionAtPort(0);
+    const auto &precision = node->getOriginalInputPrecisionAtPort(0);
     auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(precision);
+    auto secPrecision = node->getOriginalInputPrecisionAtPort(1);
+    if (!one_of(secPrecision, Precision::I32, Precision::I64)) {
+        secPrecision = Precision::I32;
+    }
 
     const auto& srcDims = node->getInputShapeAtPort(0).getDims();
     const auto& inDataShape = node->getInputShapeAtPort(0);
@@ -109,11 +113,15 @@ std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const Node *node)
     config.inConfs[0].constant(constMap[0]);
     config.inConfs[1].inPlace(-1);
     config.inConfs[1].constant(constMap[1]);
-    config.inConfs[1].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(Precision::I32, node->getInputShapeAtPort(1)));
+    config.inConfs[1].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(secPrecision, node->getInputShapeAtPort(1)));
     if (config.inConfs.size() == 3) {
+        auto thrdPrecision = node->getOriginalInputPrecisionAtPort(2);
+        if (!one_of(thrdPrecision, Precision::I32, Precision::I64)) {
+            thrdPrecision = Precision::I32;
+        }
         config.inConfs[2].inPlace(-1);
         config.inConfs[2].constant(constMap[2]);
-        config.inConfs[2].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(Precision::I32, node->getInputShapeAtPort(2)));
+        config.inConfs[2].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(thrdPrecision, node->getInputShapeAtPort(2)));
     }
 
     config.outConfs.resize(node->getChildEdges().size());
diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp
index 633f40cea00fa5..7a306f35c82cc6 100644
--- a/src/plugins/intel_cpu/src/nodes/concat.cpp
+++ b/src/plugins/intel_cpu/src/nodes/concat.cpp
@@ -3,26 +3,11 @@
 //
 
 #include "concat.h"
-
-#include <map>
-#include <utility>
-#include <vector>
-#include <dnnl_extension_utils.h>
-
-#include <onednn/dnnl.h>
-#include <onednn/iml_type_mapper.h>
-#include <edge.h>
-#include <cpu_memory.h>
 #include "ie_parallel.hpp"
-#include "conv.h"
-#include "fake_quantize.h"
-#include "pooling.h"
-#include "eltwise.h"
-#include <limits>
 #include "common/cpu_memcpy.h"
-#include "common/blocked_desc_creator.h"
-#include <memory_desc/cpu_memory_desc_utils.h>
+#include <openvino/op/concat.hpp>
 #include <partitioned_mem_mgr.h>
+
 using namespace dnnl;
 using namespace InferenceEngine;
 
@@ -37,10 +22,9 @@ bool Concat::isExecutable() const {
     return !isInPlace() && !hasEmptyOutputTensors();
 }
 
-bool Concat::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Concat::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        const auto concatOp = ngraph::as_type_ptr<const ngraph::op::v0::Concat>(op);
-        if (!concatOp) {
+        if (op->get_type_info() != op::v0::Concat::get_type_info_static()) {
             errorMessage = "Node is not an instance of the Concat operation.";
             return false;
         }
@@ -50,7 +34,7 @@ bool Concat::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op,
     return true;
 }
 
-Concat::Concat(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+Concat::Concat(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
@@ -58,13 +42,13 @@ Concat::Concat(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr
     }
 
     const auto inRank = getInputShapeAtPort(0).getRank();
-    auto concatOp = ngraph::as_type_ptr<ngraph::op::v0::Concat>(op);
+    auto concatOp = ov::as_type_ptr<op::v0::Concat>(op);
     auto axis = concatOp->get_axis();
     if (axis < 0) {
         axis += inRank;
     }
     if (axis >= static_cast<int64_t>(inRank) || axis < 0) {
-        IE_THROW() << "Concat node with name '" << getName() << "' has invalid value of axis parameter: " << axis;
+        THROW_CPU_NODE_ERR << "has invalid value of axis parameter: " << axis;
     }
     this->axis = axis;
 }
@@ -83,7 +67,7 @@ void Concat::getSupportedDescriptors() {
             }
         }
         if (incorrectDims || firstParentDims.size() == 0) {
-            IE_THROW() << "Incorrect input dimensions for concat node " << getName();
+            THROW_CPU_NODE_ERR << " has incorrect input dimensions.";
         }
     }
 
@@ -195,8 +179,14 @@ void Concat::selectOptimalPrimitiveDescriptor() {
     // be replicated. Inplace approach is not applicable
     // for that case.
     for (size_t i = 0; i < getParentEdges().size(); i++) {
+        if (!canBeInPlace) {
+            break;
+        }
         for (size_t j = i + 1; j < getParentEdges().size(); j++) {
-            if (getParentEdgeAt(i) == getParentEdgeAt(j)) canBeInPlace = false;
+            if (getParentEdgeAt(i) == getParentEdgeAt(j)) {
+                canBeInPlace = false;
+                break;
+            }
         }
     }
 
@@ -324,7 +314,7 @@ void Concat::prepareParams() {
         IE_THROW() << "Destination memory didn't allocate.";
     auto dstMemDesc = dstMemPtr->getDescWithType<BlockedMemoryDesc>();
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        IE_THROW() << "Preferable primitive descriptor is not set.";
+        THROW_CPU_NODE_ERR << "does not have preferable primitive descriptor.";
 
     const auto& outputStrides = dstMemDesc->getStrides();
     size_t curConcatOffset = 0;
@@ -348,8 +338,7 @@ void Concat::prepareParams() {
         const auto& srcMemPtr = getParentEdgesAtPort(i)[0]->getMemoryPtr();
         if (!srcMemPtr || !srcMemPtr->isAllocated()) {
             auto parent = getParentEdgeAt(i)->getParent();
-            IE_THROW() << "Source memory from " << parent->getName() << " didn't allocate for node "
-                       << getName() << ".";
+            THROW_CPU_NODE_ERR << "has input '" << parent->getName() << "' with not allocated memory.";
         }
 
         if (canExecRef) {
@@ -413,7 +402,7 @@ size_t Concat::inverseOrder(const SizeVector& order, size_t axis) {
 void Concat::initOptimalPrimitiveDescriptor() {
     auto selected_pd = getSelectedPrimitiveDescriptor();
     if (selected_pd == nullptr)
-        IE_THROW() << "Preferable primitive descriptor is not set.";
+        THROW_CPU_NODE_ERR << "does not have preferable primitive descriptor.";
 
    if (!isInPlace()) {
        Node::initOptimalPrimitiveDescriptor();
diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h
index e9a4c9e764a7b3..1504cb92f38eb2 100644
--- a/src/plugins/intel_cpu/src/nodes/concat.h
+++ b/src/plugins/intel_cpu/src/nodes/concat.h
@@ -4,11 +4,7 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
-#include <ie_precision.hpp>
-#include <graph_context.h>
 
 namespace ov {
 namespace intel_cpu {
@@ -16,9 +12,9 @@ namespace node {
 
 class Concat : public Node {
 public:
-    Concat(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Concat(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
     void initOptimalPrimitiveDescriptor() override;
diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp b/src/plugins/intel_cpu/src/nodes/convert.cpp
index 2f3fa0d1b675b1..10b010959c2a29 100644
--- a/src/plugins/intel_cpu/src/nodes/convert.cpp
+++ b/src/plugins/intel_cpu/src/nodes/convert.cpp
@@ -2,12 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <dnnl_extension_utils.h>
+// #include <dnnl_extension_utils.h>
 #include "convert.h"
-#include "common/blocked_desc_creator.h"
-#include <ngraph/opsets/opset1.hpp>
+// #include "common/blocked_desc_creator.h"
+#include <openvino/op/convert.hpp>
 #include <ie_ngraph_utils.hpp>
-#include <utils/ngraph_utils.hpp>
+// #include <utils/ngraph_utils.hpp>
 #include <utils/shape_inference/shape_inference_pass_through.hpp>
 
 using namespace dnnl;
@@ -17,10 +17,9 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool Convert::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Convert::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        const auto convert = std::dynamic_pointer_cast<const ngraph::opset1::Convert>(op);
-        if (!convert) {
+        if (op->get_type_info() != op::v0::Convert::get_type_info_static()) {
             errorMessage = "Only opset1 Convert operation is supported";
             return false;
         }
@@ -30,21 +29,19 @@ bool Convert::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op
     return true;
 }
 
-Convert::Convert(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+Convert::Convert(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
         : Node(op, context, PassThroughShapeInferFactory()) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        errorPrefix = "Convert node with name '" + getName() + "'";
-    } else {
+    if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    auto convert = ov::as_type_ptr<const ngraph::opset1::Convert>(op);
+    auto convert = ov::as_type_ptr<const op::v0::Convert>(op);
     convertParams.origPrc = details::convertPrecision(convert->get_destination_type());
 }
 
 Convert::Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
-                 const std::string &nodeName, const GraphContext::CPtr context)
+                 const std::string &nodeName, const GraphContext::CPtr& context)
         : Node("Convert", nodeName, context) {
     convertParams.origPrc = outPrc;
     inputShapes.push_back(shape);
@@ -56,8 +53,6 @@ Convert::Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, co
     if (isDynamicNode()) {
         shapeInference = std::make_shared<ShapeInferPassThrough>();
     }
-
-    errorPrefix = "Convert node with name '" + getName() + "'";
 }
 
 void Convert::getSupportedDescriptors() {
@@ -68,9 +63,9 @@ void Convert::getSupportedDescriptors() {
     if (inputShapes.empty())
         inputShapes.push_back(input->getShape());
     if (getParentEdges().size() != 1)
-        IE_THROW() << errorPrefix << " has incorrect number of input edges";
+        THROW_CPU_NODE_ERR << " has incorrect number of input edges";
     if (getChildEdges().empty())
-        IE_THROW() << errorPrefix << " has incorrect number of output edges";
+        THROW_CPU_NODE_ERR << " has incorrect number of output edges";
 }
 
 bool Convert::isSupportedDesc(const MemoryDesc &desc) {
@@ -117,25 +112,25 @@ void Convert::initSupportedPrimitiveDescriptors() {
         config.outConfs.push_back(dataConfigOut);
         supportedPrimitiveDescriptorsBuilder(config);
     } else if (inputShapes.size() == 1 && outputShapes.size() == 1) {
-        const Shape& insShape = getInputShapeAtPort(0);
-        auto insPrecision = getOriginalInputPrecisionAtPort(0);
-        const Shape& outputShape = getOutputShapeAtPort(0);
-        auto outPrecision = getOriginalOutputPrecisionAtPort(0);
+        const auto& inShape = getInputShapeAtPort(0);
+        const auto& inPrecision = getOriginalInputPrecisionAtPort(0);
+        const auto& outputShape = getOutputShapeAtPort(0);
+        const auto& outPrecision = getOriginalOutputPrecisionAtPort(0);
 
         config.inConfs.push_back(dataIn);
         config.outConfs.push_back(dataConfigOut);
 
         auto creators = BlockedDescCreator::getCommonCreators();
-        auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank());
+        auto range = BlockedDescCreator::makeFilteredRange(creators, inShape.getRank());
 
         for (auto itr = range.first; itr != range.second; ++itr) {
-            config.inConfs[0].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(itr->second->createDesc(insPrecision, insShape)));
+            config.inConfs[0].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(itr->second->createDesc(inPrecision, inShape)));
             config.outConfs[0].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(itr->second->createDesc(outPrecision, outputShape)));
 
             supportedPrimitiveDescriptorsBuilder(config);
         }
     } else {
-        IE_THROW() << errorPrefix << " has incorrect number of input/output edges";
+        THROW_CPU_NODE_ERR << " has incorrect number of input/output edges";
     }
 }
 
@@ -165,7 +160,7 @@ void Convert::execute(dnnl::stream strm) {
     const auto childPaddElemCount = childMem.getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
 
     if (parentPaddElemCount != childPaddElemCount)
-        IE_THROW() << errorPrefix << " has different elements number in input and output buffers";
+        THROW_CPU_NODE_ERR << " has different elements number in input and output buffers";
 
     MemoryCPtr srcMemory = getParentEdgeAt(0)->getMemoryPtr();
     MemoryPtr dstMemory = getChildEdgeAt(0)->getMemoryPtr();
diff --git a/src/plugins/intel_cpu/src/nodes/convert.h b/src/plugins/intel_cpu/src/nodes/convert.h
index 3fd65ebb20a5d6..d8676ec0217fff 100644
--- a/src/plugins/intel_cpu/src/nodes/convert.h
+++ b/src/plugins/intel_cpu/src/nodes/convert.h
@@ -16,9 +16,9 @@ namespace node {
 
 class Convert : public Node {
 public:
-    Convert(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Convert(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr& context);
     Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
-                      const std::string &nodeName, const GraphContext::CPtr context);
+                      const std::string &nodeName, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -54,8 +54,6 @@ class Convert : public Node {
     ConvertParams convertParams;
     std::shared_ptr<ConvertExecutor> execPtr = nullptr;
     NodeConfig config;
-
-    std::string errorPrefix;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp
index 65d3a55ada0cb4..20563df8667e3d 100644
--- a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp
+++ b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp
@@ -2,15 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <string>
-#include <vector>
+#include "cum_sum.h"
 
-#include <ngraph/opsets/opset1.hpp>
-#include <ngraph/opsets/opset3.hpp>
 #include "ie_parallel.hpp"
-#include "ie_precision.hpp"
-#include <ie_ngraph_utils.hpp>
-#include "cum_sum.h"
+#include <openvino/op/cum_sum.hpp>
 #include "utils/bfloat16.hpp"
 
 using namespace InferenceEngine;
@@ -19,10 +14,9 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool CumSum::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool CumSum::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        const auto cumsum = std::dynamic_pointer_cast<const ngraph::opset3::CumSum>(op);
-        if (!cumsum) {
+        if (op->get_type_info() != op::v0::CumSum::get_type_info_static()) {
             errorMessage = "Only opset3 CumSum operation is supported";
             return false;
         }
@@ -32,7 +26,7 @@ bool CumSum::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op,
     return true;
 }
 
-CumSum::CumSum(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
+CumSum::CumSum(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
@@ -49,7 +43,7 @@ CumSum::CumSum(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr
         IE_THROW() << errorPrefix << " doesn't support 'data' input tensor with rank: " << numOfDims;
     }
 
-    const auto cumsum = std::dynamic_pointer_cast<const ngraph::opset3::CumSum>(op);
+    const auto cumsum = ov::as_type_ptr<const op::v0::CumSum>(op);
     if (cumsum == nullptr)
         IE_THROW() << "Operation with name '" << op->get_friendly_name() <<
             "' is not an instance of CumSum from opset3.";
@@ -59,7 +53,7 @@ CumSum::CumSum(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr
 
     if (getOriginalInputsNumber() == numOfInputs) {
         const auto axis_shape = cumsum->get_input_partial_shape(AXIS);
-        if (axis_shape.is_dynamic() || !ngraph::is_scalar(axis_shape.to_shape()))
+        if (axis_shape.is_dynamic() || !ov::is_scalar(axis_shape.to_shape()))
             IE_THROW() << errorPrefix << " doesn't support 'axis' input tensor with non scalar rank";
     }
 
diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.h b/src/plugins/intel_cpu/src/nodes/cum_sum.h
index eee2da8c085472..961ae5362a15f0 100644
--- a/src/plugins/intel_cpu/src/nodes/cum_sum.h
+++ b/src/plugins/intel_cpu/src/nodes/cum_sum.h
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
 
 namespace ov {
@@ -13,7 +12,7 @@ namespace node {
 
 class CumSum : public Node {
 public:
-    CumSum(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    CumSum(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
@@ -23,7 +22,7 @@ class CumSum : public Node {
     bool needPrepareParams() const override;
     void executeDynamicImpl(dnnl::stream strm) override;
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
 private:
     template <typename dataType>
diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp
index d8dd6bb1a6b586..d90b0c8ab42e71 100644
--- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp
@@ -16,6 +16,7 @@
 #include <dnnl_extension_utils.h>
 #include <cpu/x64/jit_generator.hpp>
 #include <common/dnnl_thread.hpp>
+#include <openvino/op/deformable_convolution.hpp>
 
 using namespace InferenceEngine;
 using namespace dnnl;
@@ -673,8 +674,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_
 bool DeformableConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (!one_of(op->get_type_info(),
-                ngraph::op::v1::DeformableConvolution::get_type_info_static(),
-                ngraph::op::v8::DeformableConvolution::get_type_info_static())) {
+                op::v1::DeformableConvolution::get_type_info_static(),
+                op::v8::DeformableConvolution::get_type_info_static())) {
             errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1 or v8.";
             return false;
         }
@@ -749,7 +750,7 @@ DeformableConvolution::DeformableConvolution(const std::shared_ptr<ngraph::Node>
         IE_THROW(NotImplemented) << errorMessage;
     }
     errorPrefix = "Deformable convolution with name '" + op->get_friendly_name() + "'";
-    auto defConvNodeBase = std::dynamic_pointer_cast<ngraph::op::util::DeformableConvolutionBase>(op);
+    auto defConvNodeBase = std::dynamic_pointer_cast<op::util::DeformableConvolutionBase>(op);
     if (defConvNodeBase == nullptr)
         IE_THROW() << errorPrefix << " is not an instance of DeformableConvolutionBase.";
 
@@ -769,8 +770,8 @@ DeformableConvolution::DeformableConvolution(const std::shared_ptr<ngraph::Node>
 
     autoPadding = one_of(defConvNodeBase->get_auto_pad(), ov::op::PadType::SAME_UPPER, ov::op::PadType::SAME_LOWER);
 
-    if (op->get_type_info() == ngraph::op::v8::DeformableConvolution::get_type_info_static()) {
-        auto defConvNode = std::dynamic_pointer_cast<ngraph::op::v8::DeformableConvolution>(op);
+    if (op->get_type_info() == op::v8::DeformableConvolution::get_type_info_static()) {
+        auto defConvNode = std::dynamic_pointer_cast<op::v8::DeformableConvolution>(op);
         if (defConvNode == nullptr)
             IE_THROW() << errorPrefix << " is not an instance of DeformableConvolution from opset8.";
         defConvAttr.with_bilinear_pad = defConvNode->get_bilinear_interpolation_pad();
diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
index 1f74c4f70a2c1a..a46ff1fefe63a0 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -1,4 +1,3 @@
-
 // Copyright (C) 2018-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -10,7 +9,6 @@
 
 #include <ie_parallel.hpp>
 
-#include "cpu_types.h"
 #include "utils/bfloat16.hpp"
 #include "ie_ngraph_utils.hpp"
 #include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
@@ -18,8 +16,6 @@
 
 #include <onednn/dnnl.h>
 #include <dnnl_extension_utils.h>
-#include "fake_quantize.h"
-#include "pooling.h"
 #include "input.h"
 #include "common/cpu_convert.h"
 
@@ -29,11 +25,9 @@
 #include "emitters/x64/jit_bf16_emitters.hpp"
 #include <selective_build.h>
 #include "utils/general_utils.h"
-#include "utils/cpu_utils.hpp"
 #include <common/primitive_hashing_utils.hpp>
 
-#include "ngraph/ngraph.hpp"
-#include <ngraph/opsets/opset1.hpp>
+#include <openvino/opsets/opset12.hpp>
 #include "transformations/cpu_opset/common/op/power_static.hpp"
 #include "transformations/cpu_opset/common/op/leaky_relu.hpp"
 #include "transformations/cpu_opset/common/op/swish_cpu.hpp"
@@ -43,9 +37,9 @@
 #include <memory>
 #include <algorithm>
 #include <cmath>
-#include <map>
 #include <functional>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
+#include "executors/eltwise_list.hpp"
 
 using namespace InferenceEngine;
 using namespace dnnl::impl::utils;
@@ -73,7 +67,7 @@ struct EltwiseEmitterContext {
     jit_generator *host;
     cpu_isa_t host_isa;
     const Eltwise::EltwiseData& opData;
-    InferenceEngine::Precision exec_prc;
+    Precision exec_prc;
 };
 
 template<typename T>
@@ -137,7 +131,7 @@ InferenceEngine::Precision eltwise_precision_helper::get_precision(const size_t
 
     // for element-wise operations all inputs must to have the same precisions
     auto has_same_precision = [](const std::vector<element::Type>& precisions) {
-        return std::all_of(precisions.begin(), precisions.end(), [&precisions](const element::Type precision) {
+        return std::all_of(precisions.begin(), precisions.end(), [&precisions](const element::Type& precision) {
             return precision == precisions[0];
         });
     };
@@ -165,15 +159,17 @@ InferenceEngine::Precision eltwise_precision_helper::get_precision(const size_t
             element::i16,
             element::bf16,
             element::i32,
+            element::i64,
             element::f32
     };
 
     for (const auto prc : exec_precisions_priority) {
         if (std::any_of(
-            supported_precision_intersection.begin(),
-            supported_precision_intersection.end(),
-            [&prc](const std::vector<element::Type>& precisions) { return std::find(precisions.begin(), precisions.end(), prc) != precisions.end(); })) {
-            exec_prc = InferenceEngine::details::convertPrecision(prc);
+                    supported_precision_intersection.begin(),
+                    supported_precision_intersection.end(),
+                    [&prc](const std::vector<element::Type>& precisions) {
+                        return std::find(precisions.begin(), precisions.end(), prc) != precisions.end(); })) {
+            exec_prc = details::convertPrecision(prc);
             break;
         }
     }
@@ -185,6 +181,24 @@ InferenceEngine::Precision eltwise_precision_helper::get_precision(const size_t
         }
     }
 
+    bool allInpI64 = true;
+    for (size_t i = 0lu; i < inputs_number; i++) {
+        if (src_prc[i] != Precision::I64) {
+            allInpI64 = false;
+            break;
+        }
+    }
+    if (allInpI64) {
+        for (const auto &prcs : supported_precision_intersection) {
+            if (prcs[0] == element::i64) {
+                exec_prc = Precision::I64;
+                break;
+            } else if (prcs[0] == element::f64) {
+                exec_prc = Precision::FP64;
+            }
+        }
+    }
+
     if (exec_prc == Precision::UNSPECIFIED) {
         IE_THROW() << "Eltwise jitter failed to specify execution precision for Eltwise node";
     }
@@ -203,7 +217,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
         OV_CASE(Algorithm::EltwiseTanh, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseSigmoid, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter),
-        OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter),
+        OV_CASE(Algorithm::EltwiseSqrt, jit_sqrt_emitter),
         OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseExp, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter),
@@ -340,12 +354,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
 
         mov(reg_post_op_ptrs, ptr[reg_const_params + GET_OFF(post_op_data)]);
 
-        Xbyak::Label unroll_loop_label;
-        Xbyak::Label unroll_loop_end_label;
-        Xbyak::Label main_loop_label;
-        Xbyak::Label main_loop_end_label;
-        Xbyak::Label tail_loop_label;
-        Xbyak::Label tail_loop_end_label;
+        Label unroll_loop_label;
+        Label unroll_loop_end_label;
+        Label main_loop_label;
+        Label main_loop_end_label;
+        Label tail_loop_label;
+        Label tail_loop_end_label;
 
         if (isa == x64::avx512_core)
             vpxord(vmm_zero, vmm_zero, vmm_zero);
@@ -577,7 +591,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
         OV_CASE(Algorithm::EltwiseTanh, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseSigmoid, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter),
-        OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter),
+        OV_CASE(Algorithm::EltwiseSqrt, jit_sqrt_emitter),
         OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseExp, jit_dnnl_aux_emitter),
         OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter),
@@ -682,17 +696,40 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
         }
     }
 
-    inline void load_vector(Vmm vmm_src, const Xbyak::Address &op, Precision src_prc, Precision dst_prc, bool broadcast) {
+    inline void load_vector(const Vmm &vmm_src, const Address &op, const Precision &src_prc, const Precision &dst_prc, bool broadcast) {
         Xmm xmm_src = Xmm(vmm_src.getIdx());
+        Ymm ymm_src = Ymm(vmm_src.getIdx());
 
         if (broadcast) {
-            load_scalar(xmm_src, op, src_prc, dst_prc);
-            uni_vbroadcastss(vmm_src, xmm_src);
+                load_scalar(xmm_src, op, src_prc, dst_prc);
+                if (src_prc.size() == 8) {
+                    uni_vbroadcastsd(vmm_src, xmm_src);
+                } else {
+                    uni_vbroadcastss(vmm_src, xmm_src);
+                }
         } else {
             switch (src_prc) {
+                case Precision::I64:
+                    if (dst_prc == Precision::I64 || dst_prc == Precision::I32) {
+                        uni_vmovups(vmm_src, op);
+                    } else if (dst_prc == Precision::FP64) {
+                        if (x64::mayiuse(x64::avx512_core)) {
+                            vcvtqq2pd(vmm_src, op);
+                        } else {
+                            // Do conversion inside the emitter.
+                            uni_vmovups(vmm_src, op);
+                        }
+                    }
+                    break;
                 case Precision::FP32:
+                    if (dst_prc == Precision::FP32) {
+                        uni_vmovups(vmm_src, op);
+                    }
+                    break;
                 case Precision::I32:
-                    uni_vmovups(vmm_src, op);
+                    if (dst_prc == Precision::I32) {
+                        uni_vmovups(vmm_src, op);
+                    }
                     break;
                 case Precision::BF16:
                     vpmovzxwd(vmm_src, op);
@@ -714,29 +751,53 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                     uni_vpmovzxbd(vmm_src, op);
                     break;
                 default:
-                    assert(!"unknown src_prc");
+                    IE_THROW() << "Unknown src_prc: " << src_prc;
             }
 
             switch (dst_prc) {
                 case Precision::FP32:
-                    if (!src_prc.is_float())
+                    if (src_prc == Precision::I64) {
+                        vcvtqq2ps(ymm_src, op);
+                    } else if (one_of(src_prc, Precision::U8, Precision::I8, Precision::I16, Precision::U16)) {
                         uni_vcvtdq2ps(vmm_src, vmm_src);
+                    } else if (src_prc == Precision::I32) {
+                        uni_vcvtdq2ps(vmm_src, op);
+                    }
                     break;
                 case Precision::I32:
-                    if (src_prc.is_float())
-                        uni_vcvtps2dq(vmm_src, vmm_src);
+                    if (src_prc == Precision::I64) {
+                        vpmovsqd(ymm_src, vmm_src);
+                    } else if (src_prc == Precision::FP32 || src_prc == Precision::BF16 || src_prc == Precision::FP16) {
+                        uni_vcvtps2dq(vmm_src, op);
+                    }
+                    break;
+                case Precision::I64:
+                case Precision::FP64:
                     break;
                 default:
-                    assert(!"unknown dst_prc");
+                    IE_THROW() << "Unsupported destination precision: " << dst_prc;
             }
         }
     }
 
-    inline void load_scalar(Xmm xmm_src, const Xbyak::Address &op, Precision src_prc, Precision dst_prc) {
+    inline void load_scalar(const Xmm &xmm_src, const Address &op, const Precision &src_prc, const Precision &dst_prc, bool broadcast = false) {
+        Address srcAdrBcst(op.getBit(), true, op.getRegExp());
         switch (src_prc) {
+            case Precision::I64:
+                if (dst_prc == Precision::I64) {
+                    uni_vmovsd(xmm_src, op);
+                } else if (dst_prc == Precision::FP64) {
+                    if (x64::mayiuse(x64::avx512_core)) {
+                        vcvtqq2pd(xmm_src, srcAdrBcst);
+                    } else {
+                        // Do conversion inside the emitter.
+                        uni_vmovsd(xmm_src, op);
+                    }
+                }
+                break;
             case Precision::FP32:
             case Precision::I32:
-                uni_vmovss(xmm_src, op);
+                uni_vmovss(xmm_src, op); // TODO: AVX512 uni_vcvtdq2ps with broadcast
                 break;
             case Precision::BF16:
                 uni_vpinsrw(xmm_src, xmm_src, op, 0);
@@ -762,45 +823,88 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                 uni_vmovq(xmm_src, reg_tmp_64);
                 break;
             default:
-                assert(!"unknown src_prc");
+                IE_THROW() << "Unknown source precision '" << src_prc << "'";
         }
 
         switch (dst_prc) {
             case Precision::FP32:
-                if (!src_prc.is_float())
+                if (src_prc == Precision::I64) {
+                    vcvtqq2ps(xmm_src, xmm_src);
+                } else if (src_prc != Precision::FP32 && src_prc != Precision::BF16 && src_prc != Precision::FP16) {
                     uni_vcvtdq2ps(xmm_src, xmm_src);
+                }
                 break;
             case Precision::I32:
-                if (src_prc.is_float())
+                if (src_prc == Precision::I64) {
+                    vpmovsqd(xmm_src, xmm_src);
+                } else if (src_prc == Precision::FP32 || src_prc == Precision::BF16 || src_prc == Precision::FP16) {
                     uni_vcvtps2dq(xmm_src, xmm_src);
+                }
+                break;
+            case Precision::I64:
+            case Precision::FP64:
                 break;
             default:
-                assert(!"unknown dst_prc");
+                IE_THROW() << "Unsupported destination precision: " << dst_prc;
         }
     }
 
-    inline void store_vector(const Xbyak::Address &op, Vmm vmm_dst, Precision src_prc, Precision dst_prc) {
+    inline void store_vector(const Address &op, const Vmm &vmm_dst, const Precision &src_prc, const Precision &dst_prc) {
         Xmm xmm_dst = Xmm(vmm_dst.getIdx());
         Ymm ymm_dst = Ymm(vmm_dst.getIdx());
 
         switch (src_prc) {
+            case Precision::FP64:
+                if (dst_prc == Precision::FP32) {
+                    uni_vcvtpd2ps(x64::mayiuse(x64::avx512_core) ? ymm_dst : xmm_dst, vmm_dst);
+                } else if (dst_prc == Precision::I64) {
+                    if (x64::mayiuse(x64::avx512_core)) {
+                        vcvtpd2qq(vmm_dst, vmm_dst);
+                    } else {
+                        // Do conversion inside the emitter.
+                    }
+                } else if (dst_prc == Precision::I32) {
+                    vcvtpd2dq(ymm_dst, vmm_dst);
+                }
+                break;
             case Precision::FP32:
-                if (!dst_prc.is_float())
+                if (dst_prc == Precision::I64) {
+                    vcvtps2qq(vmm_dst, ymm_dst);
+                } else if (dst_prc != Precision::FP32 && dst_prc != Precision::BF16 && dst_prc != Precision::FP16) {
                     uni_vcvtps2dq(vmm_dst, vmm_dst);
+                }
                 break;
             case Precision::I32:
-                if (dst_prc.is_float())
+                if (dst_prc == Precision::FP32 || dst_prc == Precision::BF16 || dst_prc == Precision::FP16)
                     uni_vcvtdq2ps(vmm_dst, vmm_dst);
                 break;
+            case Precision::I64:
+                if (dst_prc == Precision::FP32 || dst_prc == Precision::BF16 || dst_prc == Precision::FP16) {
+                    vcvtqq2ps(ymm_dst, vmm_dst);
+                }
+                break;
             default:
-                assert(!"unknown src_prc");
+                IE_THROW() << "Unsupported source precision: " << src_prc;
         }
 
         switch (dst_prc) {
             case Precision::FP32:
-            case Precision::I32:
+                if (src_prc == Precision::I64) {
+                    uni_vmovups(op, ymm_dst);
+                } else {
+                    uni_vmovups(op, vmm_dst);
+                }
+                break;
+            case Precision::I64:
                 uni_vmovups(op, vmm_dst);
                 break;
+            case Precision::I32:
+                if (src_prc == Precision::I64) {
+                    vpmovsqd(op, vmm_dst);
+                } else {
+                    uni_vmovups(op, vmm_dst);
+                }
+                break;
             case Precision::BF16:
                 uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
                 vmovdqu16(op, ymm_dst);
@@ -837,7 +941,11 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                 break;
             case Precision::I8:
                 if (isa == x64::avx512_core) {
-                    vpmovsdb(op, vmm_dst);
+                    if (src_prc == Precision::I64) {
+                        vpmovsqb(op, vmm_dst);
+                    } else {
+                        vpmovsdb(op, vmm_dst);
+                    }
                 } else {
                     uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst);
                     if (isa != x64::sse41)
@@ -851,8 +959,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                 break;
             case Precision::U8:
                 if (isa == x64::avx512_core) {
-                    vpmaxsd(vmm_dst, vmm_zero, vmm_dst);
-                    vpmovusdb(op, vmm_dst);
+                    if (src_prc == Precision::I64) {
+                        vpmovusqb(op, vmm_dst);
+                    } else {
+                        vpmaxsd(vmm_dst, vmm_zero, vmm_dst);
+                        vpmovusdb(op, vmm_dst);
+                    }
                 } else {
                     uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst);
                     if (isa != x64::sse41)
@@ -865,25 +977,52 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                 }
                 break;
             default:
-                assert(!"unknown dst_prc");
+                IE_THROW() << "Unsupported destination precision: " << dst_prc;
         }
     }
 
-    inline void store_scalar(const Xbyak::Address &op, Xmm xmm_dst, Precision src_prc, Precision dst_prc) {
+    inline void store_scalar(const Address &op, const Xmm &xmm_dst, const Precision &src_prc, const Precision &dst_prc) {
         switch (src_prc) {
+            case Precision::FP64:
+                if (dst_prc == Precision::FP32) {
+                    uni_vcvtpd2ps(xmm_dst, xmm_dst);
+                } else if (dst_prc == Precision::I64) {
+                    if (x64::mayiuse(x64::avx512_core)) {
+                        vcvtpd2qq(xmm_dst, xmm_dst);
+                    } else {
+                        // Do conversion inside the emitter.
+                    }
+                } else if (dst_prc == Precision::I32) {
+                    uni_vcvtpd2dq(xmm_dst, xmm_dst);
+                }
+                break;
             case Precision::FP32:
-                if (!dst_prc.is_float())
+                if (dst_prc == Precision::I64) {
+                    vcvtps2qq(xmm_dst, xmm_dst);
+                } else if (dst_prc != Precision::FP32 && dst_prc != Precision::BF16 && dst_prc != Precision::FP16) {
                     uni_vcvtps2dq(xmm_dst, xmm_dst);
+                }
                 break;
             case Precision::I32:
-                if (dst_prc.is_float())
+                if (dst_prc == Precision::FP32 || dst_prc == Precision::BF16 || dst_prc == Precision::FP16)
                     uni_vcvtdq2ps(xmm_dst, xmm_dst);
                 break;
+            case Precision::I64:
+                if (dst_prc == Precision::FP32 || dst_prc == Precision::BF16 || dst_prc == Precision::FP16) {
+                    vcvtqq2ps(xmm_dst, xmm_dst);
+                } else if (dst_prc == Precision::I32) {
+                    vpmovsqd(xmm_dst, xmm_dst);
+                }
+                break;
             default:
-                assert(!"unknown src_prc");
+                IE_THROW() << "Unsupported source precision: " << src_prc;
         }
 
         switch (dst_prc) {
+            case Precision::FP64:
+            case Precision::I64:
+                uni_vmovsd(op, xmm_dst);
+                break;
             case Precision::FP32:
             case Precision::I32:
                 uni_vmovss(op, xmm_dst);
@@ -914,13 +1053,10 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                 mov(op, reg_tmp_8);
                 break;
             case Precision::U8:
-                uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst);
-                uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst);
-                movq(reg_tmp_64, xmm_dst);
-                mov(op, reg_tmp_8);
+                uni_vpextrb(op, xmm_dst, 0);
                 break;
             default:
-                assert(!"unknown dst_prc");
+                IE_THROW() << "Unsupported destination precision: " << dst_prc;
         }
     }
 };
@@ -985,9 +1121,9 @@ class EltwiseShapeInferFactory : public ShapeInferFactory {
 
 }   // namespace
 
-Eltwise::BroadcastingPolicy Eltwise::determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op) {
-    const auto const1 = ov::as_type_ptr<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(0));
-    const auto const2 = ov::as_type_ptr<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
+Eltwise::BroadcastingPolicy Eltwise::determineBroadcastingPolicy(const std::shared_ptr<ov::Node>& op) {
+    const auto const1 = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(0));
+    const auto const2 = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(1));
     int constPort = -1;
     if (const2) {
         constPort = 1;
@@ -998,48 +1134,48 @@ Eltwise::BroadcastingPolicy Eltwise::determineBroadcastingPolicy(const std::shar
     }
 
     auto const_shape = op->get_input_shape(constPort);
-    if (ngraph::shape_size(const_shape) == 1)
+    if (ov::shape_size(const_shape) == 1)
         return PerTensor;
     else
         return PerChannel;
 }
 
-const std::map<const ngraph::DiscreteTypeInfo, Eltwise::Initializer> Eltwise::initializers = {
-    {ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+const std::map<const ov::DiscreteTypeInfo, Eltwise::Initializer> Eltwise::initializers = {
+    {op::v1::Add::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseAdd;
         node.broadcastingPolicy = determineBroadcastingPolicy(op);
     }},
-    {ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSubtract;
         node.broadcastingPolicy = determineBroadcastingPolicy(op);
     }},
-    {ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseMultiply;
         node.broadcastingPolicy = determineBroadcastingPolicy(op);
     }},
-    {ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Divide::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseDivide;
         node.broadcastingPolicy = determineBroadcastingPolicy(op);
     }},
-    {ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSquaredDifference;
     }},
-    {ngraph::op::v1::Maximum::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Maximum::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseMaximum;
     }},
-    {ngraph::op::v1::Minimum::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Minimum::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseMinimum;
     }},
-    {ngraph::op::v1::Mod::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Mod::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseMod;
     }},
-    {ngraph::op::v1::FloorMod::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::FloorMod::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseFloorMod;
     }},
-    {ngraph::op::v1::Power::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Power::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwisePowerDynamic;
     }},
-    {PowerStaticNode::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {PowerStaticNode::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         auto powerStatic = getNgraphOpAs<PowerStaticNode>(op);
         node.algorithm = Algorithm::EltwisePowerStatic;
         node.alpha = powerStatic->get_power();
@@ -1047,100 +1183,100 @@ const std::map<const ngraph::DiscreteTypeInfo, Eltwise::Initializer> Eltwise::in
         node.gamma = powerStatic->get_shift();
         node.broadcastingPolicy = PerTensor;
     }},
-    {ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Equal::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseEqual;
     }},
-    {ngraph::op::v1::NotEqual::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::NotEqual::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseNotEqual;
     }},
-    {ov::op::v10::IsFinite::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+    {op::v10::IsFinite::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseIsFinite;
     }},
-    {ov::op::v10::IsInf::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+    {op::v10::IsInf::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseIsInf;
-        const auto& attributes = ov::as_type_ptr<ov::op::v10::IsInf>(op)->get_attributes();
+        const auto& attributes = ov::as_type_ptr<op::v10::IsInf>(op)->get_attributes();
         node.alpha = attributes.detect_negative;
         node.beta  = attributes.detect_positive;
     }},
-    {ov::op::v10::IsNaN::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+    {op::v10::IsNaN::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseIsNaN;
     }},
-    {ngraph::op::v1::Greater::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Greater::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseGreater;
     }},
-    {ngraph::op::v1::GreaterEqual::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::GreaterEqual::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseGreaterEqual;
     }},
-    {ngraph::op::v1::Less::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Less::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLess;
     }},
-    {ngraph::op::v1::LessEqual::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::LessEqual::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLessEqual;
     }},
-    {ngraph::op::v1::LogicalAnd::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::LogicalAnd::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLogicalAnd;
     }},
-    {ngraph::op::v1::LogicalOr::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::LogicalOr::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLogicalOr;
     }},
-    {ngraph::op::v1::LogicalXor::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::LogicalXor::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLogicalXor;
     }},
-    {ngraph::op::v1::LogicalNot::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::LogicalNot::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLogicalNot;
     }},
-    {ngraph::op::v0::Relu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Relu::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseRelu;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_relu;
     }},
-    {LeakyReluNode::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {LeakyReluNode::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         auto leakyRelu = getNgraphOpAs<LeakyReluNode>(op);
         node.algorithm = Algorithm::EltwiseRelu;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_relu;
         node.alpha = leakyRelu->get_slope();
         node.beta = 0.0f;
     }},
-    {ngraph::op::v0::Gelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Gelu::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseGeluErf;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_gelu_erf;
     }},
-    {ngraph::op::v7::Gelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
-        auto gelu = getNgraphOpAs<ngraph::op::v7::Gelu>(op);
-        ngraph::op::GeluApproximationMode approximationMode = gelu->get_approximation_mode();
-        if (approximationMode == ngraph::op::GeluApproximationMode::ERF) {
+    {op::v7::Gelu::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+        auto gelu = getNgraphOpAs<op::v7::Gelu>(op);
+        op::GeluApproximationMode approximationMode = gelu->get_approximation_mode();
+        if (approximationMode == op::GeluApproximationMode::ERF) {
             node.algorithm = Algorithm::EltwiseGeluErf;
             node.onednnAlgorithm = dnnl::algorithm::eltwise_gelu_erf;
-        } else if (approximationMode == ngraph::op::GeluApproximationMode::TANH) {
+        } else if (approximationMode == op::GeluApproximationMode::TANH) {
             node.algorithm = Algorithm::EltwiseGeluTanh;
             node.onednnAlgorithm = dnnl::algorithm::eltwise_gelu_tanh;
         } else {
             IE_THROW(NotImplemented) << "CPU Eltwise node doesn't support ngraph operation Gelu with approximation mode: " << approximationMode;
         }
     }},
-    {ngraph::op::v0::Elu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
-        auto eluOp = getNgraphOpAs<ngraph::op::v0::Elu>(op);
+    {op::v0::Elu::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+        auto eluOp = getNgraphOpAs<op::v0::Elu>(op);
         node.alpha = static_cast<float>(eluOp->get_alpha());
         node.algorithm = Algorithm::EltwiseElu;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_elu;
     }},
-    {ngraph::op::v0::Tanh::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Tanh::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseTanh;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_tanh;
     }},
-    {ngraph::op::v0::Sigmoid::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Sigmoid::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSigmoid;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_logistic;
     }},
-    {ngraph::op::v0::Abs::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Abs::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseAbs;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_abs;
     }},
-    {ngraph::op::v0::Sqrt::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Sqrt::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSqrt;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_sqrt;
     }},
-    {ngraph::op::v0::Clamp::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
-        auto clampOp = getNgraphOpAs<ngraph::op::v0::Clamp>(op);
+    {op::v0::Clamp::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+        auto clampOp = getNgraphOpAs<op::v0::Clamp>(op);
 
         float alpha_ = static_cast<float>(clampOp->get_min());
         float beta_ = static_cast<float>(clampOp->get_max());
@@ -1154,64 +1290,64 @@ const std::map<const ngraph::DiscreteTypeInfo, Eltwise::Initializer> Eltwise::in
         node.algorithm = Algorithm::EltwiseClamp;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_clip;
     }},
-    {ngraph::op::v0::Exp::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Exp::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseExp;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_exp;
     }},
-    {SwishNode::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {SwishNode::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         auto swishOp = getNgraphOpAs<SwishNode>(op);
         node.algorithm = Algorithm::EltwiseSwish;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_swish;
         node.alpha = swishOp->get_alpha();
     }},
-    {ngraph::op::v4::HSwish::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v4::HSwish::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         // since v3.0 version, oneDNN has flexible implementation of hardswish, ov still uses the one with hardcoded alpha and beta
         node.alpha = 1.f / 6.f;
         node.beta = 0.5f;
         node.algorithm = Algorithm::EltwiseHswish;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_hardswish;
     }},
-    {ngraph::op::v4::Mish::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v4::Mish::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseMish;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_mish;
     }},
-    {ngraph::op::v5::HSigmoid::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v5::HSigmoid::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseHsigmoid;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_hsigmoid;
     }},
-    {ngraph::op::v5::Round::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
-        auto roundOp = getNgraphOpAs<ngraph::op::v5::Round>(op);
+    {op::v5::Round::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
+        auto roundOp = getNgraphOpAs<op::v5::Round>(op);
 
         switch (roundOp->get_mode()) {
-            case ngraph::op::v5::Round::RoundMode::HALF_TO_EVEN:
+            case op::v5::Round::RoundMode::HALF_TO_EVEN:
                 node.algorithm = Algorithm::EltwiseRoundHalfToEven;
                 node.onednnAlgorithm = dnnl::algorithm::eltwise_round_half_to_even;
                 break;
-            case ngraph::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO:
+            case op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO:
                 node.algorithm = Algorithm::EltwiseRoundHalfAwayFromZero;
                 node.onednnAlgorithm = dnnl::algorithm::eltwise_round_half_away_from_zero;
                 break;
         }
     }},
-    {ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwisePrelu;
         node.broadcastingPolicy = determineBroadcastingPolicy(op);
     }},
-    {ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Erf::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseErf;
     }},
-    {ngraph::op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSoftRelu;
         node.alpha = 1.f;
         node.onednnAlgorithm = dnnl::algorithm::eltwise_soft_relu;
     }},
-    {ngraph::op::v9::SoftSign::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v9::SoftSign::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSoftSign;
     }},
-    {ngraph::op::v1::Select::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v1::Select::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseSelect;
     }},
-    {ngraph::op::v0::Log::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Eltwise& node) {
+    {op::v0::Log::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
         node.algorithm = Algorithm::EltwiseLog;
     }},
 };
@@ -1224,8 +1360,8 @@ struct EltwiseKey {
     VectorDims outBlkDims;
     VectorDims outOrder;
     std::vector<VectorDims> inpDims;
-    std::vector<InferenceEngine::Precision> inpPrc;
-    InferenceEngine::Precision outPrc;
+    std::vector<Precision> inpPrc;
+    Precision outPrc;
     dnnl::post_ops postOps;
     EltwiseImplType implType;
 
@@ -1323,8 +1459,8 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor {
                        const VectorDims& outBlkDims,
                        const VectorDims& outOrder,
                        std::vector<VectorDims> inpDims,
-                       const std::vector<InferenceEngine::Precision>& inpPrc,
-                       const InferenceEngine::Precision& outPrc,
+                       const std::vector<Precision>& inpPrc,
+                       const Precision& outPrc,
                        const dnnl::post_ops& post_ops,
                        bool useRuntimePtrs) {
         auto collapseLastDims = [](std::vector<size_t>& dims, int dimsToCollapse) {
@@ -1544,6 +1680,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor {
                                args.indexes[3] = i3;
                                args.indexes[4] = i4;
 
+
                                (*_pKernel)(&args_ptrs, &args);
                            });
         } else {
@@ -1834,23 +1971,23 @@ static Eltwise::executorPtr buildExecutor(const EltwiseKey& key) {
     return execPtr;
 }
 
-bool Eltwise::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Eltwise::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (initializers.find(op->get_type_info()) == initializers.end()) {
             errorMessage = "Doesn't support Eltwise algorithm: " +  std::string(op->get_type_name());
             return false;
         }
-        if (const auto binOp = ov::as_type_ptr<const ov::op::util::BinaryElementwiseArithmetic>(op)) {
-            if (binOp->get_autob().m_type != ngraph::op::AutoBroadcastType::NONE &&
-                binOp->get_autob().m_type != ngraph::op::AutoBroadcastType::NUMPY) {
-                errorMessage = "Doesn't support broadcast type: " + ngraph::as_string(binOp->get_autob().m_type);
+        if (const auto binOp = ov::as_type_ptr<const op::util::BinaryElementwiseArithmetic>(op)) {
+            if (binOp->get_autob().m_type != op::AutoBroadcastType::NONE &&
+                binOp->get_autob().m_type != op::AutoBroadcastType::NUMPY) {
+                errorMessage = "Doesn't support broadcast type: " + ov::as_string(binOp->get_autob().m_type);
                 return false;
             }
         }
-        if (const auto select = ov::as_type_ptr<const ov::op::v1::Select>(op)) {
-            if (select->get_auto_broadcast().m_type != ngraph::op::AutoBroadcastType::NONE &&
-                select->get_auto_broadcast().m_type != ngraph::op::AutoBroadcastType::NUMPY) {
-                errorMessage = "Doesn't support broadcast type: " + ngraph::as_string(select->get_autob().m_type);
+        if (const auto select = ov::as_type_ptr<const op::v1::Select>(op)) {
+            if (select->get_auto_broadcast().m_type != op::AutoBroadcastType::NONE &&
+                select->get_auto_broadcast().m_type != op::AutoBroadcastType::NUMPY) {
+                errorMessage = "Doesn't support broadcast type: " + ov::as_string(select->get_auto_broadcast().m_type);
                 return false;
             }
         }
@@ -1860,8 +1997,8 @@ bool Eltwise::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op
     return true;
 }
 
-Eltwise::Eltwise(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
-    Node(op, context, EltwiseShapeInferFactory()), broadcastingPolicy(Undefined) {
+Eltwise::Eltwise(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context) :
+        Node(op, context, EltwiseShapeInferFactory()), broadcastingPolicy(Undefined) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
@@ -1953,7 +2090,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
             Precision::I16,
             Precision::BF16,
             Precision::FP16,
-            Precision::I32
+            Precision::I32,
+            Precision::I64
     };
 
     if (!supportedPrimitiveDescriptors.empty())
@@ -1984,7 +2122,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
         IE_THROW() << "Eltwise node with name `" << getName() << "` has invalid input number of inputs: expected = " << expectedInputsNum
                            << " (actual = " << getParentEdges().size() << ")";
 
-    std::vector<InferenceEngine::Precision> inputPrecisions;
+    std::vector<Precision> inputPrecisions;
     for (const auto &prec : getOriginalInputPrecisions()) {
         inputPrecisions.push_back(prec);
     }
@@ -2006,7 +2144,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
     if (inputPrecisions.size() != getParentEdges().size())
         IE_THROW() << "Eltwise node with name `" << getName() << "` has invalid input precisions configuration.";
 
-    InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
+    Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
     if (!fusedWith.empty()) {
         outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
     }
@@ -2025,8 +2163,10 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
         if (implType == EltwiseImplType::reference) {
             return Precision(Precision::FP32);
         } else if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), prc) == supportedPrecisions.end()) {
-            if (prc == Precision::U32 || prc == Precision::I64 || prc == Precision::U64) {
+            if (prc == Precision::U32) {
                 return Precision(Precision::I32);
+            } else if (prc == Precision::U64) {
+                return Precision(Precision::I64);
             } else {
                 IE_THROW() << "Eltwise node with name `" << getName() << "` doesn't support " << prc << " precision.";
             }
@@ -2746,8 +2886,8 @@ bool Eltwise::canFuse(const NodePtr& node) const {
     return false;
 }
 
-InferenceEngine::Precision Eltwise::getRuntimePrecision() const {
-    std::vector<InferenceEngine::Precision> inputPrecisions;
+Precision Eltwise::getRuntimePrecision() const {
+    std::vector<Precision> inputPrecisions;
     // Don't take bias precision into account
     for (size_t i = 0; i < getParentEdges().size(); i++) {
         auto parentEdge = getParentEdgeAt(i);
diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h
index ec3ff99ea545a7..3aa73c56f04c01 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.h
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.h
@@ -4,12 +4,7 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
-#include <vector>
-#include <memory>
-#include <caseless.hpp>
 #include "executors/eltwise_list.hpp"
 
 namespace ov {
@@ -103,7 +98,7 @@ class Eltwise : public Node {
     using executorPtr = std::shared_ptr<IEltwiseExecutor>;
 
 public:
-    Eltwise(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Eltwise(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/common/ref_opt_transpose.cpp b/src/plugins/intel_cpu/src/nodes/executors/common/ref_opt_transpose.cpp
index 71997a495d50e0..ea196005021bb3 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/common/ref_opt_transpose.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/common/ref_opt_transpose.cpp
@@ -124,7 +124,8 @@ void RefOptimizedTransposeExecutor::exec(const std::vector<MemoryCPtr>& src, con
     OV_SWITCH(intel_cpu, TransposeOptimizedEmitter, ctx, dataSize,
               OV_CASE(1u, InferenceEngine::PrecisionTrait<InferenceEngine::Precision::U8>::value_type),
               OV_CASE(2u, InferenceEngine::PrecisionTrait<InferenceEngine::Precision::U16>::value_type),
-              OV_CASE(4u, InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type));
+              OV_CASE(4u, InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type),
+              OV_CASE(8u, InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I64>::value_type));
 }
 
 bool RefOptimizedTransposeExecutor::init(const TransposeParams &transposeParams,
diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp
index 747e89bdc1ed11..6f7ce1f8e92ea8 100644
--- a/src/plugins/intel_cpu/src/nodes/eye.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eye.cpp
@@ -7,6 +7,7 @@
 #include <utils/bfloat16.hpp>
 #include <ie_parallel.hpp>
 #include <utils/shape_inference/shape_inference_ngraph.hpp>
+#include <openvino/op/eye.hpp>
 
 #define THROW_ERROR IE_THROW() << NameFromType(getType()) << " node with name '" << getName() << "' "
 
@@ -20,7 +21,7 @@ using namespace InferenceEngine::details;
 
 bool Eye::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (op->get_type_info() != ngraph::op::v9::Eye::get_type_info_static()) {
+        if (op->get_type_info() != op::v9::Eye::get_type_info_static()) {
             errorMessage = "Node is not an instance of Eye form the operation set v9.";
             return false;
         }
diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp
index 06314ca17c6f5e..f6f86114ecc0de 100644
--- a/src/plugins/intel_cpu/src/nodes/gather.cpp
+++ b/src/plugins/intel_cpu/src/nodes/gather.cpp
@@ -2,23 +2,18 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <string>
-#include <vector>
-
-#include "ie_parallel.hpp"
 #include "gather.h"
-#include <ngraph/opsets/opset1.hpp>
+
 #include "common/cpu_memcpy.h"
-#include <utils/general_utils.h>
+#include "ie_parallel.hpp"
 #include "kernels/x64/gather_uni_kernel.hpp"
-#include "utils/shape_inference/shape_inference_cpu.hpp"
+#include <openvino/op/constant.hpp>
+#include <openvino/op/gather.hpp>
 #include <partitioned_mem_mgr.h>
 
 using namespace InferenceEngine;
 using namespace dnnl::impl::cpu;
 
-#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
-
 namespace ov {
 namespace intel_cpu {
 namespace node {
@@ -26,13 +21,13 @@ namespace node {
 bool Gather::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (!one_of(op->get_type_info(),
-                ov::op::v7::Gather::get_type_info_static(),
-                ov::op::v8::Gather::get_type_info_static())) {
+                op::v7::Gather::get_type_info_static(),
+                op::v8::Gather::get_type_info_static())) {
             errorMessage = "Not supported Gather operation version. CPU plug-in supports only 7 and 8 versions.";
             return false;
         }
 
-        if (!isDynamicNgraphNode(op) && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))) {
+        if (!isDynamicNgraphNode(op) && !ov::is_type<op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))) {
             errorMessage = "Only Constant operation on 'axis' input is supported for static node.";
             return false;
         }
@@ -58,11 +53,15 @@ class GatherShapeInfer : public ShapeInferEmptyPads {
         const auto& indices_shape = m_isIndicesScalar ? VectorDims{} : input_shapes[GATHER_INDICES].get();
 
         if (!m_isAxisInputConst) {
-            if (data_dependency.at(GATHER_AXIS)->getDesc().getPrecision() != Precision::I32) {
+            auto axPrc = data_dependency.at(GATHER_AXIS)->getDesc().getPrecision();
+            if (axPrc == Precision::I32) {
+                m_axis = reinterpret_cast<const int32_t *>(data_dependency.at(GATHER_AXIS)->getData())[0];
+            } else if (axPrc == Precision::I64) {
+                m_axis = reinterpret_cast<const int64_t *>(data_dependency.at(GATHER_AXIS)->getData())[0];
+            } else {
                 IE_THROW() << "Unsupported precision " << data_dependency.at(GATHER_AXIS)->getDesc().getPrecision()
                            << " for axis tensor.";
             }
-            m_axis = reinterpret_cast<const int32_t *>(data_dependency.at(GATHER_AXIS)->getData())[0];
         }
 
         if (m_axis < 0)
@@ -85,7 +84,7 @@ class GatherShapeInfer : public ShapeInferEmptyPads {
 private:
     bool m_isAxisInputConst = false;
     bool m_isIndicesScalar = false;
-    int m_axis = 0;
+    int64_t m_axis = 0;
     int m_batchDims = 0;
 };
 
@@ -95,15 +94,15 @@ class GatherShapeInferFactory : public ShapeInferFactory {
     ShapeInferPtr makeShapeInfer() const override {
         static constexpr size_t GATHER_INDICES = 1, GATHER_AXIS = 2;
 
-        bool isAxisInputConst = ov::is_type<ov::op::v0::Constant>(m_op->get_input_node_ptr(GATHER_AXIS));
+        bool isAxisInputConst = ov::is_type<op::v0::Constant>(m_op->get_input_node_ptr(GATHER_AXIS));  // Constant axis is read once, below.
         const auto& indicesShape = m_op->get_input_partial_shape(GATHER_INDICES);
         if (!indicesShape.rank().is_static())
             IE_THROW() << "indicesShape do not support dynamic rank.";
         bool isIndicesScalar = indicesShape.rank().get_length() == 0;
 
-        int axis = isAxisInputConst ? ov::as_type<ov::op::v0::Constant>(m_op->get_input_node_ptr(GATHER_AXIS))->cast_vector<int>()[0] : 0;
-        int batchDims = ov::is_type<ov::op::v8::Gather>(m_op) ? static_cast<int>(ov::as_type_ptr<ov::op::v8::Gather>(m_op)->get_batch_dims()) : (
-                        ov::is_type<ov::op::v7::Gather>(m_op) ? static_cast<int>(ov::as_type_ptr<ov::op::v7::Gather>(m_op)->get_batch_dims()) : 0);
+        int axis = isAxisInputConst ? ov::as_type<op::v0::Constant>(m_op->get_input_node_ptr(GATHER_AXIS))->cast_vector<int>()[0] : 0;
+        int batchDims = ov::is_type<op::v8::Gather>(m_op) ? static_cast<int>(ov::as_type_ptr<op::v8::Gather>(m_op)->get_batch_dims()) : (
+                ov::is_type<op::v7::Gather>(m_op) ? static_cast<int>(ov::as_type_ptr<op::v7::Gather>(m_op)->get_batch_dims()) : 0);
 
         return std::make_shared<GatherShapeInfer>(isAxisInputConst, isIndicesScalar, axis, batchDims);
     }
@@ -114,15 +113,14 @@ class GatherShapeInferFactory : public ShapeInferFactory {
 } // namespace
 
 Gather::Gather(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
-    : Node(op, context, GatherShapeInferFactory(op)),
-      batchDims(0) {
+        : Node(op, context, GatherShapeInferFactory(op)), batchDims(0) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
     if (op->get_input_size() != 3 || op->get_output_size() != 1)
-        THROW_ERROR << "has incorrect number of input/output edges!";
+        THROW_CPU_NODE_ERR << "has incorrect number of input/output edges!";
 
     const auto& dataShape = getInputShapeAtPort(GATHER_DATA);
     isDataShapeStat = dataShape.isStatic();
@@ -132,10 +130,10 @@ Gather::Gather(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr con
     isIdxShapeStat = idxShape.isStatic();
     const auto indicesRank = idxShape.getRank();
     if (dataSrcRank == 0lu || indicesRank == 0lu)
-        THROW_ERROR << "has incorrect input parameters ranks.";
+        THROW_CPU_NODE_ERR << "has incorrect input parameters ranks.";
 
-    if (ov::is_type<ov::op::v8::Gather>(op)) {
-        batchDims = static_cast<int>(ov::as_type_ptr<ov::op::v8::Gather>(op)->get_batch_dims());
+    if (ov::is_type<op::v8::Gather>(op)) {
+        batchDims = static_cast<int>(ov::as_type_ptr<op::v8::Gather>(op)->get_batch_dims());
         // WA for NMS->Gather construction. NMS fills part of the output blob by the -1 if these values
         // must not be taken into account. There is appropriate pass that looks for such subgraphs
         // and sets the dontReverseIndices flag.
@@ -145,23 +143,23 @@ Gather::Gather(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr con
             reverseIndexing = true;
         else
             reverseIndexing = false;
-    } else if (ov::is_type<ov::op::v7::Gather>(op)) {
-        batchDims = static_cast<int>(ov::as_type_ptr<ov::op::v7::Gather>(op)->get_batch_dims());
+    } else if (ov::is_type<op::v7::Gather>(op)) {
+        batchDims = static_cast<int>(ov::as_type_ptr<op::v7::Gather>(op)->get_batch_dims());
         reverseIndexing = false;
     }
 
     if (batchDims < 0)
         batchDims += indicesRank;
     if (batchDims < 0 || batchDims > std::min(static_cast<int>(dataSrcRank), static_cast<int>(indicesRank)))
-        THROW_ERROR << "has incorrect batch_dims " << batchDims << "!";
+        THROW_CPU_NODE_ERR << "has incorrect batch_dims " << batchDims << "!";
 
-    if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))) {
+    if (ov::is_type<op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))) {
         isAxisInputConst = true;
-        axis = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))->cast_vector<int>()[0];
+        axis = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))->cast_vector<int>()[0];
         if (axis < 0)
             axis += dataSrcRank;
         if (axis < 0 || axis >= dataSrcRank || batchDims > axis)
-            THROW_ERROR << "has incorrect input parameter axis value: " << axis;
+            THROW_CPU_NODE_ERR << "has incorrect input parameter axis value: " << axis;
     }
 
     if (auto indices = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_INDICES))) {
@@ -173,7 +171,17 @@ void Gather::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    dataTypeSize = getOriginalInputPrecisionAtPort(GATHER_DATA).size();
+    const auto &dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA);
+    dataTypeSize = dataPrecision.size();
+    idxPrecision = getOriginalInputPrecisionAtPort(GATHER_INDICES);
+    if (!one_of(idxPrecision, Precision::I32, Precision::I64)) {
+        idxPrecision = Precision::I32;
+    }
+    idxTypeSize = idxPrecision.size();
+    auto axisPrecision = getOriginalInputPrecisionAtPort(GATHER_AXIS);
+    if (!one_of(axisPrecision, Precision::I32, Precision::I64)) {
+        axisPrecision = Precision::I32;
+    }
 
     const auto& dataDims = getInputShapeAtPort(GATHER_DATA).getDims();
     if (isAxisInputConst && isDataShapeStat) {
@@ -200,10 +208,9 @@ void Gather::initSupportedPrimitiveDescriptors() {
     }
 
     // Implementation desc type will be redefined in the fn prepareParams if a kernel will be created.
-    Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA);
     addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision},
-                          {LayoutType::ncsp, Precision::I32},
-                          {LayoutType::ncsp, Precision::I32, isAxisInputConst}},
+                          {LayoutType::ncsp, idxPrecision},
+                          {LayoutType::ncsp, axisPrecision, isAxisInputConst}},
                          {{LayoutType::ncsp, dataPrecision}},
                          ref_any);
 
@@ -232,10 +239,10 @@ void Gather::createPrimitive() {
     uint64_t idxElPerVec = 1;
     if (!isDynamicNode()) {
         idxElPerVec = x64::mayiuse(x64::avx512_core) ? x64::cpu_isa_traits<x64::avx512_core>::vlen / idxTypeSize :
-            x64::mayiuse(x64::avx2) ? x64::cpu_isa_traits<x64::avx2>::vlen / idxTypeSize : 1;
+                      x64::mayiuse(x64::avx2) ? x64::cpu_isa_traits<x64::avx2>::vlen / idxTypeSize : 1;
     }
     // Gather instruction is not supported by SSE.
-    if ((x64::mayiuse(x64::avx512_core) || x64::mayiuse(x64::avx2)) &&
+    if ((x64::mayiuse(x64::avx512_core) || x64::mayiuse(x64::avx2)) && dataTypeSize <= 4 && idxTypeSize == 4 &&
             (isDynamicNode() || afterAxisSize == 1 || (afterAxisSize <= idxElPerVec &&
             (x64::mayiuse(x64::avx512_core) || (x64::mayiuse(x64::avx2) && dataTypeSize == 4))))) {
         jGatherConfParams jcp;
@@ -298,31 +305,44 @@ void Gather::createPrimitive() {
 }
 
 bool Gather::needPrepareParams() const {
     if (isInPlace()) {
         return false;
     }
-    bool result = inputShapesModified();
-    if (!isAxisInputConst)
-        result = result || axis != (reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->getData()))[0];
-    return result;
+    if (inputShapesModified())
+        return true;
+    // The in-place early-out above is kept as before; a runtime axis is compared in its own precision (I32 or I64).
+    if (!isAxisInputConst) {
+        auto mem = getParentEdgeAt(GATHER_AXIS)->getMemoryPtr();
+        if (mem->getDesc().getPrecision() == Precision::I64) {
+            return (reinterpret_cast<const int64_t*>(mem->getData()))[0] != axis;
+        } else if (mem->getDesc().getPrecision() == Precision::I32) {
+            return (reinterpret_cast<const int32_t*>(mem->getData()))[0] != axis;
+        }
+    }
+    return false;
 }
 
 void Gather::prepareParams() {
     auto dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr();
     if (!dataMemPtr || !dataMemPtr->isAllocated())
-        THROW_ERROR << " has not allocated input data memory.";
+        THROW_CPU_NODE_ERR << " has not allocated input data memory.";
     auto idxMemPtr = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr();
     if (!idxMemPtr || !idxMemPtr->isAllocated())
-        THROW_ERROR << " has not allocated input indices memory.";
+        THROW_CPU_NODE_ERR << " has not allocated input indices memory.";
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        THROW_ERROR << " has unidentified preferable primitive descriptor.";
+        THROW_CPU_NODE_ERR << " has unidentified preferable primitive descriptor.";
 
     if (!isAxisInputConst) {
-        axis = (reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->getData()))[0];
+        auto mem = getParentEdgeAt(GATHER_AXIS)->getMemoryPtr();
+        if (mem->getDesc().getPrecision() == Precision::I64) {
+            axis = (reinterpret_cast<const int64_t*>(mem->getData()))[0];
+        } else if (mem->getDesc().getPrecision() == Precision::I32) {
+            axis = (reinterpret_cast<const int32_t*>(mem->getData()))[0];
+        }
         if (axis < 0)
             axis += dataSrcRank;
         if (axis < 0 || axis >= dataSrcRank || batchDims > axis)
-            THROW_ERROR << "has incorrect input parameter axis value: " << axis;
+            THROW_CPU_NODE_ERR << "has incorrect input parameter axis value: " << axis;
     }
 
     if (!isDataShapeStat || !isAxisInputConst) {
@@ -358,6 +378,9 @@ void Gather::prepareParams() {
         } else if (x64::mayiuse(x64::avx2)) {
             selectedPD->setImplementationType(jit_avx2);
         }
+    } else {
+        // TODO: Add tests
+        selectedPD->setImplementationType(ref_any);
     }
 #endif
 }
@@ -415,7 +438,9 @@ void Gather::execute(dnnl::stream strm) {
         return;
     }
 #endif
-    execReference();
+    OV_SWITCH(intel_cpu, refExec, this, idxPrecision,
+              OV_CASE(Precision::I32, int32_t),
+              OV_CASE(Precision::I64, int64_t))
 }
 
 void Gather::executeDynamicImpl(dnnl::stream strm) {
@@ -477,12 +502,14 @@ void Gather::executeDynamicImpl(dnnl::stream strm) {
         return;
     }
 #endif
-    execReference();
+    OV_SWITCH(intel_cpu, refExec, this, idxPrecision,
+              OV_CASE(Precision::I32, int32_t),
+              OV_CASE(Precision::I64, int64_t))
 }
 
 void Gather::initShortParams(threadExecParams& p, const uint64_t start) {
     if (!jitKernel)
-        THROW_ERROR << "has uninitialized kernel in function initShortParams.";
+        THROW_CPU_NODE_ERR << "has uninitialized kernel in function initShortParams.";
     const uint64_t idxElPerVec = jitKernel->getIdxElPerVec();
 
     if (afterAxisSize == 1) { // Elementwise gather.
@@ -547,8 +574,9 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) {
     }
 }
 
+template<typename idxType>
 void Gather::execReference() {
-    const int32_t* srcIndices = reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->getData());
+    const idxType* srcIndices = reinterpret_cast<const idxType*>(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->getData());
     const uint8_t* srcData = reinterpret_cast<const uint8_t*>(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->getData());
     uint8_t* dstData = reinterpret_cast<uint8_t*>(getChildEdgeAt(0)->getMemoryPtr()->getData());
 
@@ -579,6 +607,13 @@ void Gather::execReference() {
     });
 }
 
+template<typename idxType>
+struct Gather::refExec {  // Functor handed to OV_SWITCH in execute()/executeDynamicImpl(); selects execReference<idxType>.
+    void operator()(Gather *node) {
+        node->execReference<idxType>();
+    }
+};
+
 bool Gather::created() const {
     return getType() == Type::Gather;
 }
diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h
index f03a08832a66f5..0ec20a0587008d 100644
--- a/src/plugins/intel_cpu/src/nodes/gather.h
+++ b/src/plugins/intel_cpu/src/nodes/gather.h
@@ -7,17 +7,13 @@
 #include <node.h>
 #include "kernels/x64/gather_uni_kernel.hpp"
 
-#include <memory>
-#include <string>
-#include <vector>
-
 namespace ov {
 namespace intel_cpu {
 namespace node {
 
 class Gather : public Node {
 public:
-    Gather(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Gather(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
@@ -27,7 +23,7 @@ class Gather : public Node {
     bool isExecutable() const override;
     void resolveInPlaceEdges(Edge::LOOK look) override;
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
     struct threadExecParams {
         std::vector<int> specIdxInBytes;
@@ -53,7 +49,12 @@ class Gather : public Node {
     void prepareParams() override;
 
 private:
+    template<typename idxType>
+    struct refExec;
+
     void initShortParams(threadExecParams& p, uint64_t start);
+
+    template<typename idxType>
     void execReference();
 
     bool isDataShapeStat = false;
@@ -63,7 +64,8 @@ class Gather : public Node {
     bool reverseIndexing = false;
 
     uint64_t dataTypeSize = 1lu;
-    static constexpr uint64_t idxTypeSize = sizeof(int);
+    uint64_t idxTypeSize = sizeof(int32_t);
+    InferenceEngine::Precision idxPrecision;
 
     int axis = 0;
     int axisDim = 0;
diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp
index c029869faec4bb..79412e7fa7f33e 100644
--- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp
+++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp
@@ -2,28 +2,21 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <cmath>
-#include <vector>
-#include <string>
-#include <dnnl_types.h>
-#include "ie_parallel.hpp"
 #include "gather_nd.h"
-#include <ngraph/opsets/opset8.hpp>
-#include <precision_utils.h>
-#include <utils/general_utils.h>
+
+#include "ie_parallel.hpp"
 #include "common/cpu_memcpy.h"
+#include <openvino/op/gather_nd.hpp>
 
 using namespace InferenceEngine;
 
-#define THROW_ERROR IE_THROW() << "GatherND layer with name '" << getName() << "' "
-
 namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool GatherND::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool GatherND::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!one_of(op->get_type_info(), ngraph::op::v5::GatherND::get_type_info_static(), ngraph::op::v8::GatherND::get_type_info_static())) {
+        if (!one_of(op->get_type_info(), op::v5::GatherND::get_type_info_static(), op::v8::GatherND::get_type_info_static())) {
             errorMessage = "Node is not an instance of the GatherND operation from operation set v5 and v8.";
             return false;
         }
@@ -34,51 +27,51 @@ bool GatherND::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& o
     return true;
 }
 
-GatherND::GatherND(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
-    : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
+GatherND::GatherND(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
+        : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    if (inputShapes.size() != 2 && outputShapes.size() != 1)
-        THROW_ERROR << "has invalid number of input/output edges.";
+    if (inputShapes.size() != 2 || outputShapes.size() != 1)  // Reject when either port count is wrong, not only when both are.
+        THROW_CPU_NODE_ERR << "has invalid number of input/output edges.";
 
     const size_t dataInputRank = getInputShapeAtPort(GATHERND_DATA).getRank();
     const size_t indicesInputRank = getInputShapeAtPort(GATHERND_INDEXES).getRank();
 
-    if (auto gatherNdOp = ngraph::as_type_ptr<const ngraph::op::v8::GatherND>(op)) {
-        attrs.batchDims = gatherNdOp->get_batch_dims();
-    } else if (auto gatherNdOp = ngraph::as_type_ptr<const ngraph::op::v5::GatherND>(op)) {
+    if (auto gatherNdOp = ov::as_type<const op::util::GatherNDBase>(op.get())) {
         attrs.batchDims = gatherNdOp->get_batch_dims();
     } else {
-        THROW_ERROR << "has support only opset5.";
+        THROW_CPU_NODE_ERR << "supports only GatherND from opset5 and opset8.";
     }
     if (attrs.batchDims >= std::min(dataInputRank, indicesInputRank))
-        THROW_ERROR << "has invalid batch_dims attribute: " << attrs.batchDims;
+        THROW_CPU_NODE_ERR << "has invalid batch_dims attribute: " << attrs.batchDims;
 }
 
 void GatherND::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    Precision inDataPrecision = getOriginalInputPrecisionAtPort(GATHERND_DATA);
+    auto inDataPrecision = getOriginalInputPrecisionAtPort(GATHERND_DATA);
     if (!one_of(inDataPrecision.size(),
+                sizeof(PrecisionTrait<Precision::I64>::value_type),  // Data is matched by element size only, not by exact precision.
                 sizeof(PrecisionTrait<Precision::I32>::value_type),
                 sizeof(PrecisionTrait<Precision::I16>::value_type),
                 sizeof(PrecisionTrait<Precision::I8>::value_type))) {
-        THROW_ERROR << "has unsupported 'data' input precision: " << inDataPrecision;
+        THROW_CPU_NODE_ERR << "has unsupported 'data' input precision: " << inDataPrecision;
     }
     attrs.dataSize = inDataPrecision.size();
 
-    Precision indicesPrecision = getOriginalInputPrecisionAtPort(GATHERND_INDEXES);
-    if (!one_of(indicesPrecision,
-                Precision::I32, Precision::I64, Precision::I16, Precision::U16, Precision::I8, Precision::U8)) {
-        THROW_ERROR << "has unsupported 'indices' input precision: " << indicesPrecision;
+    auto indicesPrecision = getOriginalInputPrecisionAtPort(GATHERND_INDEXES);
+    if (indicesPrecision == Precision::U64) {  // U64 indices are requested as I64.
+        indicesPrecision = Precision::I64;
+    } else if (!one_of(indicesPrecision, Precision::I32, Precision::I64)) {  // Anything else that is not I32/I64 is requested as I32.
+        indicesPrecision = Precision::I32;
     }
 
     addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision},
-                          {LayoutType::ncsp, Precision::I32}},
+                          {LayoutType::ncsp, indicesPrecision}},  // Indices port uses the precision selected above.
                          {{LayoutType::ncsp, inDataPrecision}},
                          impl_desc_type::ref_any);
 }
@@ -88,13 +81,13 @@ void GatherND::prepareParams() {
     auto idxMemPtr = getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr();
     auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
     if (!srcMemPtr || !srcMemPtr->isAllocated())
-        THROW_ERROR << " has not allocated input memory of 'data'.";
+        THROW_CPU_NODE_ERR << " has not allocated input memory of 'data'.";
     if (!idxMemPtr || !idxMemPtr->isAllocated())
-        THROW_ERROR << " has not allocated input memory of 'indices'.";
+        THROW_CPU_NODE_ERR << " has not allocated input memory of 'indices'.";
     if (!dstMemPtr || !dstMemPtr->isAllocated())
-        THROW_ERROR << " has not allocated output memory.";
+        THROW_CPU_NODE_ERR << " has not allocated output memory.";
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        THROW_ERROR << " has unidentified preferable primitive descriptor.";
+        THROW_CPU_NODE_ERR << " has unidentified preferable primitive descriptor.";
 
     attrs.srcDims = srcMemPtr->getStaticDims();
     attrs.srcStrides = srcMemPtr->getDescWithType<BlockedMemoryDesc>()->getStrides();
@@ -129,7 +122,7 @@ GatherND::GatherNDExecutor::GatherNDExecutor(const GatherNDAttributes& attrs) :
 
 void GatherND::execute(dnnl::stream strm) {
     if (!execPtr)
-        THROW_ERROR << "has not compiled executor.";
+        THROW_CPU_NODE_ERR << "has not compiled executor.";
 
     execPtr->exec(getParentEdgeAt(GATHERND_DATA)->getMemoryPtr(),
                   getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr(),
@@ -144,15 +137,16 @@ void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPt
 
     GatherNDContext ctx { this, srcMemPtr, idxMemPtr, dstMemPtr };
     OV_SWITCH(intel_cpu, GatherNDEmitter, ctx, dataSize,
+              OV_CASE(sizeof(PrecisionTrait<Precision::I64>::value_type), PrecisionTrait<Precision::I64>::value_type),
               OV_CASE(sizeof(PrecisionTrait<Precision::I32>::value_type), PrecisionTrait<Precision::I32>::value_type),
               OV_CASE(sizeof(PrecisionTrait<Precision::I16>::value_type), PrecisionTrait<Precision::I16>::value_type),
               OV_CASE(sizeof(PrecisionTrait<Precision::I8>::value_type), PrecisionTrait<Precision::I8>::value_type));
 }
 
 void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) {
-    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(srcMemPtr->getData());
-    const int32_t* indices = reinterpret_cast<const int32_t*>(idxMemPtr->getData());
-    uint8_t* dstData = reinterpret_cast<uint8_t*>(dstMemPtr->getData());
+    auto srcData = reinterpret_cast<const uint8_t*>(srcMemPtr->getData());
+    auto indices = idxMemPtr->getData();
+    auto dstData = reinterpret_cast<uint8_t*>(dstMemPtr->getData());
 
     parallel_nt(0, [&](const int ithr, const int nthr) {
         size_t start(0lu), end(0lu);
@@ -164,32 +158,55 @@ void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const
         size_t workCounter = start;
 
         const uint8_t* shiftedSrcData = srcData + bStart * srcBatchStride;
-        const int32_t* shiftedIndices = indices + bStart * idxBatchStride + cStart * sliceRank;
         uint8_t* shiftedDstData = dstData + bStart * dstBatchStride + cStart * dataLength;
 
-        for (size_t b = bStart; b < batchSize; b++) {
-            for (size_t j = cStart; j < cycles; j++) {
-                size_t dataIdx = 0lu;
-                for (size_t i = 0; i < sliceRank; i++)
-                    dataIdx += srcShifts[i] * shiftedIndices[i];
-                cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataLength);
-                shiftedDstData += dataLength;
-                shiftedIndices += sliceRank;
-                if (++workCounter == end) {
-                    return;
+        if (idxMemPtr->getDataType() == dnnl::memory::data_type::s32) {
+            const int32_t* shiftedIndices = reinterpret_cast<const int32_t*>(indices)
+                    + bStart * idxBatchStride + cStart * sliceRank;
+
+            for (size_t b = bStart; b < batchSize; b++) {
+                for (size_t j = cStart; j < cycles; j++) {
+                    size_t dataIdx = 0lu;
+                    for (size_t i = 0; i < sliceRank; i++)
+                        dataIdx += srcShifts[i] * shiftedIndices[i];
+                    cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataLength);
+                    shiftedDstData += dataLength;
+                    shiftedIndices += sliceRank;
+                    if (++workCounter == end) {
+                        return;
+                    }
                 }
+                cStart = 0;
+                shiftedSrcData += srcBatchStride;
+            }
+        } else {
+            const int64_t* shiftedIndices = reinterpret_cast<const int64_t*>(indices)
+                    + bStart * idxBatchStride + cStart * sliceRank;
+
+            for (size_t b = bStart; b < batchSize; b++) {
+                for (size_t j = cStart; j < cycles; j++) {
+                    size_t dataIdx = 0lu;
+                    for (size_t i = 0; i < sliceRank; i++)
+                        dataIdx += srcShifts[i] * shiftedIndices[i];
+                    cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataLength);
+                    shiftedDstData += dataLength;
+                    shiftedIndices += sliceRank;
+                    if (++workCounter == end) {
+                        return;
+                    }
+                }
+                cStart = 0;
+                shiftedSrcData += srcBatchStride;
             }
-            cStart = 0;
-            shiftedSrcData += srcBatchStride;
         }
     });
 }
 
 template <typename dataType>
 void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) {
-    const dataType* srcData = reinterpret_cast<const dataType*>(srcMemPtr->getData());
-    const int32_t* indices = reinterpret_cast<const int32_t*>(idxMemPtr->getData());
-    dataType* dstData = reinterpret_cast<dataType*>(dstMemPtr->getData());
+    auto srcData = reinterpret_cast<const dataType*>(srcMemPtr->getData());
+    auto indices = idxMemPtr->getData();
+    auto dstData = reinterpret_cast<dataType*>(dstMemPtr->getData());
 
     parallel_nt(0, [&](const int ithr, const int nthr) {
         size_t start(0lu), end(0lu);
@@ -201,23 +218,46 @@ void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, c
         size_t workCounter = start;
 
         const dataType* shiftedSrcData = srcData + bStart * srcBatchStride;
-        const int32_t* shiftedIndices = indices + bStart * idxBatchStride + cStart * sliceRank;
         dataType* shiftedDstData = dstData + bStart * dstBatchStride + cStart * dataLength;
 
-        for (size_t b = bStart; b < batchSize; b++) {
-            for (size_t j = cStart; j < cycles; j++) {
-                size_t dataIdx = 0lu;
-                for (size_t i = 0lu; i < sliceRank; i++)
-                    dataIdx += srcShifts[i] * shiftedIndices[i];
-                shiftedDstData[0] = shiftedSrcData[dataIdx];
-                shiftedDstData++;
-                shiftedIndices += sliceRank;
-                if (++workCounter == end) {
-                    return;
+        if (idxMemPtr->getDataType() == dnnl::memory::data_type::s32) {
+            const int32_t* shiftedIndices = reinterpret_cast<const int32_t*>(indices)
+                    + bStart * idxBatchStride + cStart * sliceRank;
+
+            for (size_t b = bStart; b < batchSize; b++) {
+                for (size_t j = cStart; j < cycles; j++) {
+                    size_t dataIdx = 0lu;
+                    for (size_t i = 0lu; i < sliceRank; i++)
+                        dataIdx += srcShifts[i] * shiftedIndices[i];
+                    shiftedDstData[0] = shiftedSrcData[dataIdx];
+                    shiftedDstData++;
+                    shiftedIndices += sliceRank;
+                    if (++workCounter == end) {
+                        return;
+                    }
+                }
+                cStart = 0lu;
+                shiftedSrcData += srcBatchStride;
+            }
+        } else {
+            const int64_t* shiftedIndices = reinterpret_cast<const int64_t*>(indices)
+                        + bStart * idxBatchStride + cStart * sliceRank;
+
+            for (size_t b = bStart; b < batchSize; b++) {
+                for (size_t j = cStart; j < cycles; j++) {
+                    size_t dataIdx = 0lu;
+                    for (size_t i = 0lu; i < sliceRank; i++)
+                        dataIdx += srcShifts[i] * shiftedIndices[i];
+                    shiftedDstData[0] = shiftedSrcData[dataIdx];
+                    shiftedDstData++;
+                    shiftedIndices += sliceRank;
+                    if (++workCounter == end) {
+                        return;
+                    }
                 }
+                cStart = 0lu;
+                shiftedSrcData += srcBatchStride;
             }
-            cStart = 0lu;
-            shiftedSrcData += srcBatchStride;
         }
     });
 }
diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.h b/src/plugins/intel_cpu/src/nodes/gather_nd.h
index 0fec5e23337354..1d7fae2beae4d5 100644
--- a/src/plugins/intel_cpu/src/nodes/gather_nd.h
+++ b/src/plugins/intel_cpu/src/nodes/gather_nd.h
@@ -4,11 +4,7 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
-#include <memory>
-#include <vector>
 
 namespace ov {
 namespace intel_cpu {
@@ -16,14 +12,14 @@ namespace node {
 
 class GatherND : public Node {
 public:
-    GatherND(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    GatherND(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
     void execute(dnnl::stream strm) override;
     bool created() const override;
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
 protected:
     void executeDynamicImpl(dnnl::stream strm) override;
diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp
index 798b04078352bf..af5c9fbe50d78f 100644
--- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp
+++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp
@@ -11,8 +11,8 @@
 
 using namespace InferenceEngine;
 using namespace dnnl::impl::cpu;
-using namespace ov::intel_cpu;
 using namespace ov::intel_cpu::node;
+using namespace ov::intel_cpu::kernel;
 
 #define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
 
@@ -145,7 +145,7 @@ void GridSample::createPrimitive() {
     if (!jitKernel) {
         THROW_ERROR << " could not create JIT kernel.";
     }
-    jitKernel->create_ker();
+    jitKernel->create_kernel();
 
     nthr = parallel_get_max_threads();
     execParamsPerThread.resize(nthr);
diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp
index 89a1a409764615..774f85f3b69a73 100644
--- a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp
+++ b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp
@@ -58,8 +58,8 @@ class GridSample : public Node {
 
 private:
     bool alignCorners = false;
-    GridSampleInterpolationMode interpolationMode = GridSampleInterpolationMode::BILINEAR;
-    GridSamplePaddingMode paddingMode = GridSamplePaddingMode::ZEROS;
+    kernel::GridSampleInterpolationMode interpolationMode = kernel::GridSampleInterpolationMode::BILINEAR;
+    kernel::GridSamplePaddingMode paddingMode = kernel::GridSamplePaddingMode::ZEROS;
 
     uint64_t dataTypeSize = 1lu;
     uint64_t gridTypeSize = 1lu;
@@ -72,7 +72,7 @@ class GridSample : public Node {
     static constexpr size_t IN_DATA = 0;
     static constexpr size_t IN_GRID = 1;
 
-    std::shared_ptr<GridSampleKernelBase> jitKernel;
+    std::shared_ptr<kernel::GridSampleKernelBase> jitKernel;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp
index e153a55b011ace..fe3f1609882b87 100644
--- a/src/plugins/intel_cpu/src/nodes/input.cpp
+++ b/src/plugins/intel_cpu/src/nodes/input.cpp
@@ -26,7 +26,7 @@
 using namespace dnnl;
 using namespace InferenceEngine;
 using namespace details;
-using namespace ngraph::op;
+using namespace ov::op;
 using namespace dnnl::impl::cpu::x64;
 using namespace Xbyak;
 
@@ -232,7 +232,7 @@ jit_has_subnormals_base::fn_t jit_has_subnormals_function() {
 }   // namespace
 #endif
 
-Input::Input(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
         : Node(op, context, PassThroughShapeInferFactory()) {
     if (!one_of(op->get_type_info(),
             v0::Parameter::get_type_info_static(),
@@ -244,7 +244,7 @@ Input::Input(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr c
 
     constant = ConstantType::NoConst;
 
-    constOp = ngraph::as_type_ptr<ngraph::op::Constant>(op);
+    constOp = ov::as_type_ptr<ov::op::v0::Constant>(op);
     if (constOp) {
         constant = ConstantType::Const;
         cloneBlobIfRequired();
@@ -252,7 +252,7 @@ Input::Input(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr c
 }
 
 void Input::cloneBlobIfRequired() {
-    Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape());
+    Shape shape(constOp->get_shape().empty() ? ov::Shape(1, 1) : constOp->get_shape());
     const auto prec = convertPrecision(constOp->get_element_type());
     const size_t size = shape.getElementsCount();
     DnnlBlockedMemoryDesc memDesc(prec, shape);
@@ -379,7 +379,7 @@ Input::Input(const Shape& shape,
              const InferenceEngine::Precision& prc,
              const std::string& name,
              const std::string& type,
-             const GraphContext::CPtr context)
+             const GraphContext::CPtr& context)
     : Node(type, name, context) {
     constant = ConstantType::NoConst;
     if (getType() == Type::Input) {
@@ -391,7 +391,7 @@ Input::Input(const Shape& shape,
     }
 }
 
-Input::Input(MemoryDescPtr memDesc, const std::string& name, const std::string& type, const GraphContext::CPtr context)
+Input::Input(const MemoryDescPtr& memDesc, const std::string& name, const std::string& type, const GraphContext::CPtr& context)
     : Input(memDesc->getShape(), memDesc->getPrecision(), name, type, context) {
     extMemDesc = memDesc;
 }
diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h
index 71ae6b91e7660c..a1adb15a6244e4 100644
--- a/src/plugins/intel_cpu/src/nodes/input.h
+++ b/src/plugins/intel_cpu/src/nodes/input.h
@@ -15,13 +15,13 @@ namespace node {
 
 class Input : public Node {
 public:
-    Input(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
     Input(const Shape& shape,
           const InferenceEngine::Precision& prc,
           const std::string& name,
           const std::string& type,
-          const GraphContext::CPtr context);
-    Input(MemoryDescPtr memDesc, const std::string& name, const std::string& type, const GraphContext::CPtr context);
+          const GraphContext::CPtr& context);
+    Input(const MemoryDescPtr& memDesc, const std::string& name, const std::string& type, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp
index 4f24e7ac2d7a34..31372bdf65e69c 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp
@@ -1025,6 +1025,9 @@ void jitUniGatherKernel<x64::avx2>::fillVlenVector() {
 
 template <x64::cpu_isa_t isa>
 bool jitUniGatherKernel<isa>::isSupportedConfiguration(uint64_t afterAxisSize) {
+    if (jcp.dataTypeSize > 4 || jcp.idxPrc != InferenceEngine::Precision::I32) {
+        return false;
+    }
     if (!jcp.dynamicShapes && afterAxisSize <= idxElPerVec) {
         if (afterAxisSize > 1 && isa == x64::avx2 && (jcp.dataTypeSize == 1 || jcp.dataTypeSize == 2))
             // There are no enough registers for these cases.
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp
index aec991ba26360c..0548108948dbcf 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp
@@ -24,12 +24,14 @@
 
 #include "cpu/x64/jit_generator.hpp"
 #include <dnnl_types.h>
+#include <ie_precision.hpp>
 
 namespace ov {
 namespace intel_cpu {
 
 struct jGatherConfParams {
     uint64_t dataTypeSize = 1lu;
+    InferenceEngine::Precision idxPrc = InferenceEngine::Precision::I32;
     bool reverseIndexing = true;
     bool dynamicShapes = false;
     uint64_t batchDims = 0lu;
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp
index 7501dd606427ce..ea9bb8a41105fe 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp
@@ -1,19 +1,17 @@
-// Copyright (C) 2022 Intel Corporation
+// Copyright (C) 2018-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "grid_sample.hpp"
 
 using namespace dnnl::impl::cpu;
-
-namespace ov {
-namespace intel_cpu {
+using namespace ov::intel_cpu::kernel;
 
 #define GET_OFF(field) offsetof(GridSamplesKernelExecArgs, field)
 
 template <x64::cpu_isa_t isa>
-GridSampleKernel<isa>::GridSampleKernel(const GridSampleKernelConfParams& jcp) :
-        GridSampleKernelBase(jit_name(), jcp) {
+GridSampleKernel<isa>::GridSampleKernel(const GridSampleKernelConfParams& jcp)
+        : GridSampleKernelBase(jit_name(), jcp, isa) {
     vlen = x64::cpu_isa_traits<isa>::vlen;
     dataTypeSize = jcp.inDataPrc.size();
     gridTypeSize = jcp.gridPrc.size();
@@ -25,14 +23,6 @@ GridSampleKernel<isa>::GridSampleKernel(const GridSampleKernelConfParams& jcp) :
         dataTypeShift = 2;
 }
 
-template <x64::cpu_isa_t isa>
-void GridSampleKernel<isa>::create_ker() {
-    auto code = x64::jit_generator::create_kernel();
-    if (code != dnnl::impl::status::success)
-        IE_THROW() << "Could not create GridSample kernel. Error code: " << std::to_string(code);
-    ker_ = (decltype(ker_))jit_ker();
-}
-
 template <x64::cpu_isa_t isa>
 void GridSampleKernel<isa>::generate() {
     this->preamble();
@@ -2084,6 +2074,3 @@ void GridSampleKernel<isa>::hwShiftPs2dq(const Vmm& vDst, const Vmm& vHCoord, co
 template class GridSampleKernel<x64::avx512_core>;
 template class GridSampleKernel<x64::avx2>;
 template class GridSampleKernel<x64::sse41>;
-
-}   // namespace intel_cpu
-}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp
index c24100259cd5bb..3883c2b1b7de0f 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp
@@ -10,6 +10,7 @@
 
 namespace ov {
 namespace intel_cpu {
+namespace kernel {
 
 enum class GridSampleInterpolationMode { BILINEAR, BICUBIC, NEAREST };
 enum class GridSamplePaddingMode { ZEROS, BORDER, REFLECTION };
@@ -59,28 +60,16 @@ enum coord {
     w, h
 };
 
-class GridSampleKernelBase: public JitKernelBase {
+class GridSampleKernelBase: public JitKernel<GridSampleKernelConfParams, GridSamplesKernelExecArgs> {
 public:
-    void (*ker_)(const GridSamplesKernelExecArgs *);
-    void operator()(const GridSamplesKernelExecArgs *args) {
-        assert(ker_);
-        ker_(args);
-    }
-    explicit GridSampleKernelBase(const char* name, const GridSampleKernelConfParams& jcp) : JitKernelBase(name), ker_(nullptr), jcp(jcp) {}
+    explicit GridSampleKernelBase(const char* name, const GridSampleKernelConfParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t isa)
+        : JitKernel(name, jcp, isa) {}
 
-    virtual void create_ker() = 0;
-    uint64_t getVecLen() {
-        return vlen;
-    }
     uint64_t getDataElPerVec() {
         return dataElPerVec;
     }
-    uint64_t getGridElPerVec() {
-        return gridElPerVec;
-    }
 
 protected:
-    GridSampleKernelConfParams jcp;
     uint64_t vlen         = 16lu;
     uint64_t dataTypeSize = 1lu;
     uint64_t gridTypeSize = 1lu;
@@ -95,7 +84,6 @@ class GridSampleKernel : public GridSampleKernelBase {
 
     explicit GridSampleKernel(const GridSampleKernelConfParams& jcp);
 
-    void create_ker() override;
     void generate() override;
 
     using Vmm   = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::avx512_core, Xbyak::Zmm,
@@ -173,5 +161,6 @@ class GridSampleKernel : public GridSampleKernelBase {
     void hwShiftPs2dq(const Vmm& vDst, const Vmm& vHCoord, const Vmm& vWCoord, const Vmm& vWidth);
 };
 
+}   // namespace kernel
 }   // namespace intel_cpu
 }   // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp
index 6afbecf143f27b..3967a944d11be1 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp
@@ -1,175 +1,296 @@
-// Copyright (C) 2022 Intel Corporation
+// Copyright (C) 2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "jit_kernel_base.hpp"
 
-using namespace ov;
-using namespace intel_cpu;
+using namespace ov::intel_cpu::kernel;
 using namespace dnnl::impl::cpu;
+using namespace Xbyak;
 
 
-void JitKernelBase::uni_vfmsub132ps(const Xbyak::Xmm& vDst,
-                                    const Xbyak::Xmm& vSrc,
-                                    const Xbyak::Operand& op) {
+JitKernelBase::JitKernelBase(const char* name, x64::cpu_isa_t isa) :
+        x64::jit_generator(name, nullptr, dnnl::impl::cpu::x64::MAX_CODE_SIZE, true, isa) {
+}
+
+void JitKernelBase::uni_vfmsub132ps(const Xmm& vmm_dst,
+                                    const Xmm& vmm_src,
+                                    const Operand& op) {
     if (isValidIsa(x64::avx2)) {
-        vfmsub132ps(vDst, vSrc, op);
+        vfmsub132ps(vmm_dst, vmm_src, op);
     } else if (isValidIsa(x64::avx)) {
-        assert(vDst.getIdx() != vSrc.getIdx());
-        vmulps(vDst, vDst, op);
-        vsubps(vDst, vDst, vSrc);
+        assert(vmm_dst.getIdx() != vmm_src.getIdx());
+        vmulps(vmm_dst, vmm_dst, op);
+        vsubps(vmm_dst, vmm_dst, vmm_src);
     } else {
-        assert(vDst.getIdx() != vSrc.getIdx());
-        mulps(vDst, op);
-        subps(vDst, vSrc);
+        assert(vmm_dst.getIdx() != vmm_src.getIdx());
+        mulps(vmm_dst, op);
+        subps(vmm_dst, vmm_src);
     }
 }
 
-void JitKernelBase::uni_vfnmadd132ps(const Xbyak::Xmm& vDst,
-                                     const Xbyak::Xmm& vSrc,
-                                     const Xbyak::Operand& op) {
+void JitKernelBase::uni_vfnmadd132ps(const Xmm& vmm_dst,
+                                     const Xmm& vmm_src,
+                                     const Operand& op) {
     if (isValidIsa(x64::avx2)) {
-        vfnmadd132ps(vDst, vSrc, op);
+        vfnmadd132ps(vmm_dst, vmm_src, op);
     } else if (isValidIsa(x64::avx)) {
-        assert(vDst.getIdx() != vSrc.getIdx());
-        vmulps(vDst, vDst, op);
-        vsubps(vDst, vSrc, vDst);
+        assert(vmm_dst.getIdx() != vmm_src.getIdx());
+        vmulps(vmm_dst, vmm_dst, op);
+        vsubps(vmm_dst, vmm_src, vmm_dst);
     } else {
-        assert(vDst.getIdx() != vSrc.getIdx());
-        mulps(vDst, op);
-        subps(vSrc, vDst);
-        movups(vDst, vSrc);
+        assert(vmm_dst.getIdx() != vmm_src.getIdx());
+        mulps(vmm_dst, op);
+        subps(vmm_src, vmm_dst);
+        movups(vmm_dst, vmm_src);
     }
 }
 
-void JitKernelBase::uni_vfmsub231ps(const Xbyak::Xmm& vDst,
-                                    const Xbyak::Xmm& vSrc,
-                                    const Xbyak::Operand& op) {
+void JitKernelBase::uni_vfmsub231ps(const Xmm& vmm_dst,
+                                    const Xmm& vmm_src,
+                                    const Operand& op) {
     if (isValidIsa(x64::avx2)) {
-        vfmsub231ps(vDst, vSrc, op);
+        vfmsub231ps(vmm_dst, vmm_src, op);
     } else if (isValidIsa(x64::avx)) {
-        assert(!vDst.isEqualIfNotInherited(op));
-        vmulps(vSrc, vSrc, op);
-        vsubps(vDst, vSrc, vDst);
+        assert(!vmm_dst.isEqualIfNotInherited(op));
+        vmulps(vmm_src, vmm_src, op);
+        vsubps(vmm_dst, vmm_src, vmm_dst);
     } else {
-        assert(!vDst.isEqualIfNotInherited(op));
-        mulps(vSrc, op);
-        subps(vSrc, vDst);
-        movups(vDst, vSrc);
+        assert(!vmm_dst.isEqualIfNotInherited(op));
+        mulps(vmm_src, op);
+        subps(vmm_src, vmm_dst);
+        movups(vmm_dst, vmm_src);
     }
 }
 
-void JitKernelBase::uni_vpaddd(const Xbyak::Ymm& vDst,
-                               const Xbyak::Ymm& vSrc,
-                               const Xbyak::Operand& op) {
+void JitKernelBase::uni_vpaddd(const Ymm& vmm_dst,
+                               const Ymm& vmm_src,
+                               const Operand& op) {
     if (isValidIsa(x64::avx2)) {
-        vpaddd(vDst, vSrc, op);
+        vpaddd(vmm_dst, vmm_src, op);
     } else if (isValidIsa(x64::avx)) {
-        Xbyak::Xmm xmmDst(vDst.getIdx());
-        vmovups(vDst, vSrc);
+        Xmm xmmDst(vmm_dst.getIdx());
+        vmovups(vmm_dst, vmm_src);
         if (op.isYMM()) {
-            Xbyak::Ymm ymmOp(op.getIdx());
-            Xbyak::Xmm xmmOp(op.getIdx());
+            Ymm ymmOp(op.getIdx());
+            Xmm xmmOp(op.getIdx());
             paddd(xmmDst, xmmOp);
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             vperm2f128(ymmOp, ymmOp, ymmOp, 0x1);
             paddd(xmmDst, xmmOp);
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             vperm2f128(ymmOp, ymmOp, ymmOp, 0x1);
         } else if (op.isMEM()) {
             const int vlen = x64::cpu_isa_traits<x64::sse41>::vlen;
             paddd(xmmDst, op.getAddress());
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             paddd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]);
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
         } else {
             IE_THROW() << "Not supported operand type.";
         }
     } else if (isValidIsa(x64::sse41)) {
-        assert(vDst.getIdx() != vSrc.getIdx());
-        paddd(vDst, op);
+        assert(vmm_dst.getIdx() != vmm_src.getIdx());
+        paddd(vmm_dst, op);
     } else {
         IE_THROW() << "Not defined behavior for instruction 'vpaddd' in current instructions set.";
     }
 }
 
-void JitKernelBase::uni_vpsubd(const Xbyak::Ymm& vDst,
-                               const Xbyak::Ymm& vSrc,
-                               const Xbyak::Operand& op) {
+void JitKernelBase::uni_vaddpd(const Xmm& vmm_dst, const Operand &op1, const Operand &op2) {
+    if (isValidIsa(x64::avx)) {
+        vaddpd(vmm_dst, op1, op2);
+    } else {
+        if (!vmm_dst.isEqualIfNotInherited(op1)) {  // op1 is a generic Operand: an equal index alone does not mean it is vmm_dst.
+            movupd(vmm_dst, op1);
+        }
+        addpd(vmm_dst, op2);
+    }
+}
+
+void JitKernelBase::uni_vpsubd(const Ymm& vmm_dst,
+                               const Ymm& vmm_src,
+                               const Operand& op) {
     if (isValidIsa(x64::avx2)) {
-        vpsubd(vDst, vSrc, op);
+        vpsubd(vmm_dst, vmm_src, op);
     } else if (isValidIsa(x64::avx)) {
-        Xbyak::Xmm xmmDst(vDst.getIdx());
-        vmovups(vDst, vSrc);
+        Xmm xmmDst(vmm_dst.getIdx());
+        vmovups(vmm_dst, vmm_src);
         if (op.isYMM()) {
-            Xbyak::Ymm ymmOp(op.getIdx());
-            Xbyak::Xmm xmmOp(op.getIdx());
+            Ymm ymmOp(op.getIdx());
+            Xmm xmmOp(op.getIdx());
             psubd(xmmDst, xmmOp);
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             vperm2f128(ymmOp, ymmOp, ymmOp, 0x1);
             psubd(xmmDst, xmmOp);
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             vperm2f128(ymmOp, ymmOp, ymmOp, 0x1);
         } else if (op.isMEM()) {
             const int vlen = x64::cpu_isa_traits<x64::sse41>::vlen;
             psubd(xmmDst, op.getAddress());
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             psubd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]);
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
         } else {
             IE_THROW() << "Not supported operand type.";
         }
     } else if (isValidIsa(x64::sse41)) {
-        assert(vDst.getIdx() != vSrc.getIdx());
-        psubd(vDst, op);
+        if (vmm_dst.getIdx() != vmm_src.getIdx()) {
+            movups(vmm_dst, vmm_src);
+        }
+        psubd(vmm_dst, op);
     } else {
         IE_THROW() << "Not defined behavior for instruction 'vpsubd' in current instructions set.";
     }
 }
 
-void JitKernelBase::uni_vdivps(const Xbyak::Xmm& vDst,
-                               const Xbyak::Operand& op1,
-                               const Xbyak::Operand& op2) {
+void JitKernelBase::uni_vmulpd(const Xmm& vmm_dst,
+                               const Operand& op1,
+                               const Operand& op2) {
+    if (isValidIsa(x64::avx)) {
+        vmulpd(vmm_dst, op1, op2);
+    } else {
+        if (!vmm_dst.isEqualIfNotInherited(op1)) {  // op1 is a generic Operand: an equal index alone does not mean it is vmm_dst.
+            movupd(vmm_dst, op1);
+        }
+        mulpd(vmm_dst, op2);
+    }
+}
+
+void JitKernelBase::uni_vdivps(const Xmm& vmm_dst,
+                               const Operand& op1,
+                               const Operand& op2) {
+    if (isValidIsa(x64::avx)) {
+        vdivps(vmm_dst, op1, op2);
+    } else {
+        if (!vmm_dst.isEqualIfNotInherited(op1)) {
+            movups(vmm_dst, op1);
+        }
+        divps(vmm_dst, op2);
+    }
+}
+
+void JitKernelBase::uni_vdivpd(const Xmm& vmm_dst,
+                               const Operand& op1,
+                               const Operand& op2) {
+    if (isValidIsa(x64::avx)) {
+        vdivpd(vmm_dst, op1, op2);
+    } else {
+        if (!vmm_dst.isEqualIfNotInherited(op1)) {  // op1 is a generic Operand: an equal index alone does not mean it is vmm_dst.
+            movupd(vmm_dst, op1);
+        }
+        divpd(vmm_dst, op2);
+    }
+}
+
+void JitKernelBase::uni_vandps(const Xmm& vmm_dst,
+                               const Xmm& vmm_src,
+                               const Operand &op) {
+    if (isValidIsa(x64::avx)) {
+        vandps(vmm_dst, vmm_src, op);
+    } else {
+        if (vmm_dst.getIdx() != vmm_src.getIdx()) {
+            movups(vmm_dst, vmm_src);
+        }
+        andps(vmm_dst, op);
+    }
+}
+
+void JitKernelBase::uni_vandpd(const Xmm& vmm_dst,
+                               const Xmm& vmm_src,
+                               const Operand &op) {
+    if (isValidIsa(x64::avx)) {
+        vandpd(vmm_dst, vmm_src, op);
+    } else {
+        if (vmm_dst.getIdx() != vmm_src.getIdx()) {
+            movupd(vmm_dst, vmm_src);
+        }
+        andpd(vmm_dst, op);
+    }
+}
+
+void JitKernelBase::uni_vandnps(const Xmm& vmm_dst,
+                                const Xmm& vmm_src,
+                                const Operand &op) {
+    if (isValidIsa(x64::avx)) {
+        vandnps(vmm_dst, vmm_src, op);
+    } else {
+        if (!vmm_dst.isEqualIfNotInherited(vmm_src)) {
+            movups(vmm_dst, vmm_src);
+        }
+        andnps(vmm_dst, op);
+    }
+}
+
+void JitKernelBase::uni_vorpd(const Xmm& vmm_dst,
+                              const Xmm& vmm_src,
+                              const Operand &op) {
     if (isValidIsa(x64::avx)) {
-        vdivps(vDst, op1, op2);
+        vorpd(vmm_dst, vmm_src, op);
     } else {
-        if (!vDst.isEqualIfNotInherited(op1)) {
-            movups(vDst, op1);
+        if (vmm_dst.getIdx() != vmm_src.getIdx()) {
+            movupd(vmm_dst, vmm_src);
         }
-        divps(vDst, op2);
+        orpd(vmm_dst, op);
     }
 }
 
-void JitKernelBase::uni_vandps(const Xbyak::Xmm& vDst,
-                               const Xbyak::Xmm& vSrs,
-                               const Xbyak::Operand &op) {
+void JitKernelBase::uni_vcmppd(const Xmm& vmm_dst,
+                               const Xmm &vmm_src,
+                               const Operand &op,
+                               const uint8_t imm) {
     if (isValidIsa(x64::avx)) {
-        vandps(vDst, vSrs, op);
+        vcmppd(vmm_dst, vmm_src, op, imm);
     } else {
-        if (!vDst.isEqualIfNotInherited(vSrs)) {
-            movups(vDst, vSrs);
+        if (vmm_dst.getIdx() != vmm_src.getIdx()) {
+            movupd(vmm_dst, vmm_src);
         }
-        andps(vDst, op);
+        cmppd(vmm_dst, op, imm);
     }
 }
 
-void JitKernelBase::uni_vandnps(const Xbyak::Xmm& vDst,
-                                const Xbyak::Xmm& vSrs,
-                                const Xbyak::Operand &op) {
+void JitKernelBase::uni_vmaxpd(const Xmm& vmm_dst, const Operand &op1, const Operand &op2) {
     if (isValidIsa(x64::avx)) {
-        vandnps(vDst, vSrs, op);
+        vmaxpd(vmm_dst, op1, op2);
     } else {
-        if (!vDst.isEqualIfNotInherited(vSrs)) {
-            movups(vDst, vSrs);
+        if (!vmm_dst.isEqualIfNotInherited(op1)) {  // op1 is a generic Operand: an equal index alone does not mean it is vmm_dst.
+            movupd(vmm_dst, op1);
         }
-        andnps(vDst, op);
+        maxpd(vmm_dst, op2);
     }
 }
 
-void JitKernelBase::gatherdd(const Xbyak::Xmm&    vDst,
-                             const Xbyak::Reg64&  rSrcPtr,
-                             const Xbyak::Xmm&    vSrcShift,
-                             const Xbyak::Opmask& kReadMask,
+void JitKernelBase::uni_vminpd(const Xmm& vmm_dst, const Operand &op1, const Operand &op2) {
+    if (isValidIsa(x64::avx)) {
+        vminpd(vmm_dst, op1, op2);
+    } else {
+        if (!vmm_dst.isEqualIfNotInherited(op1)) {  // op1 is a generic Operand: an equal index alone does not mean it is vmm_dst.
+            movupd(vmm_dst, op1);
+        }
+        minpd(vmm_dst, op2);
+    }
+}
+
+void JitKernelBase::uni_vcvtpd2dq(const Xbyak::Xmm &vmm_dst, const Xbyak::Operand &op) {
+    if (isValidIsa(x64::avx)) {
+        vcvtpd2dq(vmm_dst, op);
+    } else {
+        cvtpd2dq(vmm_dst, op);
+    }
+}
+
+void JitKernelBase::uni_vcvtpd2ps(const Xbyak::Xmm &vmm_dst, const Xbyak::Operand &op) {
+    if (isValidIsa(x64::avx)) {
+        vcvtpd2ps(vmm_dst, op);
+    } else {
+        cvtpd2ps(vmm_dst, op);
+    }
+}
+
+void JitKernelBase::gatherdd(const Xmm&    vmm_dst,
+                             const Reg64&  rSrcPtr,
+                             const Xmm&    vSrcShift,
+                             const Opmask& kReadMask,
                              const bool useMask,
                              const bool zeroFill) {
     if (kReadMask.getIdx() == 0) {
@@ -178,42 +299,42 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm&    vDst,
     if (!useMask)
         kxnord(kReadMask, kReadMask, kReadMask);
     if (zeroFill)
-        uni_vpxor(vDst, vDst, vDst);
+        uni_vpxor(vmm_dst, vmm_dst, vmm_dst);
 
-    vpgatherdd(vDst | kReadMask, ptr[rSrcPtr + vSrcShift]);
+    vpgatherdd(vmm_dst | kReadMask, ptr[rSrcPtr + vSrcShift]);
 }
 
-void JitKernelBase::gatherdd(const Xbyak::Xmm&   vDst,
-                             const Xbyak::Reg64& rSrcPtr,
-                             const Xbyak::Xmm&   vSrcShift,
-                             const Xbyak::Xmm&   vReadMask,
+void JitKernelBase::gatherdd(const Xmm&   vmm_dst,
+                             const Reg64& rSrcPtr,
+                             const Xmm&   vSrcShift,
+                             const Xmm&   vReadMask,
                              const bool useMask,
                              const bool zeroFill) {
-    if (vDst.getIdx() == vSrcShift.getIdx() || vDst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) {
+    if (vmm_dst.getIdx() == vSrcShift.getIdx() || vmm_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) {
         IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same.";
     }
     if (zeroFill)
-        pxor(vDst, vDst); // Don't use vpxor. It zeros the rest of the YMM register.
+        pxor(vmm_dst, vmm_dst); // Don't use vpxor. It zeros the rest of the YMM register.
 
     if (isValidIsa(x64::avx2)) {
         if (!useMask)
             uni_vpcmpeqd(vReadMask, vReadMask, vReadMask);
 
-        vpgatherdd(vDst, ptr[rSrcPtr + vSrcShift], vReadMask);
+        vpgatherdd(vmm_dst, ptr[rSrcPtr + vSrcShift], vReadMask);
     } else {
         auto rAux = getReg64();
-        Xbyak::Reg32 r32Aux = Xbyak::Reg32(rAux.getIdx());
+        Reg32 r32Aux = Reg32(rAux.getIdx());
         const uint8_t elPerVec = x64::cpu_isa_traits<x64::sse41>::vlen / sizeof(int);
 
         for (uint8_t i = 0; i < elPerVec; i++) {
-            Xbyak::Label lLoopNext;
+            Label lLoopNext;
             if (useMask) {
                 uni_vpextrd(r32Aux, vReadMask, i);
                 cmp(r32Aux, 0); // TODO: check significant bit
                 je(lLoopNext, T_NEAR);
             }
             uni_vpextrd(r32Aux, vSrcShift, i);
-            pinsrd(vDst, ptr[rSrcPtr + rAux], i);
+            pinsrd(vmm_dst, ptr[rSrcPtr + rAux], i);
 
             if (useMask)
                 L(lLoopNext);
@@ -221,30 +342,30 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm&   vDst,
     }
 }
 
-void JitKernelBase::gatherdd(const Xbyak::Ymm&   vDst,
-                             const Xbyak::Reg64& rSrcPtr,
-                             const Xbyak::Ymm&   vSrcShift,
-                             const Xbyak::Ymm&   vReadMask,
+void JitKernelBase::gatherdd(const Ymm&   vmm_dst,
+                             const Reg64& rSrcPtr,
+                             const Ymm&   vSrcShift,
+                             const Ymm&   vReadMask,
                              const bool useMask,
                              const bool zeroFill) {
-    if (vDst.getIdx() == vSrcShift.getIdx() || vDst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) {
+    if (vmm_dst.getIdx() == vSrcShift.getIdx() || vmm_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) {
         IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same.";
     }
     if (isValidIsa(x64::avx2)) {
         if (!useMask)
             uni_vpcmpeqd(vReadMask, vReadMask, vReadMask);
         if (zeroFill)
-            uni_vpxor(vDst, vDst, vDst);
+            uni_vpxor(vmm_dst, vmm_dst, vmm_dst);
 
-        vpgatherdd(vDst, ptr[rSrcPtr + vSrcShift], vReadMask);
+        vpgatherdd(vmm_dst, ptr[rSrcPtr + vSrcShift], vReadMask);
     } else {
-        Xbyak::Xmm xmmDst      = Xbyak::Xmm(vDst.getIdx()),
-                   xmmSrcShft  = Xbyak::Xmm(vSrcShift.getIdx()),
-                   xmmReadMask = Xbyak::Xmm(vReadMask.getIdx());
+        Xmm xmmDst      = Xmm(vmm_dst.getIdx()),
+                   xmmSrcShft  = Xmm(vSrcShift.getIdx()),
+                   xmmReadMask = Xmm(vReadMask.getIdx());
         for (uint8_t i = 0; i < 2; i++) {
             gatherdd(xmmDst, rSrcPtr, xmmSrcShft, xmmReadMask, useMask, zeroFill);
 
-            vperm2f128(vDst, vDst, vDst, 0x1);
+            vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
             vperm2f128(vSrcShift, vSrcShift, vSrcShift, 0x1);
             if (useMask)
                 vperm2f128(vReadMask, vReadMask, vReadMask, 0x1);
@@ -252,7 +373,7 @@ void JitKernelBase::gatherdd(const Xbyak::Ymm&   vDst,
     }
 }
 
-void JitKernelBase::uni_vpbroadcastd(const Xbyak::Xmm &x, const Xbyak::Operand &op) {
+void JitKernelBase::uni_vpbroadcastd(const Xmm &x, const Operand &op) {
     if (isValidIsa(x64::avx2)) {
         vpbroadcastd(x, op);
     } else if (isValidIsa(x64::avx)) {
@@ -268,14 +389,14 @@ void JitKernelBase::uni_vpbroadcastd(const Xbyak::Xmm &x, const Xbyak::Operand &
     }
 }
 
-void JitKernelBase::uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand &op) {
+void JitKernelBase::uni_vpbroadcastd(const Ymm &x, const Operand &op) {
     if (isValidIsa(x64::avx2)) {
         vpbroadcastd(x, op);
     } else {
         if (op.isMEM()) {
             vbroadcastss(x, op.getAddress());
         } else {
-            const Xbyak::Xmm t(x.getIdx());
+            const Xmm t(x.getIdx());
             if (!t.isEqualIfNotInherited(op)) {
                 vmovss(t, t, op);
             }
@@ -285,8 +406,8 @@ void JitKernelBase::uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand &
     }
 }
 
-void JitKernelBase::fillRestWorkMask(const Xbyak::Opmask& dstMask,
-                                     const Xbyak::Reg64& rWorkRest) {
+void JitKernelBase::fillRestWorkMask(const Opmask& dstMask,
+                                     const Reg64& rWorkRest) {
     auto rOnes = getReg64();
 
     mov(rOnes, 0xFFFFFFFFFFFFFFFF);
@@ -295,15 +416,15 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Opmask& dstMask,
     kmovq(dstMask, rOnes);
 }
 
-void JitKernelBase::fillRestWorkMask(const Xbyak::Xmm& xmmDstMask,
-                                     const Xbyak::Reg64& rWorkRest,
+void JitKernelBase::fillRestWorkMask(const Xmm& xmmDstMask,
+                                     const Reg64& rWorkRest,
                                      const uint64_t typeSize) {
     if (!one_of(typeSize, 1u, 2u, 4u, 8u)) {
         IE_THROW() << "Could not fill data with type size " << typeSize;
     }
-    Xbyak::Label lEnd;
+    Label lEnd;
     auto r32Ones = getReg32();
-    Xbyak::Reg64 r64Ones(r32Ones.getIdx());
+    Reg64 r64Ones(r32Ones.getIdx());
     auto elPerVec = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
 
     mov(r64Ones, 0xFFFFFFFFFFFFFFFF);
@@ -324,22 +445,22 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Xmm& xmmDstMask,
     L(lEnd);
 }
 
-void JitKernelBase::fillRestWorkMask(const Xbyak::Ymm& ymmDstMask,
-                                     const Xbyak::Reg64& rWorkRest,
+void JitKernelBase::fillRestWorkMask(const Ymm& ymmDstMask,
+                                     const Reg64& rWorkRest,
                                      const uint64_t typeSize) {
     if (!one_of(typeSize, 1u, 2u, 4u, 8u)) {
         IE_THROW() << "Could not fill data with type size " << typeSize;
     }
-    Xbyak::Label lEnd;
+    Label lEnd;
     auto elPerVec = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
     auto r32Ones = getReg32();
-    Xbyak::Reg64 r64Ones(r32Ones.getIdx());
-    Xbyak::Xmm xmmDstMask(ymmDstMask.getIdx());
+    Reg64 r64Ones(r32Ones.getIdx());
+    Xmm xmmDstMask(ymmDstMask.getIdx());
 
     mov(r64Ones, 0xFFFFFFFFFFFFFFFF);
     uni_vpxor(ymmDstMask, ymmDstMask, ymmDstMask);
     for (uint8_t i = 0; i < 2; i++) {
-        Xbyak::Label lPerm;
+        Label lPerm;
         for (uint8_t j = 0; j < elPerVec; j++) {
             cmp(rWorkRest, i * elPerVec + j);
             jle(i == 0 ? lEnd : lPerm, T_NEAR);
@@ -362,18 +483,18 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Ymm& ymmDstMask,
     L(lEnd);
 }
 
-void JitKernelBase::load(const Xbyak::Xmm&     vDst,
-                         const Xbyak::Address& srcAddr,
-                         const Xbyak::Reg64&   rLoadNum,
-                         const size_t          typeSize,
-                         const bool            zeroFilling) {
-    if (!one_of(typeSize, 1u, 2u, 4u, 8u)) {
+void JitKernelBase::load(const Xmm&     vmm_dst,
+                         const Address& srcAddr,
+                         const Reg64&   rLoadNum,
+                         const size_t   typeSize,
+                         const bool     zeroFilling) {
+    if (!one_of(typeSize, 1lu, 2lu, 4lu, 8lu)) {
         IE_THROW() << "Could not load data with type size " << typeSize;
     }
     const uint8_t elPerVec = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
-    Xbyak::Label lEnd;
+    Label lEnd;
     if (zeroFilling)
-        pxor(vDst, vDst);
+        pxor(vmm_dst, vmm_dst);
 
     for (uint8_t i = 0; i < elPerVec; i++) {
         cmp(rLoadNum, i);
@@ -381,33 +502,33 @@ void JitKernelBase::load(const Xbyak::Xmm&     vDst,
 
         const size_t offset = i * typeSize;
         if (typeSize == 1)
-            pinsrb(vDst, ptr[srcAddr.getRegExp() + offset], i);
+            pinsrb(vmm_dst, ptr[srcAddr.getRegExp() + offset], i);
         else if (typeSize == 2)
-            pinsrw(vDst, ptr[srcAddr.getRegExp() + offset], i);
+            pinsrw(vmm_dst, ptr[srcAddr.getRegExp() + offset], i);
         else if (typeSize == 4)
-            pinsrd(vDst, ptr[srcAddr.getRegExp() + offset], i);
+            pinsrd(vmm_dst, ptr[srcAddr.getRegExp() + offset], i);
         else if (typeSize == 8)
-            pinsrq(vDst, ptr[srcAddr.getRegExp() + offset], i);
+            pinsrq(vmm_dst, ptr[srcAddr.getRegExp() + offset], i);
     }
     L(lEnd);
 }
 
-void JitKernelBase::load(const Xbyak::Ymm&     vDst,
-                         const Xbyak::Address& srcAddr,
-                         const Xbyak::Reg64&   rLoadNum,
-                         const size_t          typeSize,
-                         const bool            zeroFilling) {
-    if (!one_of(typeSize, 1u, 2u, 4u, 8u)) {
+void JitKernelBase::load(const Ymm&     vmm_dst,
+                         const Address& srcAddr,
+                         const Reg64&   rLoadNum,
+                         const size_t   typeSize,
+                         const bool     zeroFilling) {
+    if (!one_of(typeSize, 1lu, 2lu, 4lu, 8lu)) {
         IE_THROW() << "Could not load data with type size " << typeSize;
     }
     const size_t elPerXmm = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
-    Xbyak::Label lEnd;
+    Label lEnd;
     if (zeroFilling)
-        uni_vpxor(vDst, vDst, vDst);
-    Xbyak::Xmm xmmDst(vDst.getIdx());
+        uni_vpxor(vmm_dst, vmm_dst, vmm_dst);
+    Xmm xmmDst(vmm_dst.getIdx());
 
     for (size_t i = 0lu; i < 2lu; i++) {
-        Xbyak::Label lPerm;
+        Label lPerm;
         const size_t idx = i * elPerXmm;
         const size_t offset0 = idx * typeSize;
 
@@ -427,19 +548,19 @@ void JitKernelBase::load(const Xbyak::Ymm&     vDst,
         }
 
         L(lPerm);
-        vperm2f128(vDst, vDst, vDst, 0x1);
+        vperm2f128(vmm_dst, vmm_dst, vmm_dst, 0x1);
     }
     L(lEnd);
 }
 
-void JitKernelBase::store(const Xbyak::Address& dstAddr,
-                          const Xbyak::Xmm&     vSrc,
-                          const Xbyak::Reg64&   rToStoreNum,
+void JitKernelBase::store(const Address& dstAddr,
+                          const Xmm&     vmm_src,
+                          const Reg64&   rToStoreNum,
                           const size_t          typeSize) {
     if (!one_of(typeSize, 1u, 2u, 4u, 8u)) {
         IE_THROW() << "Could not store data with type size " << typeSize;
     }
-    Xbyak::Label lEnd;
+    Label lEnd;
     const size_t elPerVec = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
 
     for (size_t i = 0; i < elPerVec; i++) {
@@ -448,31 +569,31 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr,
 
         const size_t offset = i * typeSize;
         if (typeSize == 1) {
-            uni_vpextrb(ptr[dstAddr.getRegExp() + offset], vSrc, i);
+            uni_vpextrb(ptr[dstAddr.getRegExp() + offset], vmm_src, i);
         } else if (typeSize == 2) {
-            uni_vpextrw(ptr[dstAddr.getRegExp() + offset], vSrc, i);
+            uni_vpextrw(ptr[dstAddr.getRegExp() + offset], vmm_src, i);
         } else if (typeSize == 4) {
-            uni_vpextrd(ptr[dstAddr.getRegExp() + offset], vSrc, i);
+            uni_vpextrd(ptr[dstAddr.getRegExp() + offset], vmm_src, i);
         } else if (typeSize == 8) {
-            uni_vpextrq(ptr[dstAddr.getRegExp() + offset], vSrc, i);
+            uni_vpextrq(ptr[dstAddr.getRegExp() + offset], vmm_src, i);
         }
     }
     L(lEnd);
 }
 
-void JitKernelBase::store(const Xbyak::Address& dstAddr,
-                          const Xbyak::Ymm&     vSrc,
-                          const Xbyak::Reg64&   rToStoreNum,
+void JitKernelBase::store(const Address& dstAddr,
+                          const Ymm&     vmm_src,
+                          const Reg64&   rToStoreNum,
                           const size_t          typeSize) {
     if (!one_of(typeSize, 1u, 2u, 4u, 8u)) {
         IE_THROW() << "Could not store data with type size " << typeSize;
     }
-    Xbyak::Label lEnd;
-    Xbyak::Xmm xmmSrc(vSrc.getIdx());
+    Label lEnd;
+    Xmm xmm_src(vmm_src.getIdx());
     const size_t elPerXmm = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
 
     for (int i = 0; i < 2; i++) {
-        Xbyak::Label lPerm;
+        Label lPerm;
         const size_t idx = i * elPerXmm;
         const size_t offset0 = idx * typeSize;
 
@@ -482,32 +603,32 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr,
 
             const size_t offset = offset0 + j * typeSize;
             if (typeSize == 8) {
-                uni_vpextrq(ptr[dstAddr.getRegExp() + offset], xmmSrc, j);
+                uni_vpextrq(ptr[dstAddr.getRegExp() + offset], xmm_src, j);
             } else if (typeSize == 4) {
-                uni_vpextrd(ptr[dstAddr.getRegExp() + offset], xmmSrc, j);
+                uni_vpextrd(ptr[dstAddr.getRegExp() + offset], xmm_src, j);
             } else if (typeSize == 2) {
-                uni_vpextrw(ptr[dstAddr.getRegExp() + offset], xmmSrc, j);
+                uni_vpextrw(ptr[dstAddr.getRegExp() + offset], xmm_src, j);
             } else if (typeSize == 1) {
-                uni_vpextrb(ptr[dstAddr.getRegExp() + offset], xmmSrc, j);
+                uni_vpextrb(ptr[dstAddr.getRegExp() + offset], xmm_src, j);
             }
         }
 
         L(lPerm);
-        vperm2f128(vSrc, vSrc, vSrc, 0x1);
+        vperm2f128(vmm_src, vmm_src, vmm_src, 0x1);
     }
     L(lEnd);
 }
 
-void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst,
-                             const Xbyak::Reg64& rSrc,
-                             const Xbyak::Xmm&   vReadMask,
-                             const Xbyak::Xmm&   vSrcShift,
-                             const Xbyak::Reg64& rToStoreNum,
+void JitKernelBase::memMovDD(const Reg64& rDst,
+                             const Reg64& rSrc,
+                             const Xmm&   vReadMask,
+                             const Xmm&   vSrcShift,
+                             const Reg64& rToStoreNum,
                              const bool          useMask,
                              const bool          zeroFill) {
-    Xbyak::Label lEnd;
+    Label lEnd;
     auto rAux = getReg64();
-    Xbyak::Reg32 r32Aux = Xbyak::Reg32(rAux.getIdx());
+    Reg32 r32Aux = Reg32(rAux.getIdx());
     const uint8_t typeSize = sizeof(int);
     const uint8_t elPerVec = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
 
@@ -515,12 +636,12 @@ void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst,
         cmp(rToStoreNum, i);
         jle(lEnd, T_NEAR);
 
-        Xbyak::Label lLoopNext;
+        Label lLoopNext;
         if (useMask) {
             uni_vpextrd(r32Aux, vReadMask, i);
             cmp(r32Aux, 0);
             if (zeroFill) {
-                Xbyak::Label lNotZero;
+                Label lNotZero;
                 jne(lNotZero, T_NEAR);
                 mov(ptr[rDst.getReg() + i * typeSize], r32Aux);
                 jmp(lLoopNext, T_NEAR);
@@ -538,23 +659,23 @@ void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst,
     L(lEnd);
 }
 
-void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst,
-                             const Xbyak::Reg64& rSrc,
-                             const Xbyak::Ymm&   vReadMask,
-                             const Xbyak::Ymm&   vSrcShift,
-                             const Xbyak::Reg64& rToStoreNum,
+void JitKernelBase::memMovDD(const Reg64& rDst,
+                             const Reg64& rSrc,
+                             const Ymm&   vReadMask,
+                             const Ymm&   vSrcShift,
+                             const Reg64& rToStoreNum,
                              const bool          useMask,
                              const bool          zeroFill) {
-    Xbyak::Label lEnd;
+    Label lEnd;
     if (isValidIsa(x64::avx2)) {
-        auto vAux = RegistersPool::Reg<Xbyak::Ymm>(registersPool);
+        auto vAux = RegistersPool::Reg<Ymm>(registersPool);
         gatherdd(vAux, rSrc, vSrcShift, vReadMask, useMask, zeroFill);
         store(ptr[rDst], vAux, rToStoreNum, sizeof(int));
     } else if (isValidIsa(x64::avx)) {
         const uint8_t typeSize = sizeof(int);
         const uint8_t elPerXmm = x64::cpu_isa_traits<x64::sse41>::vlen / typeSize;
-        Xbyak::Xmm xmmReadMask  = Xbyak::Xmm(vReadMask.getIdx()),
-                   xmmSrcShft   = Xbyak::Xmm(vSrcShift.getIdx());
+        Xmm xmmReadMask  = Xmm(vReadMask.getIdx()),
+                   xmmSrcShft   = Xmm(vSrcShift.getIdx());
         for (uint8_t i = 0; i < 2; i++) {
             memMovDD(rDst, rSrc, xmmReadMask, xmmSrcShft, rToStoreNum, useMask, zeroFill);
 
@@ -575,3 +696,582 @@ void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst,
     }
     L(lEnd);
 }
+
+void JitKernelBase::load_vector(const Xmm& vmm_dst,                 // Register receiving the loaded (and possibly converted) elements.
+                               const Address &adr_src,             // Memory operand the packed elements are read from.
+                               const ov::element::Type& dst_prc,   // Element precision wanted in vmm_dst.
+                               const ov::element::Type& src_prc) { // Element precision stored at adr_src.
+    Xmm xmmDst = Xmm(vmm_dst.getIdx()); // XMM/YMM views of the same register index; used below as targets of
+    Ymm ymmDst = Ymm(vmm_dst.getIdx()); // narrowing conversions (f64/i64 -> 32-bit elements).
+
+    switch (src_prc) { // Step 1: emit the load, fused with a conversion when a single instruction covers the pair.
+        case ov::element::f64:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::i64, ov::element::i32, ov::element::f32)) {
+                if (dst_prc == ov::element::i64) {
+                    vcvtpd2qq(vmm_dst, adr_src);
+                } else if (dst_prc == ov::element::i32) {
+                    uni_vcvtpd2dq(vmm_dst.isZMM() ? ymmDst : vmm_dst, adr_src); // Result is half as wide as a ZMM source.
+                } else if (dst_prc == ov::element::f32) {
+                    uni_vcvtpd2ps(vmm_dst.isZMM() ? ymmDst : vmm_dst, adr_src);
+                }
+            } else if (!x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f32, ov::element::i32)) {
+                if (dst_prc == ov::element::f32) {
+                    uni_vcvtpd2ps(xmmDst, adr_src);
+                } else if (dst_prc == ov::element::i32) {
+                    uni_vcvtpd2dq(xmmDst, adr_src);
+                }
+            } else {
+                uni_vmovups(vmm_dst, adr_src); // No conversion selected for this pair: plain copy of the source bytes.
+            }
+            break;
+        case ov::element::i64:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f64, ov::element::f32)) {
+                if (dst_prc == ov::element::f64) {
+                    vcvtqq2pd(vmm_dst, adr_src);
+                } else if (dst_prc == ov::element::f32) {
+                    vcvtqq2ps(vmm_dst.isZMM() ? ymmDst : vmm_dst, adr_src);
+                }
+            } else {
+                uni_vmovups(vmm_dst, adr_src); // Includes i64 -> i32, which is narrowed (AVX-512 only) in the second switch.
+            }
+            break;
+        case ov::element::f32:
+            if (dst_prc == ov::element::i32) {
+                uni_vcvtps2dq(vmm_dst, adr_src);
+            } else {
+                uni_vmovups(vmm_dst, adr_src);
+            }
+            break;
+        case ov::element::i32:
+            if (dst_prc == ov::element::f64) {
+                uni_vcvtdq2pd(vmm_dst, adr_src);
+            } else if (dst_prc == ov::element::f32) {
+                uni_vcvtdq2ps(vmm_dst, adr_src);
+            } else {
+                uni_vmovups(vmm_dst, adr_src);
+            }
+            break;
+        case ov::element::bf16:
+            uni_vpmovzxwd(vmm_dst, adr_src);  // Widen each 16-bit value to a 32-bit lane...
+            uni_vpslld(vmm_dst, vmm_dst, 16); // ...then move it to the upper half of the lane (bf16 bits become f32 bits).
+            break;
+        case ov::element::u16:
+            if (one_of(dst_prc, ov::element::f32, ov::element::i32)) {
+                uni_vpmovzxwd(vmm_dst, adr_src); // Zero-extend to 32-bit lanes.
+            } else {
+                uni_vmovups(vmm_dst, adr_src);
+            }
+            break;
+        case ov::element::i16:
+            if (one_of(dst_prc, ov::element::f32, ov::element::i32)) {
+                uni_vpmovsxwd(vmm_dst, adr_src); // Sign-extend to 32-bit lanes.
+            } else {
+                uni_vmovups(vmm_dst, adr_src);
+            }
+            break;
+        case ov::element::i8:
+            if (one_of(dst_prc, ov::element::f32, ov::element::i32)) {
+                uni_vpmovsxbd(vmm_dst, adr_src); // Sign-extend to 32-bit lanes.
+            } else {
+                uni_vmovups(vmm_dst, adr_src);
+            }
+            break;
+        case ov::element::u8:
+            if (one_of(dst_prc, ov::element::f32, ov::element::i32)) {
+                uni_vpmovzxbd(vmm_dst, adr_src); // Zero-extend to 32-bit lanes.
+            } else {
+                uni_vmovups(vmm_dst, adr_src);
+            }
+            break;
+        default:
+            IE_THROW() << "Unsupported source precision: " << src_prc;
+    }
+
+    switch (dst_prc) { // Step 2: finish conversions that step 1 left as widened integers or f32 bits.
+        case ov::element::f32:
+            if (!x64::mayiuse(x64::avx512_core) && (src_prc == ov::element::i64)) { // Empty branch: nothing is emitted here for i64 -> f32 without AVX-512.
+                // Do conversion later.
+            }
+            if (one_of(src_prc, ov::element::u8, ov::element::i8, ov::element::i16, ov::element::u16)) {
+                uni_vcvtdq2ps(vmm_dst, vmm_dst); // Small integers were widened to i32 in step 1.
+            }
+            break;
+        case ov::element::i32:
+            if (x64::mayiuse(x64::avx512_core)) {
+                if (src_prc == ov::element::i64) {
+                    vpmovsqd(vmm_dst, vmm_dst); // Narrow the i64 lanes copied in step 1 to i32.
+                }
+            } else {
+                if (src_prc == ov::element::i64) { // Empty branch: nothing is emitted here for i64 -> i32 without AVX-512.
+                    // Do conversion later.
+                }
+            }
+            if (one_of(src_prc, ov::element::bf16)) {
+                uni_vcvtps2dq(vmm_dst, vmm_dst); // bf16 holds f32 bits after step 1.
+            }
+            break;
+        case ov::element::i64:
+        case ov::element::f64:
+            break; // Nothing further is emitted for 64-bit destinations.
+        default:
+            IE_THROW() << "Unsupported destination precision: " << dst_prc;
+    }
+}
+
+void JitKernelBase::load_scalar(const Xmm& vmm_dst,                 // Register receiving the element (element 0 is the one that is meaningful).
+                               const Address &adr_src,             // Memory location of the single source element.
+                               const ov::element::Type& dst_prc,   // Element precision wanted in vmm_dst.
+                               const ov::element::Type& src_prc) { // Element precision stored at adr_src.
+    Address src_adr_bcst(adr_src.getBit(), true, adr_src.getRegExp()); // Same address expression with the broadcast flag set, for the AVX-512 fused load+convert forms.
+
+    switch (src_prc) { // Step 1: emit the load, fused with a conversion when a single AVX-512 instruction covers the pair.
+        case ov::element::f64:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::i64, ov::element::i32, ov::element::f32)) {
+                if (dst_prc == ov::element::i64) {
+                    vcvtpd2qq(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::i32) {
+                    vcvtpd2dq(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::f32) {
+                    vcvtpd2ps(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vmovsd(vmm_dst, adr_src); // Plain 64-bit scalar load; any conversion happens in step 2.
+            }
+            break;
+        case ov::element::i64:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f64, ov::element::f32)) {
+                if (dst_prc == ov::element::f64) {
+                    vcvtqq2pd(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::f32) {
+                    vcvtqq2ps(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vmovsd(vmm_dst, adr_src); // Plain 64-bit scalar load.
+            }
+            break;
+        case ov::element::f32:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f64, ov::element::i32)) {
+                if (dst_prc == ov::element::f64) {
+                    vcvtps2pd(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::i32) {
+                    vcvtps2dq(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vmovss(vmm_dst, adr_src); // Plain 32-bit scalar load.
+            }
+            break;
+        case ov::element::i32:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f32, ov::element::f64)) {
+                if (dst_prc == ov::element::f32) {
+                    vcvtdq2ps(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::f64) {
+                    vcvtdq2pd(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vmovss(vmm_dst, adr_src); // Plain 32-bit scalar load.
+            }
+            break;
+        case ov::element::bf16:
+            uni_vpinsrw(vmm_dst, vmm_dst, adr_src, 0); // Read exactly one 16-bit value into word 0.
+            uni_vpslld(vmm_dst, vmm_dst, 16);          // Move it to the upper half of the 32-bit lane (bf16 bits become f32 bits).
+            break;
+        case ov::element::i16:
+            uni_vpinsrw(vmm_dst, vmm_dst, adr_src, 0); // Read exactly one 16-bit value into word 0.
+            uni_vpmovsxwd(vmm_dst, vmm_dst);           // Sign-extend in the register: re-reading adr_src would touch bytes past the scalar.
+            break;
+        case ov::element::u16:
+            uni_vpinsrw(vmm_dst, vmm_dst, adr_src, 0); // Read exactly one 16-bit value into word 0.
+            uni_vpmovzxwd(vmm_dst, vmm_dst);           // Zero-extend in the register: re-reading adr_src would touch bytes past the scalar.
+            break;
+        case ov::element::i8:
+            pinsrb(vmm_dst, adr_src, 0);               // Read exactly one byte into byte 0.
+            uni_vpmovsxbd(vmm_dst, vmm_dst);           // Sign-extend to a 32-bit lane.
+            break;
+        case ov::element::u8:
+            pinsrb(vmm_dst, adr_src, 0);               // Read exactly one byte into byte 0.
+            uni_vpmovzxbd(vmm_dst, vmm_dst);           // Zero-extend to a 32-bit lane.
+            break;
+        default:
+            IE_THROW() << "Unsupported source precision: " << src_prc;
+    }
+
+    switch (dst_prc) { // Step 2: finish conversions that step 1 left as raw/widened values.
+        case ov::element::f32:
+            if (x64::mayiuse(x64::avx512_core)) {
+                if (one_of(src_prc, ov::element::u8, ov::element::i8, ov::element::u16, ov::element::i16)) {
+                    uni_vcvtdq2ps(vmm_dst, vmm_dst); // Small integers were widened to i32 in step 1.
+                }
+            } else {
+                if (src_prc == ov::element::f64) {
+                    uni_vcvtpd2ps(vmm_dst, vmm_dst);
+                } else if (src_prc == ov::element::i64) {
+                    // Do conversion later.
+                } else if (one_of(src_prc, ov::element::u8, ov::element::i8, ov::element::u16, ov::element::i16, ov::element::i32)) {
+                    uni_vcvtdq2ps(vmm_dst, vmm_dst);
+                }
+            }
+            break;
+        case ov::element::i32:
+            // bf16 holds f32 bits after step 1 and must be converted on every ISA (load_vector does the same);
+            // f32 was already converted by vcvtps2dq in step 1 when AVX-512 is available.
+            if (src_prc == ov::element::bf16 || (!x64::mayiuse(x64::avx512_core) && src_prc == ov::element::f32)) {
+                uni_vcvtps2dq(vmm_dst, vmm_dst);
+            } else if (x64::mayiuse(x64::avx512_core) && src_prc == ov::element::i64) {
+                vpmovsqd(vmm_dst, vmm_dst);
+            } else if (src_prc == ov::element::i64) {
+                // Do conversion later.
+            }
+            break;
+        case ov::element::i64:
+        case ov::element::f64:
+            break; // Nothing further is emitted for 64-bit destinations.
+        default:
+            IE_THROW() << "Unsupported destination precision: " << dst_prc;
+    }
+}
+
+void JitKernelBase::load_with_bcst(const Xmm &vmm_dst,                 // Register receiving the element.
+                                   const Address &adr_src,             // Memory location of the single source element.
+                                   const ov::element::Type& dst_prc,   // Element precision wanted in vmm_dst.
+                                   const ov::element::Type& src_prc) { // Element precision stored at adr_src.
+    Address src_adr_bcst(adr_src.getBit(), true, adr_src.getRegExp()); // Same address expression with the broadcast flag set, for the AVX-512 fused load+convert forms.
+
+    switch (src_prc) { // Step 1: emit the load, fused with a conversion when a single AVX-512 instruction covers the pair.
+        case ov::element::f64:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::i64, ov::element::i32, ov::element::f32)) {
+                if (dst_prc == ov::element::i64) {
+                    vcvtpd2qq(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::i32) {
+                    vcvtpd2dq(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::f32) {
+                    vcvtpd2ps(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vbroadcastsd(vmm_dst, adr_src); // does not work with XMM, use vpbroadcastq instead
+            }
+            break;
+        case ov::element::i64:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f64, ov::element::f32)) {
+                if (dst_prc == ov::element::f64) {
+                    vcvtqq2pd(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::f32) {
+                    vcvtqq2ps(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vbroadcastsd(vmm_dst, adr_src); // Broadcast of the raw 64-bit element.
+            }
+            break;
+        case ov::element::f32:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f64, ov::element::i32)) {
+                if (dst_prc == ov::element::f64) {
+                    vcvtps2pd(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::i32) {
+                    vcvtps2dq(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vbroadcastss(vmm_dst, adr_src); // Broadcast of the raw 32-bit element.
+            }
+            break;
+        case ov::element::i32:
+            if (x64::mayiuse(x64::avx512_core) && one_of(dst_prc, ov::element::f32, ov::element::f64)) {
+                if (dst_prc == ov::element::f32) {
+                    vcvtdq2ps(vmm_dst, src_adr_bcst);
+                } else if (dst_prc == ov::element::f64) {
+                    vcvtdq2pd(vmm_dst, src_adr_bcst);
+                }
+            } else {
+                uni_vbroadcastss(vmm_dst, adr_src); // Broadcast of the raw 32-bit element.
+            }
+            break;
+        case ov::element::bf16:
+            uni_vpinsrw(vmm_dst, vmm_dst, adr_src, 0); // NOTE(review): only word 0 is written; no broadcast instruction is emitted on this path - confirm intended.
+            uni_vpslld(vmm_dst, vmm_dst, 16);          // Move the 16 bits to the upper half of each 32-bit lane.
+            break;
+        case ov::element::i16:
+            uni_vpinsrw(vmm_dst, vmm_dst, adr_src, 0);
+            uni_vpmovsxwd(vmm_dst, adr_src); // NOTE(review): reloads from adr_src, overwriting the pinsrw result and reading several consecutive words - confirm intended.
+            break;
+        case ov::element::u16:
+            uni_vpinsrw(vmm_dst, vmm_dst, adr_src, 0);
+            uni_vpmovzxwd(vmm_dst, adr_src); // NOTE(review): reloads from adr_src, overwriting the pinsrw result and reading several consecutive words - confirm intended.
+            break;
+        case ov::element::i8:
+            if (dst_prc == ov::element::i32) {
+                pinsrb(vmm_dst, adr_src, 0);     // Only byte 0 is written here.
+                uni_vpmovsxbd(vmm_dst, vmm_dst); // Sign-extend the low bytes of the register to 32-bit lanes.
+            } else {
+                vpbroadcastb(vmm_dst, adr_src);  // Byte-granular broadcast, no widening.
+            }
+            break;
+        case ov::element::u8:
+            if (dst_prc == ov::element::i32) {
+                pinsrb(vmm_dst, adr_src, 0);     // Only byte 0 is written here.
+                uni_vpmovzxbd(vmm_dst, vmm_dst); // Zero-extend the low bytes of the register to 32-bit lanes.
+            } else {
+                vpbroadcastb(vmm_dst, adr_src);  // Byte-granular broadcast, no widening.
+            }
+            break;
+        default:
+            IE_THROW() << "Unsupported source precision: " << src_prc;
+    }
+
+    switch (dst_prc) { // Step 2: finish conversions that step 1 left as raw/widened values.
+        case ov::element::f32:
+            if (x64::mayiuse(x64::avx512_core)) {
+                if (one_of(src_prc, ov::element::u8, ov::element::i8, ov::element::u16, ov::element::i16)) {
+                    uni_vcvtdq2ps(vmm_dst, vmm_dst); // NOTE(review): for u8/i8 the register holds broadcast bytes, not widened i32 lanes - verify.
+                }
+            } else {
+                if (src_prc == ov::element::f64) {
+                    uni_vcvtpd2ps(vmm_dst, vmm_dst);
+                } else if (src_prc == ov::element::i64) {
+                    // Do conversion later.
+                } else if (one_of(src_prc, ov::element::u8, ov::element::i8, ov::element::u16, ov::element::i16, ov::element::i32)) {
+                    uni_vcvtdq2ps(vmm_dst, vmm_dst);
+                }
+            }
+            break;
+        case ov::element::i32:
+            if (!x64::mayiuse(x64::avx512_core)) {
+                if (src_prc == ov::element::i64) {
+                    // Do conversion later.
+                } else if (one_of(src_prc, ov::element::f32, ov::element::bf16)) {
+                    uni_vcvtps2dq(vmm_dst, vmm_dst);
+                }
+            } else if (src_prc == ov::element::i64) { // NOTE(review): bf16 -> i32 is not converted on this AVX-512 path, unlike load_vector - confirm.
+                vpmovsqd(vmm_dst, vmm_dst);
+            }
+            break;
+        case ov::element::i64:
+        case ov::element::f64:
+            break; // Nothing further is emitted for 64-bit destinations.
+        default:
+            IE_THROW() << "Unsupported destination precision: " << dst_prc;
+    }
+}
+
+void JitKernelBase::store_vector(const Address &adr_dst,             // Emits code converting a whole vector from src_prc to dst_prc and storing it at adr_dst.
+                                 const Xmm &vmm_src,                 // Source register; it is converted/packed in place, so its contents are clobbered.
+                                 const ov::element::Type& dst_prc,   // Element type written to memory.
+                                 const ov::element::Type& src_prc) { // Element type currently held in vmm_src; throws for unsupported src/dst types.
+    auto xmm_src = Xmm(vmm_src.getIdx()); // 128-bit view of the same register index
+    auto ymm_src = Ymm(vmm_src.getIdx()); // 256-bit view of the same register index
+
+    switch (src_prc) { // Stage 1: value conversion inside the register (float <-> int, 64 <-> 32 bit).
+        case ov::element::f64:
+            if (dst_prc == ov::element::f32) {
+                uni_vcvtpd2ps(x64::mayiuse(x64::avx512_core) ? ymm_src : xmm_src, vmm_src); // result is half the width of the source
+            } else if (dst_prc == ov::element::i64) {
+                vcvtpd2qq(vmm_src, vmm_src);
+            } else if (dst_prc == ov::element::i32) {
+                vcvtpd2dq(ymm_src, vmm_src);
+            }
+            break;
+        case ov::element::i64:
+            if (dst_prc == ov::element::f32 || dst_prc == ov::element::bf16) {
+                vcvtqq2ps(ymm_src, vmm_src); // bf16 goes through f32 first; narrowed further in stage 2
+            } else if (dst_prc == ov::element::f64) {
+                vcvtqq2pd(vmm_src, vmm_src);
+            }
+            break;
+        case ov::element::f32:
+            if (dst_prc == ov::element::i64) {
+                vcvtps2qq(vmm_src, ymm_src);
+            } else if ((dst_prc == ov::element::u8 || dst_prc == ov::element::u16) && x64::mayiuse(x64::avx512_core)) {
+                vcvtps2udq(vmm_src, vmm_src); // unsigned targets: convert to unsigned dwords when AVX-512 is available
+            } else if (dst_prc != ov::element::f32 && dst_prc != ov::element::bf16) {
+                uni_vcvtps2dq(vmm_src, vmm_src); // every remaining integer target starts from signed dwords
+            }
+            break;
+        case ov::element::i32:
+            if (dst_prc == ov::element::f32 || dst_prc == ov::element::bf16) {
+                uni_vcvtdq2ps(vmm_src, vmm_src);
+            }
+            break;
+        default:
+            IE_THROW() << "Unsupported source precision: " << src_prc;
+    }
+
+    switch (dst_prc) { // Stage 2: narrowing (if any) and the actual store.
+        case ov::element::f64:
+            uni_vmovups(adr_dst, vmm_src);
+            break;
+        case ov::element::f32:
+            if (src_prc.size() == 8) {
+                uni_vmovups(adr_dst, ymm_src); // 64-bit sources were narrowed in stage 1; store the 256-bit view
+            } else {
+                uni_vmovups(adr_dst, vmm_src);
+            }
+            break;
+        case ov::element::i64:
+            uni_vmovups(adr_dst, vmm_src);
+            break;
+        case ov::element::i32:
+            if (src_prc == ov::element::i64) {
+                vpmovsqd(adr_dst, vmm_src); // saturating qword -> dword narrowing straight to memory
+            } else if (src_prc == ov::element::f64) {
+                uni_vmovups(adr_dst, ymm_src);
+            } else {
+                uni_vmovups(adr_dst, vmm_src);
+            }
+            break;
+        case ov::element::bf16:
+            if (!vcvtneps2bf16) { // the bf16 emitter is optional and must have been created by the derived kernel
+                IE_THROW() << "Converter for bf16 was not initialized!";
+            }
+            vcvtneps2bf16->emit_code({static_cast<size_t>(ymm_src.getIdx())}, {static_cast<size_t>(ymm_src.getIdx())});
+            vmovdqu16(adr_dst, ymm_src);
+            break;
+        case ov::element::i16:
+            if (x64::mayiuse(x64::avx512_core)) {
+                vpmovsdw(adr_dst, vmm_src);
+            } else {
+                uni_vpackssdw(vmm_src, vmm_src, vmm_src);
+                if (x64::mayiuse(x64::avx2)) { // vpermq is an AVX2 instruction, so the guard must be avx2, not avx
+                    vpermq(ymm_src, ymm_src, 0x08); // gather qwords 0 and 2 (the per-lane pack results) into the low 128 bits
+                    uni_vmovdqu(adr_dst, xmm_src);
+                } else {
+                    movq(adr_dst, xmm_src);
+                }
+            }
+            break;
+        case ov::element::u16:
+            if (x64::mayiuse(x64::avx512_core)) {
+                vpmovusdw(adr_dst, vmm_src); // fixed: narrow the whole register like the i16/i8/u8 paths, not only its low 128 bits
+            } else {
+                uni_vpackusdw(vmm_src, vmm_src, vmm_src);
+                if (x64::mayiuse(x64::avx2)) { // vpermq is an AVX2 instruction, so the guard must be avx2, not avx
+                    vpermq(ymm_src, ymm_src, 0x08);
+                    uni_vmovdqu(adr_dst, xmm_src);
+                } else {
+                    movq(adr_dst, xmm_src);
+                }
+            }
+            break;
+        case ov::element::i8:
+            if (x64::mayiuse(x64::avx512_core)) {
+                if (src_prc == ov::element::i64) {
+                    vpmovsqb(adr_dst, vmm_src);
+                } else {
+                    vpmovsdb(adr_dst, vmm_src);
+                }
+            } else {
+                uni_vpackssdw(vmm_src, vmm_src, vmm_src); // dwords -> words
+                if (x64::mayiuse(x64::avx2)) { // vpermq is an AVX2 instruction, so the guard must be avx2, not avx
+                    vpermq(ymm_src, ymm_src, 0x08);
+                }
+                uni_vpacksswb(vmm_src, vmm_src, vmm_src); // words -> bytes
+                if (x64::mayiuse(x64::avx2)) { // 256-bit source yields 8 bytes, 128-bit source yields 4 bytes
+                    vmovq(adr_dst, xmm_src);
+                } else {
+                    movd(adr_dst, xmm_src);
+                }
+            }
+            break;
+        case ov::element::u8:
+            if (x64::mayiuse(x64::avx512_core)) {
+                if (src_prc == ov::element::i64) {
+                    vpmovusqb(adr_dst, vmm_src);
+                } else {
+                    vpmovusdb(adr_dst, vmm_src);
+                }
+            } else {
+                uni_vpackusdw(vmm_src, vmm_src, vmm_src); // dwords -> words
+                if (x64::mayiuse(x64::avx2)) { // vpermq is an AVX2 instruction, so the guard must be avx2, not avx
+                    vpermq(ymm_src, ymm_src, 0x08);
+                }
+                uni_vpackuswb(vmm_src, vmm_src, vmm_src); // words -> bytes
+                if (x64::mayiuse(x64::avx2)) { // 256-bit source yields 8 bytes, 128-bit source yields 4 bytes
+                    vmovq(adr_dst, xmm_src);
+                } else {
+                    movd(adr_dst, xmm_src);
+                }
+            }
+            break;
+        default:
+            IE_THROW() << "Unsupported destination precision: " << dst_prc;
+    }
+}
+
+void JitKernelBase::store_scalar(const Address &adr_dst,             // Emits code converting the lowest element from src_prc to dst_prc and storing it at adr_dst.
+                                 const Xmm &vmm_src,                 // Source register; it is converted/packed in place, so its contents are clobbered.
+                                 const ov::element::Type& dst_prc,   // Element type written to memory.
+                                 const ov::element::Type& src_prc) { // Element type currently held in vmm_src; throws for unsupported src/dst types.
+    switch (src_prc) { // Stage 1: value conversion inside the register.
+        case ov::element::f64:
+            if (dst_prc == ov::element::f32) {
+                uni_vcvtpd2ps(vmm_src, vmm_src);
+            } else if (dst_prc == ov::element::i64) {
+                vcvtpd2qq(vmm_src, vmm_src);
+            } else if (dst_prc == ov::element::i32) {
+                uni_vcvtpd2dq(vmm_src, vmm_src);
+            }
+            break;
+        case ov::element::i64: // NOTE(review): no i64 -> f64 conversion here, although store_vector emits vcvtqq2pd for that pair - confirm.
+            if (dst_prc == ov::element::f32 || dst_prc == ov::element::bf16) {
+                vcvtqq2ps(vmm_src, vmm_src);
+            } else if (dst_prc == ov::element::i32) {
+                vpmovsqd(vmm_src, vmm_src); // narrowed in the register here; stage 2 then stores 32 bits
+            }
+            break;
+        case ov::element::f32:
+            if (dst_prc == ov::element::i64) {
+                vcvtps2qq(vmm_src, vmm_src);
+            } else if (dst_prc == ov::element::u8 && x64::mayiuse(x64::avx512_core)) { // only u8 takes the unsigned path here (store_vector also includes u16)
+                vcvtps2udq(vmm_src, vmm_src);
+            } else if (dst_prc != ov::element::f32 && dst_prc != ov::element::bf16) {
+                uni_vcvtps2dq(vmm_src, vmm_src); // every remaining integer target starts from a signed dword
+            }
+            break;
+        case ov::element::i32:
+            if (dst_prc == ov::element::f32 || dst_prc == ov::element::bf16) {
+                uni_vcvtdq2ps(vmm_src, vmm_src);
+            }
+            break;
+        default:
+            IE_THROW() << "Unsupported source precision: " << src_prc;
+    }
+
+    switch (dst_prc) { // Stage 2: narrowing (if any) and the store of a single element.
+        case ov::element::f64:
+        case ov::element::i64:
+            uni_vmovsd(adr_dst, vmm_src); // 64-bit element
+            break;
+        case ov::element::f32:
+        case ov::element::i32:
+            uni_vmovss(adr_dst, vmm_src); // 32-bit element
+            break;
+        case ov::element::bf16:
+            uni_vpsrld(vmm_src, vmm_src, 16); // keep the upper 16 bits of the f32 value (truncation, no rounding)
+            uni_vpextrw(adr_dst, vmm_src, 0x0);
+            break;
+        case ov::element::i16:
+            uni_vpackssdw(vmm_src, vmm_src, vmm_src); // signed-saturating dword -> word
+            uni_vpextrw(adr_dst, vmm_src, 0x0);
+            break;
+        case ov::element::u16:
+            uni_vpackusdw(vmm_src, vmm_src, vmm_src); // unsigned-saturating dword -> word
+            uni_vpextrw(adr_dst, vmm_src, 0x0);
+            break;
+        case ov::element::i8:
+            if (x64::mayiuse(x64::avx512_core)) {
+                vpmovsdb(vmm_src, vmm_src);
+            } else {
+                uni_vpackssdw(vmm_src, vmm_src, vmm_src); // dword -> word
+                uni_vpacksswb(vmm_src, vmm_src, vmm_src); // word -> byte
+            }
+            uni_vpextrb(adr_dst, vmm_src, 0x0);
+            break;
+        case ov::element::u8:
+            if (x64::mayiuse(x64::avx512_core)) {
+                vpmovusdb(vmm_src, vmm_src);
+            } else {
+                uni_vpackusdw(vmm_src, vmm_src, vmm_src); // dword -> word
+                uni_vpackuswb(vmm_src, vmm_src, vmm_src); // word -> byte
+            }
+            uni_vpextrb(adr_dst, vmm_src, 0);
+            break;
+        default:
+            IE_THROW() << "Unsupported destination precision: " << dst_prc;
+    }
+}
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp
index e39efde753bbbc..ed5b6e02ea354c 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2022 Intel Corporation
+// Copyright (C) 2018-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,49 +6,67 @@
 
 #include "cpu/x64/jit_generator.hpp"
 #include "registers_pool.hpp"
+#include "emitters/x64/jit_bf16_emitters.hpp"
 
 namespace ov {
 namespace intel_cpu {
+namespace kernel {
 
-#define getReg64() RegistersPool::Reg<Xbyak::Reg64>(registersPool)
-#define getReg32() RegistersPool::Reg<Xbyak::Reg32>(registersPool)
-#define getVmm()   RegistersPool::Reg<Vmm>(registersPool)
-#define getMask()  RegistersPool::Reg<Vmask>(registersPool)
+#define getReg64() RegistersPool::Reg<Xbyak::Reg64>(this->registersPool)
+#define getReg32() RegistersPool::Reg<Xbyak::Reg32>(this->registersPool)
+#define getVmm()   RegistersPool::Reg<Vmm>(this->registersPool)
+#define getMask()  RegistersPool::Reg<Vmask>(this->registersPool)
 
 class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator {
 public:
-    JitKernelBase(const char* name) : dnnl::impl::cpu::x64::jit_generator(name) {}
+    JitKernelBase(const char* name, dnnl::impl::cpu::x64::cpu_isa_t max_cpu_isa);
 
-    void uni_vfmsub132ps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op);
+    void uni_vfmsub132ps(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand& op);
 
-    void uni_vfnmadd132ps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op);
+    void uni_vfnmadd132ps(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand& op);
 
-    void uni_vfmsub231ps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op);
+    void uni_vfmsub231ps(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand& op);
 
-    void uni_vpaddd(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op) {
-        jit_generator::uni_vpaddd(vDst, vSrc, op);
+    void uni_vpaddd(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand& op) {
+        jit_generator::uni_vpaddd(vmm_dst, vmm_src, op);
     }
 
-    void uni_vpaddd(const Xbyak::Ymm& vDst, const Xbyak::Ymm& vSrc, const Xbyak::Operand& op);
+    void uni_vpaddd(const Xbyak::Ymm& vmm_dst, const Xbyak::Ymm& vmm_src, const Xbyak::Operand& op);
 
-    void uni_vpsubd(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op) {
-        jit_generator::uni_vpsubd(vDst, vSrc, op);
+    void uni_vaddpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand &op1, const Xbyak::Operand &op2);
+
+    void uni_vpsubd(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand& op) {
+        jit_generator::uni_vpsubd(vmm_dst, vmm_src, op);
     }
 
-    void uni_vpsubd(const Xbyak::Ymm& vDst, const Xbyak::Ymm& vSrc, const Xbyak::Operand& op);
+    void uni_vpsubd(const Xbyak::Ymm& vmm_dst, const Xbyak::Ymm& vmm_src, const Xbyak::Operand& op);
+
+    void uni_vmulpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand& op1, const Xbyak::Operand& op2);
+
+    void uni_vdivps(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand& op1, const Xbyak::Operand& op2);
+
+    void uni_vdivpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand& op1, const Xbyak::Operand& op2);
+
+    void uni_vandps(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand &op);
 
-    void uni_vdivps(const Xbyak::Xmm& vDst, const Xbyak::Operand& op1, const Xbyak::Operand& op2);
+    void uni_vandpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand &op);
 
-    void uni_vandps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op);
+    void uni_vandnps(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand &op);
 
-    void uni_vandnps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op);
+    void uni_vorpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand &op);
+
+    void uni_vcmppd(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src, const Xbyak::Operand &op, const uint8_t imm);
+
+    void uni_vmaxpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand &op1, const Xbyak::Operand &op2);
+
+    void uni_vminpd(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand &op1, const Xbyak::Operand &op2);
 
     void uni_kmovd(const Xbyak::Opmask& kDst, const Xbyak::Opmask& kSrc) {
         kmovd(kDst, kSrc);
     }
 
-    void uni_kmovd(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc) {
-        uni_vmovups(vDst, vSrc);
+    void uni_kmovd(const Xbyak::Xmm& vmm_dst, const Xbyak::Xmm& vmm_src) {
+        uni_vmovups(vmm_dst, vmm_src);
     }
 
     void uni_kandd(const Xbyak::Opmask& kDst, const Xbyak::Opmask& kSrc1, const Xbyak::Opmask& kSrc2) {
@@ -59,81 +77,127 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator {
         uni_vandps(kDst, kSrc1, kSrc2);
     }
 
-    void uni_vpbroadcastd(const Xbyak::Xmm &x, const Xbyak::Operand &op);
-
-    void uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand &op);
-
-    void gatherdd(const Xbyak::Xmm&    vDst,
-                  const Xbyak::Reg64&  rSrcPtr,
-                  const Xbyak::Xmm&    vSrcShift,
-                  const Xbyak::Opmask& kReadMask,
-                  const bool useMask   = true,
-                  const bool zeroFill  = false);
-
-    void gatherdd(const Xbyak::Xmm&   vDst,
-                  const Xbyak::Reg64& rSrcPtr,
-                  const Xbyak::Xmm&   vSrcShift,
-                  const Xbyak::Xmm&   vReadMask,
-                  const bool useMask  = true,
-                  const bool zeroFill = false);
-
-    void gatherdd(const Xbyak::Ymm&   vDst,
-                  const Xbyak::Reg64& rSrcPtr,
-                  const Xbyak::Ymm&   vSrcShift,
-                  const Xbyak::Ymm&   vReadMask,
-                  const bool useMask  = true,
-                  const bool zeroFill = false);
-
-    void fillRestWorkMask(const Xbyak::Opmask& kDstMask,
-                          const Xbyak::Reg64& rWorkRest);
-
-    void fillRestWorkMask(const Xbyak::Xmm& ymmDstMask,
-                          const Xbyak::Reg64& rWorkRest,
-                          const uint64_t typeSize = 4);
-
-    void fillRestWorkMask(const Xbyak::Ymm& ymmDstMask,
-                          const Xbyak::Reg64& rWorkRest,
-                          const uint64_t typeSize = 4);
-
-    void load(const Xbyak::Xmm&     vDst,
-              const Xbyak::Address& srcAddr,
-              const Xbyak::Reg64&   rLoadNum,
-              const size_t          typeSize,
-              const bool zeroFill = false);
-
-    void load(const Xbyak::Ymm&     vDst,
-              const Xbyak::Address& srcAddr,
-              const Xbyak::Reg64&   rLoadNum,
-              const size_t          typeSize,
-              const bool zeroFill = false);
-
-    void store(const Xbyak::Address& dstAddr,
-               const Xbyak::Xmm&     vSrc,
-               const Xbyak::Reg64&   rToStoreNum,
-               const size_t          typeSize);
-
-    void store(const Xbyak::Address& dstAddr,
-               const Xbyak::Ymm&     vSrc,
-               const Xbyak::Reg64&   rToStoreNum,
-               const size_t          typeSize);
+    void uni_vpbroadcastd(const Xbyak::Xmm &vmm_dst, const Xbyak::Operand &op);
+
+    void uni_vpbroadcastd(const Xbyak::Ymm &vmm_dst, const Xbyak::Operand &op);
+
+    void uni_vcvtpd2dq(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand &op);
+
+    void uni_vcvtpd2ps(const Xbyak::Xmm& vmm_dst, const Xbyak::Operand &op);
+
+    void gatherdd(
+            const Xbyak::Xmm&    vmm_dst,
+            const Xbyak::Reg64&  rSrcPtr,
+            const Xbyak::Xmm&    vSrcShift,
+            const Xbyak::Opmask& kReadMask,
+            const bool useMask   = true,
+            const bool zeroFill  = false);
+
+    void gatherdd(
+            const Xbyak::Xmm&   vmm_dst,
+            const Xbyak::Reg64& rSrcPtr,
+            const Xbyak::Xmm&   vSrcShift,
+            const Xbyak::Xmm&   vReadMask,
+            const bool useMask  = true,
+            const bool zeroFill = false);
+
+    void gatherdd(
+            const Xbyak::Ymm&   vmm_dst,
+            const Xbyak::Reg64& rSrcPtr,
+            const Xbyak::Ymm&   vSrcShift,
+            const Xbyak::Ymm&   vReadMask,
+            const bool useMask  = true,
+            const bool zeroFill = false);
+
+    void fillRestWorkMask(
+            const Xbyak::Opmask& kDstMask,
+            const Xbyak::Reg64& rWorkRest);
+
+    void fillRestWorkMask(
+            const Xbyak::Xmm& ymmDstMask,
+            const Xbyak::Reg64& rWorkRest,
+            const uint64_t typeSize = 4);
+
+    void fillRestWorkMask(
+            const Xbyak::Ymm& ymmDstMask,
+            const Xbyak::Reg64& rWorkRest,
+            const uint64_t typeSize = 4);
+
+    void load(
+            const Xbyak::Xmm&     vmm_dst,
+            const Xbyak::Address& adr_src,
+            const Xbyak::Reg64&   rLoadNum,
+            const size_t          typeSize,
+            const bool zeroFill = false);
+
+    void load(
+            const Xbyak::Ymm&     vmm_dst,
+            const Xbyak::Address& adr_src,
+            const Xbyak::Reg64&   rLoadNum,
+            const size_t          typeSize,
+            const bool zeroFill = false);
+
+    void store(
+            const Xbyak::Address& dstAddr,
+            const Xbyak::Xmm&     vmm_src,
+            const Xbyak::Reg64&   rToStoreNum,
+            const size_t          typeSize);
+
+    void store(
+            const Xbyak::Address& dstAddr,
+            const Xbyak::Ymm&     vmm_src,
+            const Xbyak::Reg64&   rToStoreNum,
+            const size_t          typeSize);
 
     // Makes gather from memory under the vReadMask and writes to the memory m128.
-    void memMovDD(const Xbyak::Reg64& rDst,
-                  const Xbyak::Reg64& rSrc,
-                  const Xbyak::Xmm&   vReadMask,
-                  const Xbyak::Xmm&   vSrcShift,
-                  const Xbyak::Reg64& rToStoreCounter,
-                  const bool useMask  = true,
-                  const bool zeroFill = false);
+    void memMovDD(
+            const Xbyak::Reg64& rDst,
+            const Xbyak::Reg64& rSrc,
+            const Xbyak::Xmm&   vReadMask,
+            const Xbyak::Xmm&   vSrcShift,
+            const Xbyak::Reg64& rToStoreCounter,
+            const bool useMask  = true,
+            const bool zeroFill = false);
 
     // Makes gather from the memory under the vReadMask and writes to the memory m256.
-    void memMovDD(const Xbyak::Reg64& rDst,
-                  const Xbyak::Reg64& rSrc,
-                  const Xbyak::Ymm&   vReadMask,
-                  const Xbyak::Ymm&   vSrcShift,
-                  const Xbyak::Reg64& rToStoreCounter,
-                  const bool useMask  = true,
-                  const bool zeroFill = false);
+    void memMovDD(
+            const Xbyak::Reg64& rDst,
+            const Xbyak::Reg64& rSrc,
+            const Xbyak::Ymm&   vReadMask,
+            const Xbyak::Ymm&   vSrcShift,
+            const Xbyak::Reg64& rToStoreCounter,
+            const bool useMask  = true,
+            const bool zeroFill = false);
+
+    void load_vector(
+            const Xbyak::Xmm& vmm_dst,
+            const Xbyak::Address &srcAdr,
+            const ov::element::Type& dstPrc,
+            const ov::element::Type& srcPrc);
+
+    void load_scalar(
+            const Xbyak::Xmm& vmm_dst,
+            const Xbyak::Address &srcAdr,
+            const ov::element::Type& dstPrc,
+            const ov::element::Type& srcPrc);
+
+    void load_with_bcst(
+            const Xbyak::Xmm& vmm_dst,
+            const Xbyak::Address &srcAdr,
+            const ov::element::Type& dstPrc,
+            const ov::element::Type& srcPrc);
+
+    void store_vector(
+            const Xbyak::Address &dstAdr,
+            const Xbyak::Xmm& vmm_src,
+            const ov::element::Type& dstPrc,
+            const ov::element::Type& srcPrc);
+
+    void store_scalar(
+            const Xbyak::Address &dstAdr,
+            const Xbyak::Xmm& vmm_src,
+            const ov::element::Type& dstPrc,
+            const ov::element::Type& srcPrc);
 
 protected:
     inline bool isValidIsa(dnnl::impl::cpu::x64::cpu_isa_t isa) {
@@ -142,6 +206,8 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator {
 
     RegistersPool::Ptr registersPool;
 
+    std::shared_ptr<jit_uni_vcvtneps2bf16> vcvtneps2bf16;
+
     enum {
         // Comparison predicate operand (immediate byte) for single-precision floating-point values.
         CMP_EQ_PS = 0, // Equal (ordered, non-signaling)
@@ -155,5 +221,41 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator {
     };
 };
 
+template<typename CompileParams, typename CallArgs>
+class JitKernel : public JitKernelBase { // Typed wrapper: stores compile-time parameters and exposes the generated code as a callable.
+public:
+    using KernelFunc = void (*)(const CallArgs *); // signature of the generated entry point
+
+    explicit JitKernel(const char* name, const CompileParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t max_cpu_isa)
+            : JitKernelBase{name, max_cpu_isa}, jcp{jcp}, func{nullptr} {} // func stays null until create_kernel() succeeds
+    ~JitKernel() override = default;
+
+    dnnl::impl::status_t create_kernel() override { // Generates the code, throws on failure, caches the entry point in func.
+        const dnnl::impl::status_t code = jit_generator::create_kernel();
+        if (code != dnnl::impl::status::success) {
+            IE_THROW() << "Could not create kernel. Error code: " << std::to_string(code) << ". " <<
+                       "Xbyak error code: " << Xbyak::ConvertErrorToString(Xbyak::GetError());
+        }
+        func = (decltype(func))jit_ker(); // reinterpret the generated code address as the typed entry point
+        return code;
+    }
+
+    void operator()(const CallArgs* args) const { // Runs the generated kernel; create_kernel() must have been called first.
+        assert(func);
+        func(args);
+    }
+
+    void operator()(const CallArgs& args) const { // Convenience overload forwarding to the pointer version.
+        this->operator()(&args);
+    }
+
+protected:
+    CompileParams jcp; // copy of the compile-time parameters, available to derived kernels
+
+private:
+    KernelFunc func; // entry point of the generated code, set by create_kernel()
+};
+
+} // namespace kernel
 } // namespace intel_cpu
 } // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.cpp
new file mode 100644
index 00000000000000..cd64ee05706c0c
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.cpp
@@ -0,0 +1,1915 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "reduce.hpp"
+#include "utils/bfloat16.hpp"
+#include <ie_ngraph_utils.hpp>
+
+using namespace ov::intel_cpu::kernel;
+using namespace dnnl::impl::utils;
+using namespace dnnl::impl::cpu;
+using namespace Xbyak;
+
+#define GET_OFF(field) offsetof(JitReduceCallArgs, field)
+#define GET_OFF_POST(field) offsetof(JitReducePostCallArgs, field)
+
+
+static inline bool isFloatCompatible(const ov::element::Type& type) { // True when type is one of f32, bf16 or f64.
+    return ov::intel_cpu::one_of(type, ov::element::f32, ov::element::bf16, ov::element::f64);
+}
+
+///////////////////////////////
+///// JitReduceKernelBase /////
+///////////////////////////////
+
+template<typename CallArgs>
+JitReduceKernelBase<CallArgs>::JitReduceKernelBase(const char* name, const JitReduceConfigParams& jcp, x64::cpu_isa_t isa) // Picks the execution type and creates the emitters the reduce mode needs.
+        : JitKernel<JitReduceConfigParams, CallArgs>(name, jcp, isa) {
+    exec_el_type = jcp.src_el_type; // start from the source type, then map it to a type the kernel computes in
+    if (exec_el_type.size() <= 4) {
+        exec_el_type = ov::element::f32; // every source type of at most 4 bytes is computed in f32
+    } else if (exec_el_type == ov::element::u64) {
+        exec_el_type = ov::element::i64; // u64 is processed as i64; other 8-byte types are kept as they are
+    }
+
+    planar_layout = one_of(jcp.layout, ReduceLayoutType::reduce_ncsp, ReduceLayoutType::reduce_nspc);
+
+    if (one_of(ov::element::bf16, exec_el_type, jcp.src_el_type, jcp.dst_el_type)) { // bf16 converter is created only when some involved type is bf16
+        this->vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa);
+    }
+    if (jcp.reduce_mode == Algorithm::ReduceMax) {
+        max_emitter = std::make_shared<ov::intel_cpu::jit_maximum_emitter>(this, isa, InferenceEngine::details::convertPrecision(exec_el_type));
+    }
+    if (jcp.reduce_mode == Algorithm::ReduceMin) {
+        min_emitter = std::make_shared<ov::intel_cpu::jit_minimum_emitter>(this, isa, InferenceEngine::details::convertPrecision(exec_el_type));
+    }
+    if (one_of(jcp.reduce_mode, Algorithm::ReduceL2, Algorithm::ReduceSumSquare, Algorithm::ReduceProd)) { // the modes for which a multiply emitter is created
+        mul_emitter = std::make_shared<ov::intel_cpu::jit_multiply_emitter>(this, isa, InferenceEngine::details::convertPrecision(exec_el_type));
+    }
+}
+
+////////// FLOAT 32 //////////
+template<typename CallArgs>
+void JitReduceKernelBase<CallArgs>::horiz_ps(const Xmm& vmm_dst, const Operand& op) { // Emits vmm_dst = vmm_dst (reduce-op) op for packed f32, chosen by jcp.reduce_mode.
+    switch (this->jcp.reduce_mode) {
+        case Algorithm::ReduceAnd:
+            this->uni_vandps(vmm_dst, vmm_dst, op); // bitwise AND of the raw register bits
+            break;
+        case Algorithm::ReduceL1: // all of the following modes combine partial results by addition
+        case Algorithm::ReduceL2:
+        case Algorithm::ReduceLogSum:
+        case Algorithm::ReduceMean:
+        case Algorithm::ReduceSum:
+        case Algorithm::ReduceSumSquare:
+        case Algorithm::ReduceLogSumExp:
+            this->uni_vaddps(vmm_dst, vmm_dst, op);
+            break;
+        case Algorithm::ReduceMax:
+            this->uni_vmaxps(vmm_dst, vmm_dst, op);
+            break;
+        case Algorithm::ReduceMin:
+            this->uni_vminps(vmm_dst, vmm_dst, op);
+            break;
+        case Algorithm::ReduceOr:
+            this->uni_vorps(vmm_dst, vmm_dst, op); // bitwise OR of the raw register bits
+            break;
+        case Algorithm::ReduceProd:
+            this->uni_vmulps(vmm_dst, vmm_dst, op);
+            break;
+        default:
+            IE_THROW() << "Unsupported reduce mode '" << algToString(this->jcp.reduce_mode) << "'";
+    }
+}
+
+template <typename CallArgs>
+template <x64::cpu_isa_t isa>
+void JitReduceKernelBase<CallArgs>::horiz_reduce_store_ps(const Xmm& vmm_dst, const ov::element::Type& dst_el_type, bool load_embedded) { // Reduces all f32 lanes of vmm_dst to one value and stores it as a scalar.
+    auto xmm_aux_1 = RegistersPool::Reg<Xmm>(this->registersPool); // accumulates the partial result
+    auto xmm_aux_2 = RegistersPool::Reg<Xmm>(this->registersPool); // scratch for extracted/shuffled lanes
+
+    if (isa == x64::avx512_core) {
+        auto zmm_dst = Zmm(vmm_dst.getIdx());
+        auto ymm_dst = Ymm(vmm_dst.getIdx());
+        auto ymm_aux_1 = Ymm(xmm_aux_1.getIdx());
+
+        this->vextractf64x4(ymm_aux_1, zmm_dst, 1); // upper 256 bits of the zmm
+        this->horiz_ps(ymm_aux_1, ymm_dst);         // combine with the lower 256 bits
+        this->vextractf128(xmm_aux_2, ymm_aux_1, 1); // upper 128 bits of the partial result
+        this->horiz_ps(xmm_aux_1, xmm_aux_2);        // combine with the lower 128 bits -> 4 lanes left
+    } else if (isa == x64::avx2) {
+        auto ymm_dst = Ymm(vmm_dst.getIdx());
+        auto xmm_dst = Xmm(vmm_dst.getIdx());
+
+        this->vextractf128(xmm_aux_1, ymm_dst, 1); // upper 128 bits of the ymm
+        this->horiz_ps(xmm_aux_1, xmm_dst);        // combine with the lower 128 bits -> 4 lanes left
+    } else if (isa == x64::sse41) {
+        auto xmm_dst = Xmm(vmm_dst.getIdx());
+
+        if (one_of(this->jcp.reduce_mode, Algorithm::ReduceL1, Algorithm::ReduceL2, Algorithm::ReduceLogSum, Algorithm::ReduceMean,
+                                          Algorithm::ReduceSum, Algorithm::ReduceSumSquare, Algorithm::ReduceLogSumExp)) {
+            this->uni_vhaddps(xmm_aux_1, xmm_dst, xmm_dst);     // additive modes: two horizontal adds leave the sum in lane 0
+            this->uni_vhaddps(xmm_aux_1, xmm_aux_1, xmm_aux_1);
+        } else {
+            this->uni_vshufps(xmm_aux_1, xmm_dst, xmm_dst, 0b00001110); // bring lanes 2,3 down to lanes 0,1
+            this->horiz_ps(xmm_aux_1, xmm_dst);
+            this->uni_vshufps(xmm_aux_2, xmm_aux_1, xmm_aux_1, 0b00000001); // bring lane 1 down to lane 0
+            this->horiz_ps(xmm_aux_1, xmm_aux_2);
+        }
+    }
+
+    if (isa != x64::sse41) { // AVX2/AVX-512: finish the 4 -> 1 lane reduction of xmm_aux_1 (sse41 already did it above)
+        if (one_of(this->jcp.reduce_mode, Algorithm::ReduceL1, Algorithm::ReduceL2, Algorithm::ReduceLogSum, Algorithm::ReduceMean,
+                                          Algorithm::ReduceSum, Algorithm::ReduceSumSquare, Algorithm::ReduceLogSumExp)) {
+            this->uni_vhaddps(xmm_aux_1, xmm_aux_1, xmm_aux_1);
+            this->uni_vhaddps(xmm_aux_1, xmm_aux_1, xmm_aux_1);
+        } else {
+            this->uni_vshufps(xmm_aux_2, xmm_aux_1, xmm_aux_1, 0b00001110); // bring lanes 2,3 down to lanes 0,1
+            this->horiz_ps(xmm_aux_1, xmm_aux_2);
+            this->uni_vshufps(xmm_aux_2, xmm_aux_1, xmm_aux_1, 0b00000001); // bring lane 1 down to lane 0
+            this->horiz_ps(xmm_aux_1, xmm_aux_2);
+        }
+    }
+
+    auto trg_el_type = dst_el_type;
+    Reg64 trg_ptr = reg_dst;
+    if (this->jcp.fuse_low_precision && (post_reduce || post_ops_fusing)) { // in this case the result is written as f32 through reg_src instead of reg_dst
+        trg_el_type = ov::element::f32; // TODO i64 fusing
+        trg_ptr = reg_src; // NOTE(review): presumably reg_src addresses an intermediate f32 buffer here - confirm against the caller
+    }
+    if (load_embedded) { // additionally fold in the value already stored at the target address
+        if (isa == x64::avx512_core && exec_el_type == trg_el_type) {
+            this->horiz_ps(xmm_aux_1, this->ptr_b[trg_ptr]); // no conversion needed: use the memory operand with embedded broadcast
+        } else {
+            this->load_scalar(xmm_aux_2, this->ptr[trg_ptr], exec_el_type, trg_el_type);
+            this->horiz_ps(xmm_aux_1, xmm_aux_2);
+        }
+    }
+    this->store_scalar(this->ptr[trg_ptr], xmm_aux_1, trg_el_type, exec_el_type);
+}
+
+////////// INTEGER 64 //////////
+template<typename CallArgs>
+template <x64::cpu_isa_t isa>
+void JitReduceKernelBase<CallArgs>::horiz_qq(const Xmm& vmm_dst, const Operand& op) { // Emits vmm_dst = vmm_dst (reduce-op) op for packed 64-bit integers, chosen by jcp.reduce_mode.
+    using Vmm = typename conditional3<isa == x64::sse41, Xmm, isa == x64::avx2, Ymm, Zmm>::type; // consumed by the getVmm() macro below
+
+    switch (this->jcp.reduce_mode) {
+        case Algorithm::ReduceAnd:
+            this->uni_vandpd(vmm_dst, vmm_dst, op); // bitwise AND of the raw register bits
+            break;
+        case Algorithm::ReduceL1: // all of the following modes combine partial results by addition
+        case Algorithm::ReduceL2:
+        case Algorithm::ReduceLogSum:
+        case Algorithm::ReduceMean:
+        case Algorithm::ReduceSum:
+        case Algorithm::ReduceSumSquare:
+        case Algorithm::ReduceLogSumExp:
+            this->uni_vpaddq(vmm_dst, vmm_dst, op);
+            break;
+        case Algorithm::ReduceMax:
+            if (isa == x64::avx512_core) {
+                this->vpmaxsq(vmm_dst, vmm_dst, op);
+            } else { // below AVX-512 the maximum is delegated to max_emitter
+                auto vmm_aux_0 = getVmm(); // scratch register handed to the emitter
+                if (op.isMEM()) {
+                    max_emitter->emit_code({vmm_dst.getIdx()}, {vmm_dst.getIdx()}, {vmm_aux_0.getIdx()}, {op.getIdx()}); // NOTE(review): op is a memory operand here; confirm what the emitter expects in the 4th list
+                } else {
+                    max_emitter->emit_code({vmm_dst.getIdx(), op.getIdx()}, {vmm_dst.getIdx()}, {vmm_aux_0.getIdx()});
+                }
+            }
+            break;
+        case Algorithm::ReduceMin:
+            if (isa == x64::avx512_core) {
+                this->vpminsq(vmm_dst, vmm_dst, op);
+            } else { // below AVX-512 the minimum is delegated to min_emitter
+                auto vmm_aux_0 = getVmm(); // scratch register handed to the emitter
+                if (op.isMEM()) {
+                    min_emitter->emit_code({vmm_dst.getIdx()}, {vmm_dst.getIdx()}, {vmm_aux_0.getIdx()}, {op.getIdx()}); // NOTE(review): same memory-operand question as for ReduceMax
+                } else {
+                    min_emitter->emit_code({vmm_dst.getIdx(), op.getIdx()}, {vmm_dst.getIdx()}, {vmm_aux_0.getIdx()});
+                }
+            }
+            break;
+        case Algorithm::ReduceOr:
+            this->uni_vorpd(vmm_dst, vmm_dst, op); // bitwise OR of the raw register bits
+            break;
+        case Algorithm::ReduceProd:
+            if (isa == x64::avx512_core) {
+                this->vpmullq(vmm_dst, vmm_dst, op);
+            } else { // below AVX-512 the multiplication is delegated to mul_emitter
+                auto vmm_aux_0 = getVmm(); // two scratch registers handed to the emitter
+                auto vmm_aux_1 = getVmm();
+                if (op.isMEM()) {
+                    mul_emitter->emit_code({vmm_dst.getIdx()}, {vmm_dst.getIdx()}, {vmm_aux_0.getIdx(), vmm_aux_1.getIdx()}, {op.getIdx()}); // NOTE(review): same memory-operand question as for ReduceMax
+                } else {
+                    mul_emitter->emit_code({vmm_dst.getIdx(), op.getIdx()}, {vmm_dst.getIdx()}, {vmm_aux_0.getIdx(), vmm_aux_1.getIdx()});
+                }
+            }
+            break;
+        default:
+            IE_THROW() << "Unsupported reduce mode '" << algToString(this->jcp.reduce_mode) << "'";
+    }
+}
+
+// Horizontally reduces the 64-bit lanes of vmm_dst to a single value and stores it as a scalar.
+//   vmm_dst       - accumulator register; its index is reinterpreted as Zmm/Ymm/Xmm depending on isa.
+//   dst_el_type   - element type of the destination memory (overridden by f32 when low-precision
+//                   fusing with post ops is active, in which case reg_src is the store target).
+//   load_embedded - when true, the value currently stored at the target address is folded into
+//                   the result before it is written back.
+template <typename CallArgs>
+template <x64::cpu_isa_t isa>
+void JitReduceKernelBase<CallArgs>::horiz_reduce_store_qq(const Xmm& vmm_dst, const ov::element::Type& dst_el_type, bool load_embedded) {
+    auto xmm_aux_1 = RegistersPool::Reg<Xmm>(this->registersPool);
+    auto xmm_aux_2 = RegistersPool::Reg<Xmm>(this->registersPool);
+
+    if (isa == x64::avx512_core) {
+        auto zmm_dst = Zmm(vmm_dst.getIdx());
+        auto ymm_dst = Ymm(vmm_dst.getIdx());
+        auto ymm_aux_1 = Ymm(xmm_aux_1.getIdx());
+
+        // 8 lanes -> 4 lanes -> 2 lanes -> 1 lane.
+        this->vextractf64x4(ymm_aux_1, zmm_dst, 1);
+        this->horiz_qq<isa>(ymm_aux_1, ymm_dst);
+        this->vextractf128(xmm_aux_2, ymm_aux_1, 1);
+        this->horiz_qq<isa>(xmm_aux_1, xmm_aux_2);
+        this->vshufpd(xmm_aux_2, xmm_aux_1, xmm_aux_1, 0b00000001);
+        this->horiz_qq<isa>(xmm_aux_1, xmm_aux_2);
+    } else if (isa == x64::avx2) {
+        auto ymm_dst = Ymm(vmm_dst.getIdx());
+        auto xmm_dst = Xmm(vmm_dst.getIdx());
+
+        // 4 lanes -> 2 lanes -> 1 lane.
+        this->vextractf128(xmm_aux_1, ymm_dst, 1);
+        this->horiz_qq<isa>(xmm_aux_1, xmm_dst);
+        this->vshufpd(xmm_aux_2, xmm_aux_1, xmm_aux_1, 0b00000001);
+        this->horiz_qq<isa>(xmm_aux_1, xmm_aux_2);
+    } else if (isa == x64::sse41) {
+        auto xmm_dst = Xmm(vmm_dst.getIdx());
+
+        // Swap the two 64-bit halves. vshufpd is VEX-encoded and therefore illegal on SSE4.1-only
+        // hardware; uni_vpshufd falls back to the legacy pshufd encoding when AVX is unavailable.
+        // Immediate 0b01001110 selects dwords {2, 3, 0, 1}, i.e. the same qword swap.
+        this->uni_vpshufd(xmm_aux_1, xmm_dst, 0b01001110);
+        this->horiz_qq<isa>(xmm_aux_1, xmm_dst);
+    }
+
+    auto trg_el_type = dst_el_type;
+    Reg64 trg_ptr = reg_dst;
+    if (this->jcp.fuse_low_precision && (post_reduce || post_ops_fusing)) {
+        trg_el_type = ov::element::f32; // TODO i64 fusing
+        trg_ptr = reg_src;
+    }
+    if (load_embedded) {
+        if (isa == x64::avx512_core && exec_el_type == trg_el_type) {
+            // No conversion needed: fold the stored value in directly through a broadcast memory operand.
+            this->horiz_qq<isa>(xmm_aux_1, this->ptr_b[trg_ptr]);
+        } else {
+            this->load_scalar(xmm_aux_2, this->ptr[trg_ptr], exec_el_type, trg_el_type);
+            this->horiz_qq<isa>(xmm_aux_1, xmm_aux_2);
+        }
+    }
+    this->store_scalar(this->ptr[trg_ptr], xmm_aux_1, trg_el_type, exec_el_type);
+}
+
+///////////////////////////////
+/////// JitReduceKernel ///////
+///////////////////////////////
+
+// Sets up the per-iteration element count and, for ReduceLogSumExp, the exp injector.
+// On SSE4.1 one loop iteration covers two vector registers, so loop_step is doubled.
+template <x64::cpu_isa_t isa>
+JitReduceKernel<isa>::JitReduceKernel(const JitReduceConfigParams &jcp) : JitReduceKernelBase<JitReduceCallArgs>(jit_name(), jcp, isa) {
+    const auto elements_per_register = vlen / exec_el_type.size();
+    loop_step = (isa == x64::sse41) ? 2 * elements_per_register : elements_per_register;
+
+    if (jcp.reduce_mode != Algorithm::ReduceLogSumExp) {
+        return;
+    }
+    exp_injector = std::make_shared<x64::jit_uni_eltwise_injector_f32<isa>>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f);
+}
+
+// Emits the complete kernel: preamble, register setup from the call arguments, the vectorized
+// main part (reduce_main), the scalar tail (reduce_tail), postamble, and finally the data
+// sections of the helper emitters and the constant table used by the selected reduce mode.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::generate() {
+    const auto mode = jcp.reduce_mode;
+    // v_abs_mask is allocated for ReduceL1, except for i64 execution on AVX-512.
+    const bool use_abs_mask = mode == Algorithm::ReduceL1 && !(isa == x64::avx512_core && exec_el_type == ov::element::i64);
+    // Modes that read constants through reg_table / table_val().
+    const bool use_aux_table = one_of(mode, Algorithm::ReduceAnd, Algorithm::ReduceL1, Algorithm::ReduceMax,
+                                            Algorithm::ReduceMin, Algorithm::ReduceProd, Algorithm::ReduceOr); // TODO ReduceProd ?
+    const bool is_logical = one_of(mode, Algorithm::ReduceAnd, Algorithm::ReduceOr);
+
+    this->preamble();
+
+    registersPool = RegistersPool::create(isa, {rax, rcx, rsp, rdi, k0});
+
+    // General purpose registers holding the call arguments.
+    reg_src         = getReg64();
+    reg_dst         = getReg64();
+    reg_work_amount = getReg64();
+    reg_work_batch  = getReg64();
+    mov(reg_src, ptr[reg_params + GET_OFF(src)]);
+    mov(reg_dst, ptr[reg_params + GET_OFF(dst)]);
+    mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]);
+    mov(reg_work_batch, ptr[reg_params + GET_OFF(work_batch)]);
+
+    reg_reduce_stride = getReg64();
+    v_src = getVmm();
+    v_dst = getVmm();
+
+    if (use_abs_mask) {
+        v_abs_mask = getVmm();
+    }
+    if (isa == x64::sse41) {
+        // Second accumulator: an SSE4.1 iteration processes two registers.
+        v_dst_aux = getVmm();
+    }
+
+    if (planar_layout) {
+        reg_reduce_w = getReg64();
+        mov(reg_reduce_w, ptr[reg_params + GET_OFF(reduce_w)]);
+    }
+    if (use_aux_table) {
+        reg_table = getReg64();
+        mov(reg_table, l_table);
+    }
+    if (is_logical) {
+        v_zero = getVmm();
+        uni_vpxor(v_zero, v_zero, v_zero);
+    }
+    if (mode == Algorithm::ReduceOr) {
+        v_ones = getVmm();
+        uni_vmovups(v_ones, table_val(0));
+    }
+
+    reduce_main();
+    reduce_tail();
+
+    registersPool.reset();
+
+    this->postamble();
+
+    // Data sections are emitted after the code.
+    if (vcvtneps2bf16) {
+        vcvtneps2bf16->emit_data();
+    }
+    if (max_emitter) {
+        max_emitter->emit_data();
+    }
+    if (min_emitter) {
+        min_emitter->emit_data();
+    }
+    if (mul_emitter) {
+        mul_emitter->emit_data();
+    }
+    if (use_aux_table) {
+        prepare_aux_table();
+    } else if (mode == Algorithm::ReduceLogSumExp) {
+        exp_injector->prepare_table();
+    }
+}
+
+// Emits the vectorized part of the kernel. Three code paths are generated and selected at run time:
+//   1. default               - reduce full vectors and store the vector accumulator to memory
+//                              (planar layout reducing a dimension other than W, or blocked layout);
+//   2. reduce_to_scalar_label - planar layout with reg_reduce_w == 1: reduce full vectors, then
+//                              collapse the accumulator horizontally into one scalar;
+//   3. reduce_to_gather_label - planar layout with reg_work_batch == 0: source vectors are
+//                              assembled with gather through the indices in v_idx.
+// Elements that do not fill a whole loop_step are left for reduce_tail().
+template <x64::cpu_isa_t isa>
+inline void JitReduceKernel<isa>::reduce_main() {
+    // ================================================================
+    // ***isa: AVX512***
+    // ReduceAnd (Logical And)
+    // step 1: init dst 0x3f800000 (1.0f)
+    //              aux 0x3f800000 (1.0f)
+    //             zero 0x00000000 (0.0f)
+    // step 2: if src equals 0, set mask bit 0, else set mask bit 1
+    // step 3: src = mask bit == 0 ? zero : aux
+    // step 4: dst = dst & src
+    //                  src    mask_bit    new_src    dst    new_dst
+    //         case 1    ~0        1         1.0f     1.0f     1.0f
+    //         case 2     0        0         0.0f     1.0f     0.0f
+    //         case 3    ~0        1         1.0f     0.0f     0.0f
+    //         case 4     0        0         0.0f     0.0f     0.0f
+    // step 5: loop: offset src, and do step 2 and step 3
+    //
+    // ReduceOr (Logical Or)
+    // step 1: init dst 0x00000000 (0.0f)
+    //              aux 0x3f800000 (1.0f)
+    //             zero 0x00000000 (0.0f)
+    // step 2: if src equals 0, set mask bit 0, else set mask bit 1
+    // step 3: src = mask bit == 0 ? zero : aux
+    // step 4: dst = dst | src
+    //                  src    mask_bit    new_src    dst    new_dst
+    //         case 1     0        0         0.0f     0.0f     0.0f
+    //         case 2    ~0        1         1.0f     0.0f     1.0f
+    //         case 3     0        0         0.0f     1.0f     1.0f
+    //         case 4    ~0        1         1.0f     1.0f     1.0f
+    // step 5: loop: offset src, and do step 2 and step 3
+    // ================================================================
+    // ***isa: OTHER***
+    // ReduceAnd (Logical And)
+    // step 1: init dst 0x3f800000 (1.0f)
+    // step 2: if src equals 0, set it 0x00000000, else set 0xffffffff
+    // step 3: dst = dst & src
+    //         0x3f800000 = 0x3f800000 & 0xffffffff (result: 1.0f)
+    //         0x00000000 = 0x3f800000 & 0x00000000 (result: 0.0f)
+    //         0x00000000 = 0x00000000 & 0xffffffff (result: 0.0f)
+    //         0x00000000 = 0x00000000 & 0x00000000 (result: 0.0f)
+    // step 4: loop: offset src, and do step 2 and step 3
+    //
+    // ReduceOr (Logical Or)
+    // step 1: init dst 0x00000000 (0.0f)
+    //              aux 0x3f800000 (1.0f)
+    // step 2: dst = dst | src
+    //         0x00000000 = 0x00000000 | 0x00000000
+    //                  A = 0x00000000 | A
+    //                  A =          A | 0x00000000
+    //                  C =          A | B
+    // (A, B stand for number other than 0x00000000)
+    // step 3: loop: offset src, and do step 2
+    // step 4: if dst equals 0, set it 0x00000000, else set 0xffffffff
+    // step 5: dst = dst & aux
+    //         0x00000000 = 0x00000000 & 0x3f800000 (result: 0.0f)
+    //         0x3f800000 = 0xffffffff & 0x3f800000 (result: 1.0f)
+    // ================================================================
+    Label reduce_to_scalar_label;
+    Label reduce_to_gather_label;
+    Label reduce_main_end_label;
+    if (planar_layout) {
+        cmp(reg_work_batch, 0);
+        je(reduce_to_gather_label, T_NEAR);
+
+        cmp(reg_reduce_w, 1); // planar layout reducing W
+        je(reduce_to_scalar_label, T_NEAR);
+    }
+
+    // store v_dst directly into memory after reducing
+    // cases: [planar layout reducing other dimensions but W] [blocked layout]
+    {
+        cmp(reg_work_amount, loop_step);
+        jl(reduce_main_end_label, T_NEAR); // avoid illegal loading and storing
+
+        if (jcp.reduce_mode == Algorithm::ReduceL1 && !(isa == x64::avx512_core && exec_el_type == ov::element::i64)) {
+            uni_vmovups(v_abs_mask, table_val(1));
+        }
+
+        load_dst_vector();
+
+        reduce_kernel();
+
+        if (jcp.reduce_mode == Algorithm::ReduceMean) {
+            auto reg_can_divide = getReg64();
+            auto reg_divider = getReg64();
+            auto vmm_divider = getVmm();
+            Label reduce_divide_end_label;
+
+            // The division is applied only when the caller sets can_divide.
+            mov(reg_can_divide, ptr[reg_params + GET_OFF(can_divide)]);
+            cmp(reg_can_divide, 0);
+            je(reduce_divide_end_label, T_NEAR);
+            {
+                mov(reg_divider, ptr[reg_params + GET_OFF(divisor)]);
+                if (exec_el_type.size() == 4) {
+                    uni_vbroadcastss(vmm_divider, ptr[reg_divider]);
+                } else if (exec_el_type.size() == 8) {
+                    uni_vbroadcastsd(vmm_divider, ptr[reg_divider]);
+                }
+                if (exec_el_type == ov::element::f32) {
+                    uni_vdivps(v_dst, v_dst, vmm_divider);
+                    if (isa == x64::sse41) {
+                        uni_vdivps(v_dst_aux, v_dst_aux, vmm_divider);
+                    }
+                } else if (exec_el_type == ov::element::f64) {
+                    uni_vdivpd(v_dst, v_dst, vmm_divider);
+                    if (isa == x64::sse41) {
+                        uni_vdivpd(v_dst_aux, v_dst_aux, vmm_divider);
+                    }
+                } else if (exec_el_type == ov::element::i64) {
+                    // i64 mean: convert to f64, divide, truncate, convert back.
+                    // NOTE(review): the i64 <-> f64 conversions are only emitted for AVX-512.
+                    if (isa == x64::avx512_core) {
+                        vcvtqq2pd(v_dst, v_dst);
+                    } else {
+                        // TODO
+                    }
+                    uni_vdivpd(v_dst, v_dst, vmm_divider);
+                    uni_vroundpd(v_dst, v_dst, 0x3); // Truncation
+                    if (isa == x64::avx512_core) {
+                        vcvtpd2qq(v_dst, v_dst);
+                    } else {
+                        // TODO
+                    }
+                    if (isa == x64::sse41) {
+                        // cvt
+                        uni_vdivpd(v_dst_aux, v_dst_aux, vmm_divider);
+                        // cvt
+                    }
+                }
+            }
+            L(reduce_divide_end_label);
+        }
+
+        store_dst_vector();
+
+        jmp(reduce_main_end_label, T_NEAR);
+    }
+
+    // reduce vector in v_dst to be a scalar before store into memory
+    // cases: [planar layout reducing W]
+    L(reduce_to_scalar_label);
+    {
+        // init dst, dst loading is embedded in horiz_reduce_store
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd:
+            case Algorithm::ReduceProd:
+                uni_vmovups(v_dst, table_val(0));
+                break;
+            case Algorithm::ReduceL1:
+                if (!(isa == x64::avx512_core && exec_el_type == ov::element::i64)) {
+                    uni_vmovups(v_abs_mask, table_val(1));
+                }
+                uni_vpxor(v_dst, v_dst, v_dst);
+                break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceLogSumExp:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceOr:
+            case Algorithm::ReduceSum:
+            case Algorithm::ReduceSumSquare:
+                uni_vpxor(v_dst, v_dst, v_dst);
+                break;
+            case Algorithm::ReduceMax:
+                if (isFloatCompatible(jcp.dst_el_type)) {
+                    uni_vmovups(v_dst, table_val(2));
+                } else {
+                    uni_vmovups(v_dst, table_val(4));
+                }
+                break;
+            case Algorithm::ReduceMin:
+                if (isFloatCompatible(jcp.dst_el_type)) {
+                    uni_vmovups(v_dst, table_val(3));
+                } else {
+                    uni_vmovups(v_dst, table_val(5));
+                }
+                break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+        // reduce
+        reduce_main_loop();
+        if (jcp.reduce_mode == Algorithm::ReduceOr && isa != x64::avx512_core) {
+            // Map every non-zero lane to v_ones. The comparison width has to match the lane
+            // width of the execution type, the same way the scalar tail code does it.
+            if (exec_el_type == ov::element::f64 || exec_el_type == ov::element::i64) {
+                uni_vcmppd(v_dst, v_dst, v_zero, _cmp_neq_uq);
+                uni_vandpd(v_dst, v_dst, v_ones);
+            } else {
+                uni_cmpneqps(v_dst, v_dst, v_zero);
+                uni_vandps(v_dst, v_dst, v_ones);
+            }
+        }
+        // store
+        // store after horizontal calculation and calculation with loaded original ptr[reg_dst]
+        // NOTE(review): no horizontal store is emitted here for execution types other than f32 and i64.
+        if (exec_el_type == ov::element::f32) {
+            horiz_reduce_store_ps<isa>(v_dst, jcp.dst_el_type, true);
+        } else if (exec_el_type == ov::element::i64) {
+            horiz_reduce_store_qq<isa>(v_dst, jcp.dst_el_type, true);
+        }
+
+        jmp(reduce_main_end_label, T_NEAR);
+    }
+
+    // load v_src with gather, then store v_dst directly into memory after reducing
+    // cases: [planar layout reducing small W]
+    L(reduce_to_gather_label);
+    {
+        int step = 1;
+        cmp(reg_work_amount, step);
+        jl(reduce_main_end_label, T_NEAR); // Avoid illegal loading and storing.
+
+        auto reg_idx = getReg64();
+        v_idx = getVmm();
+        mov(reg_idx, ptr[reg_params + GET_OFF(idx)]);
+        uni_vmovdqu(v_idx, ptr[reg_idx]);
+
+        if (jcp.reduce_mode == Algorithm::ReduceL1 && !(isa == x64::avx512_core && exec_el_type == ov::element::i64)) {
+            uni_vmovups(v_abs_mask, table_val(1));
+        }
+
+        // load
+        load_dst_vector();
+
+        // reduce
+        Label reduce_loop_label;
+        Label reduce_loop_end_label;
+        L(reduce_loop_label);
+        {
+            cmp(reg_work_amount, step);
+            jl(reduce_loop_end_label, T_NEAR);
+
+            reduce_gather(v_dst, 0);
+            if (isa == x64::sse41) {
+                reduce_gather(v_dst_aux, 4 * jcp.src_el_type.size());
+            }
+
+            add(reg_src, step * jcp.src_el_type.size());
+            sub(reg_work_amount, step);
+            jmp(reduce_loop_label, T_NEAR);
+        }
+        L(reduce_loop_end_label);
+
+        // store
+        store_dst_vector();
+
+        jmp(reduce_main_end_label, T_NEAR);
+    }
+
+    L(reduce_main_end_label);
+}
+
+// Emits the scalar tail that handles the elements left over by reduce_main().
+// Two variants are generated: every source scalar goes to its own destination scalar
+// (reduce_kernel_tail), or - for planar layout with reg_reduce_w == 1 - all remaining source
+// scalars are folded into the single scalar at reg_dst.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_tail() {
+    const bool i64_on_avx512 = (isa == x64::avx512_core) && (exec_el_type == ov::element::i64);
+    if (jcp.reduce_mode == Algorithm::ReduceL1 && !i64_on_avx512) {
+        auto xmm_mask = Xmm(v_abs_mask.getIdx());
+        uni_vmovups(xmm_mask, table_val(1));
+    }
+
+    Label tail_dst_shifted_label;
+    Label tail_dst_fixed_label;
+    Label reduce_tail_end_label;
+    if (planar_layout) {
+        cmp(reg_reduce_w, 1);  // planar layout reducing W
+        je(tail_dst_fixed_label, T_NEAR);
+    }
+
+    // (X1, X2, X3, ...) -> (Y1, Y2, Y3, ...): one destination scalar per source scalar.
+    // Used for planar layout reducing a dimension other than W and for blocked layout padding.
+    L(tail_dst_shifted_label);
+    {
+        reduce_kernel_tail();
+
+        jmp(reduce_tail_end_label, T_NEAR);
+    }
+
+    // (X1, X2, X3, ...) -> (Y1): all source scalars end up in the same destination scalar.
+    // Used for planar layout reducing W.
+    L(tail_dst_fixed_label);
+    {
+        const int step = 1;
+        auto xmm_acc = Xmm(v_dst.getIdx());
+        auto xmm_in = Xmm(v_src.getIdx());
+        Label scalar_loop_label;
+        Label scalar_loop_end_label;
+
+        // Start from the value already present in the destination.
+        load_scalar(xmm_acc, ptr[reg_dst], exec_el_type, jcp.dst_el_type);
+
+        L(scalar_loop_label);
+        {
+            cmp(reg_work_amount, step);
+            jl(scalar_loop_end_label, T_NEAR);
+
+            load_scalar(xmm_in, ptr[reg_src], exec_el_type, jcp.src_el_type);
+
+            reduce_kernel_scalar(xmm_in, xmm_acc);
+            if (jcp.reduce_mode == Algorithm::ReduceOr) {
+                auto xmm_true = Xmm(v_ones.getIdx());
+                auto xmm_false = Xmm(v_zero.getIdx());
+
+                // Normalize the accumulator: any non-zero pattern becomes the v_ones constant.
+                if (exec_el_type == ov::element::f32) {
+                    uni_vcmpps(xmm_acc, xmm_acc, xmm_false, _cmp_neq_uq);
+                    uni_vandps(xmm_acc, xmm_acc, xmm_true);
+                } else if (exec_el_type == ov::element::f64 || exec_el_type == ov::element::i64) {
+                    uni_vcmppd(xmm_acc, xmm_acc, xmm_false, _cmp_neq_uq);
+                    uni_vandpd(xmm_acc, xmm_acc, xmm_true);
+                }
+            }
+
+            add(reg_src, step * jcp.src_el_type.size());
+            sub(reg_work_amount, step);
+
+            jmp(scalar_loop_label, T_NEAR);
+        }
+        L(scalar_loop_end_label);
+
+        // Write the folded scalar back.
+        store_scalar(ptr[reg_dst], xmm_acc, jcp.dst_el_type, exec_el_type);
+    }
+
+    L(reduce_tail_end_label);
+}
+
+// Loads reduce_stride from the call arguments into reg_reduce_stride and passes it through
+// mul_by_const with the source element size.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::init_reg_reduce_stride() {
+    auto reg_scratch = getReg64();
+    const auto src_el_size = jcp.src_el_type.size();
+
+    mov(reg_reduce_stride, ptr[reg_params + GET_OFF(reduce_stride)]);
+    mul_by_const(reg_reduce_stride, reg_scratch, src_el_size);
+}
+
+// Emits the vector loop over reg_work_amount in units of loop_step elements.
+// When reg_work_batch == 1 each iteration reduces one vector (reduce_once); otherwise the reduce
+// stride is initialized and each iteration walks the whole batch (reduce_batch).
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_kernel() {
+    Label single_loop_label;
+    Label batch_loop_label;
+    Label finished_label;
+    const auto src_bytes_per_step = loop_step * jcp.src_el_type.size();
+
+    cmp(reg_work_batch, 1);
+    je(single_loop_label, T_NEAR);
+
+    init_reg_reduce_stride();
+
+    // Batched variant.
+    L(batch_loop_label);
+    {
+        cmp(reg_work_amount, loop_step);
+        jl(finished_label, T_NEAR);
+
+        reduce_batch();
+
+        add(reg_src, src_bytes_per_step);
+        sub(reg_work_amount, loop_step);
+        jmp(batch_loop_label, T_NEAR);
+    }
+
+    // Single-batch variant.
+    L(single_loop_label);
+    {
+        cmp(reg_work_amount, loop_step);
+        jl(finished_label, T_NEAR);
+
+        reduce_once();
+
+        add(reg_src, src_bytes_per_step);
+        sub(reg_work_amount, loop_step);
+        jmp(single_loop_label, T_NEAR);
+    }
+    L(finished_label);
+}
+
+// Loads one loop_step worth of source elements from reg_src and reduces them into the
+// accumulator(s). On SSE4.1 a step spans two registers: the second one goes into v_dst_aux.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_once() {
+    load_vector(v_src, ptr[reg_src], exec_el_type, jcp.src_el_type);
+    reduce_kernel(v_src, v_dst);
+
+    if (isa == x64::sse41) {
+        // The second register starts half a step further. Deriving the offset from loop_step keeps
+        // it correct for 8-byte execution types, where a register holds 2 elements rather than 4.
+        const size_t second_half_offset = static_cast<size_t>(loop_step / 2) * jcp.src_el_type.size();
+        load_vector(v_src, ptr[reg_src + second_half_offset], exec_el_type, jcp.src_el_type);
+        reduce_kernel(v_src, v_dst_aux);
+    }
+}
+
+// Reduces reg_work_batch vectors into the accumulator(s), starting at reg_src and advancing by
+// reg_reduce_stride after every vector. reg_src and reg_work_batch themselves are left untouched;
+// the loop works on copies.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_batch() {
+    auto reg_src_aux = getReg64();
+    auto reg_work_batch_aux = getReg64();
+
+    mov(reg_src_aux, reg_src);
+    mov(reg_work_batch_aux, reg_work_batch);
+
+    Label reduce_batch_loop_label;
+    Label reduce_batch_loop_end_label;
+    L(reduce_batch_loop_label);
+    {
+        cmp(reg_work_batch_aux, 1);
+        jl(reduce_batch_loop_end_label, T_NEAR);
+
+        load_vector(v_src, ptr[reg_src_aux], exec_el_type, jcp.src_el_type);
+        reduce_kernel(v_src, v_dst);
+        if (isa == x64::sse41) {
+            // The second register starts half a step further. Deriving the offset from loop_step
+            // keeps it correct for 8-byte execution types (2 elements per register, not 4).
+            const size_t second_half_offset = static_cast<size_t>(loop_step / 2) * jcp.src_el_type.size();
+            load_vector(v_src, ptr[reg_src_aux + second_half_offset], exec_el_type, jcp.src_el_type);
+            reduce_kernel(v_src, v_dst_aux);
+        }
+
+        add(reg_src_aux, reg_reduce_stride);
+        sub(reg_work_batch_aux, 1);
+        jmp(reduce_batch_loop_label, T_NEAR);
+    }
+    L(reduce_batch_loop_end_label);
+}
+
+// AVX-512 gather step: fills v_src with elements addressed by reg_src + offset + v_idx and reduces
+// them into vmm_dst. 8- and 4-byte sources use hardware gathers with an all-ones k-mask; 2- and
+// 1-byte sources are assembled through pack_gathered_vector. Throws for other element sizes.
+template <>
+void JitReduceKernel<x64::avx512_core>::reduce_gather(const Zmm& vmm_dst, int64_t offset) {
+    const auto src_size = jcp.src_el_type.size();
+
+    if (src_size == 8) {
+        // 64-bit elements are gathered with 32-bit indices taken from the low half of v_idx.
+        auto ymm_idx = Ymm(v_idx.getIdx());
+
+        kxnorq(k_mask, k_mask, k_mask);
+        vgatherdpd((Zmm)v_src | k_mask, ptr[reg_src + offset + ymm_idx]);
+        if (jcp.src_el_type == ov::element::f64 && exec_el_type == ov::element::i64) {
+            vcvtpd2qq(v_src, v_src);
+        } else if (jcp.src_el_type == ov::element::i64 && exec_el_type == ov::element::f64) {
+            vcvtqq2pd(v_src, v_src);
+        }
+    } else if (src_size == 4) {
+        kxnord(k_mask, k_mask, k_mask);
+        vgatherdps((Zmm)v_src | k_mask, ptr[reg_src + offset + v_idx]);
+        if (jcp.src_el_type == ov::element::i32) {
+            uni_vcvtdq2ps(v_src, v_src);
+        }
+    } else if (src_size == 2 || src_size == 1) {
+        pack_gathered_vector(v_src, v_idx, offset, jcp.src_el_type);
+    } else {
+        IE_THROW() << "Unkown source element type '" << jcp.src_el_type << "'";
+    }
+
+    reduce_kernel(v_src, vmm_dst);
+}
+
+// AVX2 gather step: fills v_src with elements addressed by reg_src + offset + v_idx and reduces
+// them into vmm_dst. 8- and 4-byte sources use hardware gathers with an all-ones vector mask;
+// 2- and 1-byte sources are assembled through pack_gathered_vector. Throws for other sizes.
+template <>
+void JitReduceKernel<x64::avx2>::reduce_gather(const Ymm& vmm_dst, int64_t offset) {
+    const auto src_size = jcp.src_el_type.size();
+
+    if (src_size == 8) {
+        auto v_all_ones = getVmm();
+        // 64-bit elements are gathered with 32-bit indices taken from the low part of v_idx.
+        auto xmm_idx = Xmm(v_idx.getIdx());
+
+        uni_vpcmpeqq(v_all_ones, v_all_ones, v_all_ones);
+        vgatherdpd(v_src, ptr[reg_src + offset + xmm_idx], v_all_ones);
+        if (exec_el_type == ov::element::i64) {
+            // TODO Convert pd to qq (v_src, v_src);
+        }
+    } else if (src_size == 4) {
+        auto v_all_ones = getVmm();
+
+        uni_vpcmpeqd(v_all_ones, v_all_ones, v_all_ones);
+        vgatherdps(v_src, ptr[reg_src + offset + v_idx], v_all_ones);
+        if (jcp.src_el_type == ov::element::i32) {
+            uni_vcvtdq2ps(v_src, v_src);
+        }
+    } else if (src_size == 2 || src_size == 1) {
+        pack_gathered_vector(v_src, v_idx, offset, jcp.src_el_type);
+    } else {
+        IE_THROW() << "Unkown source element type '" << jcp.src_el_type << "'";
+    }
+
+    reduce_kernel(v_src, vmm_dst);
+}
+
+// SSE4.1 gather step: the source vector is always assembled element by element through
+// pack_gathered_vector and then reduced into vmm_dst.
+template <>
+void JitReduceKernel<x64::sse41>::reduce_gather(const Xmm& vmm_dst, int64_t offset) {
+    const auto& src_type = jcp.src_el_type;
+
+    pack_gathered_vector(v_src, v_idx, offset, src_type);
+    reduce_kernel(v_src, vmm_dst);
+}
+
+// Software gather: reads 32-bit indices from vmm_index, fetches the element at
+// reg_src + offset + index for each of them, packs the elements contiguously on the stack and
+// loads the packed data into vmm_val (widening bf16/i8/u8 to 32-bit lanes).
+//   vmm_val     - destination vector register.
+//   vmm_index   - vector of 32-bit indices, added to the address as-is.
+//   offset      - constant displacement added to every address.
+//   src_el_type - element type of the gathered data. Throws for unsupported types.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::pack_gathered_vector(const Vmm& vmm_val, const Vmm& vmm_index, int64_t offset, const ov::element::Type& src_el_type) {
+    // Stack scratch layout:
+    //   [rsp,        rsp + vlen)     packed gathered elements
+    //   [rsp + vlen, rsp + 2 * vlen) spilled indices
+    // The two areas must not share storage: an 8-byte element written at slot i would otherwise
+    // overwrite the 4-byte index of slot i + 1 before it has been read.
+    const size_t idx_area = vlen;
+    sub(rsp, 2 * vlen);
+    uni_vmovdqu(ptr[rsp + idx_area], vmm_index);
+    const size_t repeats = vlen / exec_el_type.size();
+    auto reg_tmp_64 = getReg64();
+    auto reg_tmp_32 = Reg32(reg_tmp_64.getIdx());
+    auto reg_tmp_16 = Reg16(reg_tmp_64.getIdx());
+    auto reg_tmp_8  = Reg8(reg_tmp_64.getIdx());
+    for (size_t i = 0; i < repeats; i++) {
+        // The 32-bit load zero-extends the index into reg_tmp_64.
+        mov(reg_tmp_32, ptr[rsp + (idx_area + i * sizeof(int))]);
+        Address table_idx = ptr[reg_src + offset + reg_tmp_64];
+
+        switch (src_el_type.size()) {
+            case 8:
+                mov(reg_tmp_64, table_idx);
+                mov(ptr[rsp + i * sizeof(int64_t)], reg_tmp_64);
+                break;
+            case 4:
+                mov(reg_tmp_32, table_idx);
+                mov(ptr[rsp + i * sizeof(int32_t)], reg_tmp_32);
+                break;
+            case 2:
+                mov(reg_tmp_16, table_idx);
+                mov(ptr[rsp + i * sizeof(ov::intel_cpu::bfloat16_t)], reg_tmp_16);
+                break;
+            case 1:
+                mov(reg_tmp_8, table_idx);
+                mov(ptr[rsp + i * sizeof(char)], reg_tmp_8);
+                break;
+            default:
+                IE_THROW() << "Unkown source element type '" << src_el_type << "'";
+        }
+    }
+
+    switch (src_el_type) {
+        case ov::element::f64:
+        case ov::element::f32:
+        case ov::element::i64:
+        case ov::element::i32:
+            uni_vmovups(vmm_val, ptr[rsp]);
+            break;
+        case ov::element::bf16:
+            // bf16 -> f32: zero-extend to 32 bits and move the payload into the upper half.
+            uni_vpmovzxwd(vmm_val, ptr[rsp]);
+            uni_vpslld(vmm_val, vmm_val, 16);
+            break;
+        case ov::element::i8:
+            uni_vpmovsxbd(vmm_val, ptr[rsp]);
+            break;
+        case ov::element::u8:
+            uni_vpmovzxbd(vmm_val, ptr[rsp]);
+            break;
+        default:
+            IE_THROW() << "Unkown source element type '" << src_el_type << "'";
+    }
+
+    // cvtdq2ps interprets the lanes as packed 32-bit integers, so it is only applicable to
+    // integer sources of at most 4 bytes; 64-bit integer lanes are left untouched.
+    // TODO: i64 -> f64 conversion when the execution type requires it.
+    if (!isFloatCompatible(src_el_type) && src_el_type.size() <= 4) {
+        uni_vcvtdq2ps(vmm_val, vmm_val);
+    }
+    add(rsp, 2 * vlen);
+}
+
+// Emits the scalar tail loop in which every remaining source scalar is reduced into its own
+// destination scalar; reg_src and reg_dst advance by one element per iteration.
+// When reg_work_batch == 1 a single source scalar is folded per destination (reduce_once_tail);
+// otherwise the reduce stride is initialized and the whole batch is walked (reduce_batch_tail).
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_kernel_tail() {
+    Label reduce_label;
+    Label reduce_end_label;
+    Label reduce_batch_label;
+    auto xmm_dst = Xmm(v_dst.getIdx());
+
+    int step = 1;
+    cmp(reg_work_batch, 1);
+    je(reduce_label, T_NEAR);
+
+    init_reg_reduce_stride();
+
+    L(reduce_batch_label);
+    {
+        cmp(reg_work_amount, step);
+        jl(reduce_end_label, T_NEAR);
+
+        load_scalar(xmm_dst, ptr[reg_dst], exec_el_type, jcp.dst_el_type);
+
+        reduce_batch_tail();
+
+        store_scalar(ptr[reg_dst], xmm_dst, jcp.dst_el_type, exec_el_type);
+
+        add(reg_dst, step * jcp.dst_el_type.size());
+        add(reg_src, step * jcp.src_el_type.size());
+        sub(reg_work_amount, step);
+
+        jmp(reduce_batch_label, T_NEAR);
+    }
+
+    L(reduce_label);
+    {
+        cmp(reg_work_amount, step);
+        jl(reduce_end_label, T_NEAR);
+
+        load_scalar(xmm_dst, ptr[reg_dst], exec_el_type, jcp.dst_el_type);
+
+        // This path is entered only with reg_work_batch == 1 and skips init_reg_reduce_stride(),
+        // so the stride-based batch loop must not be used here: reg_reduce_stride is not set.
+        reduce_once_tail();
+
+        store_scalar(ptr[reg_dst], xmm_dst, jcp.dst_el_type, exec_el_type);
+
+        add(reg_dst, step * jcp.dst_el_type.size());
+        add(reg_src, step * jcp.src_el_type.size());
+        sub(reg_work_amount, step);
+
+        jmp(reduce_label, T_NEAR);
+    }
+    L(reduce_end_label);
+}
+
+// Loads one source scalar from reg_src and reduces it into the low lane of v_dst.
+// For ReduceOr the accumulator is afterwards normalized: non-zero becomes the v_ones constant.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_once_tail() {
+    auto xmm_acc = Xmm(v_dst.getIdx());
+    auto xmm_in = Xmm(v_src.getIdx());
+
+    load_scalar(xmm_in, ptr[reg_src], exec_el_type, jcp.src_el_type);
+    reduce_kernel_scalar(xmm_in, xmm_acc);
+
+    if (jcp.reduce_mode != Algorithm::ReduceOr) {
+        return;
+    }
+
+    auto xmm_false = Xmm(v_zero.getIdx());
+    auto xmm_true = Xmm(v_ones.getIdx());
+    const bool lanes_64bit = exec_el_type == ov::element::f64 || exec_el_type == ov::element::i64;
+
+    if (exec_el_type == ov::element::f32) {
+        uni_cmpneqps(xmm_acc, xmm_acc, xmm_false);
+        uni_vandps(xmm_acc, xmm_acc, xmm_true);
+    } else if (lanes_64bit) {
+        uni_vcmppd(xmm_acc, xmm_acc, xmm_false, _cmp_neq_uq);
+        uni_vandpd(xmm_acc, xmm_acc, xmm_true);
+    }
+}
+
+// Scalar counterpart of reduce_batch: reduces reg_work_batch source scalars into the low lane of
+// v_dst, starting at reg_src and advancing by reg_reduce_stride. Works on copies of reg_src and
+// reg_work_batch. For ReduceOr the accumulator is normalized after every element.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_batch_tail() {
+    auto reg_src_cursor = getReg64();
+    auto reg_batch_left = getReg64();
+    auto xmm_in = Xmm(v_src.getIdx());
+    auto xmm_acc = Xmm(v_dst.getIdx());
+    const bool is_reduce_or = jcp.reduce_mode == Algorithm::ReduceOr;
+    const bool lanes_64bit = exec_el_type == ov::element::f64 || exec_el_type == ov::element::i64;
+
+    mov(reg_src_cursor, reg_src);
+    mov(reg_batch_left, reg_work_batch);
+
+    Label batch_loop_label;
+    Label batch_loop_end_label;
+    L(batch_loop_label);
+    {
+        cmp(reg_batch_left, 1);
+        jl(batch_loop_end_label, T_NEAR);
+
+        load_scalar(xmm_in, ptr[reg_src_cursor], exec_el_type, jcp.src_el_type);
+        reduce_kernel_scalar(xmm_in, xmm_acc);
+        if (is_reduce_or) {
+            auto xmm_false = Xmm(v_zero.getIdx());
+            auto xmm_true = Xmm(v_ones.getIdx());
+
+            if (exec_el_type == ov::element::f32) {
+                uni_cmpneqps(xmm_acc, xmm_acc, xmm_false);
+                uni_vandps(xmm_acc, xmm_acc, xmm_true);
+            } else if (lanes_64bit) {
+                uni_vcmppd(xmm_acc, xmm_acc, xmm_false, _cmp_neq_uq);
+                uni_vandpd(xmm_acc, xmm_acc, xmm_true);
+            }
+        }
+
+        add(reg_src_cursor, reg_reduce_stride);
+        sub(reg_batch_left, 1);
+        jmp(batch_loop_label, T_NEAR);
+    }
+    L(batch_loop_end_label);
+}
+
+// Emits the vector loop used when the result is collapsed to a scalar afterwards: every
+// iteration reduces loop_step source elements into v_dst and advances reg_src. On SSE4.1 both
+// registers of a step are accumulated into the same v_dst.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_main_loop() {
+    Label reduce_loop_label;
+    Label reduce_loop_end_label;
+
+    L(reduce_loop_label);
+    {
+        cmp(reg_work_amount, loop_step);
+        jl(reduce_loop_end_label, T_NEAR);
+
+        load_vector(v_src, ptr[reg_src], exec_el_type, jcp.src_el_type);
+        reduce_kernel(v_src, v_dst);
+
+        if (isa == x64::sse41) {
+            // The second register starts half a step further. Deriving the offset from loop_step
+            // keeps it correct for 8-byte execution types (2 elements per register, not 4).
+            const size_t second_half_offset = static_cast<size_t>(loop_step / 2) * jcp.src_el_type.size();
+            load_vector(v_src, ptr[reg_src + second_half_offset], exec_el_type, jcp.src_el_type);
+            reduce_kernel(v_src, v_dst);
+        }
+
+        add(reg_src, loop_step * jcp.src_el_type.size());
+        sub(reg_work_amount, loop_step);
+
+        jmp(reduce_loop_label, T_NEAR);
+    }
+    L(reduce_loop_end_label);
+}
+
+// Emits the vector instructions that fold `vmm_src` into the accumulator `vmm_dst`
+// for the configured jcp.reduce_mode and the current exec_el_type (f32 / f64 / i64).
+//
+// vmm_src  - freshly loaded source vector. It is used as scratch and is overwritten
+//            by several modes (ReduceAnd on non-AVX512, ReduceL1, ReduceL2/SumSquare,
+//            ReduceLogSumExp, f64 ReduceOr on AVX512).
+// vmm_dst  - running accumulator, updated in place.
+//
+// Throws (IE_THROW) for a reduce mode that has no case in the active type branch.
+// Nothing is emitted when exec_el_type is none of f32 / f64 / i64.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_kernel(const Vmm& vmm_src, const Vmm& vmm_dst) {
+    const size_t src_idx = static_cast<size_t>(vmm_src.getIdx());
+    const size_t dst_idx = static_cast<size_t>(vmm_dst.getIdx());
+
+    if (exec_el_type == ov::element::f32) {
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd:
+                if (isa == x64::avx512_core) {
+                    // Keep dst only in lanes where src != 0, zero the others.
+                    vcmpps(k_mask, vmm_src, v_zero, _cmp_neq_uq);
+                    vmovups(vmm_dst | k_mask | T_z, vmm_dst);
+                } else {
+                    // src becomes an all-ones/all-zeros lane mask that gates dst.
+                    uni_cmpneqps(vmm_src, vmm_src, v_zero);
+                    uni_vandps(vmm_dst, vmm_dst, vmm_src);
+                }
+                break;
+            case Algorithm::ReduceL1:
+                uni_vandps(vmm_src, vmm_src, v_abs_mask);
+                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceSum:
+                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceMax:
+                uni_vmaxps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceMin:
+                uni_vminps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceSumSquare:
+                uni_vmulps(vmm_src, vmm_src, vmm_src);
+                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceLogSumExp:
+                exp_injector->compute_vector_range(src_idx, src_idx + 1);
+                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceOr:
+                if (isa == x64::avx512_core) {
+                    // Set the masked lanes of dst to (dst | ones) wherever src != 0.
+                    vcmpps(k_mask, vmm_src, v_zero, _cmp_neq_uq);
+                    vorps(vmm_dst | k_mask, vmm_dst, v_ones);
+                } else {
+                    // Raw bitwise OR; store_dst_vector() normalizes the result afterwards.
+                    uni_vorps(vmm_dst, vmm_dst, vmm_src);
+                }
+                break;
+            case Algorithm::ReduceProd:
+                uni_vmulps(vmm_dst, vmm_dst, vmm_src);
+                break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+    } else if (exec_el_type == ov::element::f64) {
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd:
+                if (isa == x64::avx512_core) {
+                    vcmppd(k_mask, vmm_src, v_zero, _cmp_neq_uq);
+                    // Masked move (not a bitwise AND with src): AND-ing two non-zero doubles
+                    // such as 1.0 and 2.0 yields 0 and would break the logical result.
+                    vmovupd(vmm_dst | k_mask | T_z, vmm_dst);
+                } else {
+                    uni_vcmppd(vmm_src, vmm_src, v_zero, _cmp_neq_uq);
+                    uni_vandpd(vmm_dst, vmm_dst, vmm_src);
+                }
+                break;
+            case Algorithm::ReduceL1:
+                uni_vandpd(vmm_src, vmm_src, v_abs_mask);
+                uni_vaddpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceSum:
+                uni_vaddpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceMax:
+                uni_vmaxpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceMin:
+                uni_vminpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceSumSquare:
+                uni_vmulpd(vmm_src, vmm_src, vmm_src);
+                uni_vaddpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceLogSumExp:
+                // NOTE(review): exp_injector is applied to f64 data here; confirm the injector
+                // instance created for this kernel actually supports 64-bit lanes.
+                exp_injector->compute_vector_range(src_idx, src_idx + 1);
+                uni_vaddpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceOr:
+                if (isa == x64::avx512_core) {
+                    vcmppd(k_mask, vmm_src, v_zero, _cmp_neq_uq);
+                    // The mask comes from a packed-double compare, so the blend must also work
+                    // on 64-bit lanes (vblendmpd); the packed-single form would read the mask
+                    // bits per 32-bit lane.
+                    vblendmpd(vmm_src | k_mask, v_zero, v_ones);
+                }
+                uni_vorpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceProd:
+                uni_vmulpd(vmm_dst, vmm_dst, vmm_src);
+                break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+    }  else if (exec_el_type == ov::element::i64) {
+        // NOTE(review): the zero tests below reuse packed-double compares on integer data.
+        // A lane holding only the sign bit (INT64_MIN) has the bit pattern of -0.0 and
+        // therefore compares equal to zero - confirm this is acceptable for i64 And/Or.
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd:
+                if (isa == x64::avx512_core) {
+                    vcmppd(k_mask, vmm_src, v_zero, _cmp_neq_uq);
+                    // 64-bit lane move so that the mask granularity matches the compare above.
+                    vmovupd(vmm_dst | k_mask | T_z, vmm_dst);
+                } else {
+                    uni_vcmppd(vmm_src, vmm_src, v_zero, _cmp_neq_uq);
+                    uni_vandpd(vmm_dst, vmm_dst, vmm_src);
+                }
+                break;
+            case Algorithm::ReduceL1:
+                if (isa == x64::avx512_core) {
+                    vpabsq(vmm_src, vmm_src);
+                } else {
+                    // NOTE(review): masking bits is not a two's-complement absolute value;
+                    // verify what aux_vals.int64_abs holds and whether negative inputs are handled.
+                    uni_vandpd(vmm_src, vmm_src, v_abs_mask);
+                }
+                uni_vpaddq(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceSum:
+                uni_vpaddq(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceMax:
+                if (isa == x64::avx512_core) {
+                    max_emitter->emit_code({dst_idx, src_idx}, {dst_idx});
+                } else {
+                    // The emitter is given one auxiliary vector register on this path.
+                    auto vmm_aux_0 = getVmm();
+                    max_emitter->emit_code({dst_idx, src_idx}, {dst_idx}, {vmm_aux_0.getIdx()});
+                }
+                break;
+            case Algorithm::ReduceMin:
+                if (isa == x64::avx512_core) {
+                    min_emitter->emit_code({dst_idx, src_idx}, {dst_idx});
+                } else {
+                    auto vmm_aux_0 = getVmm();
+                    min_emitter->emit_code({dst_idx, src_idx}, {dst_idx}, {vmm_aux_0.getIdx()});
+                }
+                break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceSumSquare:
+                // src = src * src, then accumulate.
+                if (isa == x64::avx512_core) {
+                    mul_emitter->emit_code({src_idx, src_idx}, {src_idx});
+                } else {
+                    // The emitter is given two auxiliary vector registers on this path.
+                    auto vmm_aux_0 = getVmm();
+                    auto vmm_aux_1 = getVmm();
+                    mul_emitter->emit_code({src_idx, src_idx}, {src_idx}, {vmm_aux_0.getIdx(), vmm_aux_1.getIdx()});
+                }
+                uni_vpaddq(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceLogSumExp:
+                // NOTE(review): exp_injector is run on i64 lanes and the result is added with an
+                // integer add - confirm this mode is really reachable with i64 execution type.
+                exp_injector->compute_vector_range(src_idx, src_idx + 1);
+                uni_vpaddq(vmm_dst, vmm_dst, vmm_src);
+                break;
+            case Algorithm::ReduceOr:
+                if (isa == x64::avx512_core) {
+                    vcmppd(k_mask, vmm_src, v_zero, _cmp_neq_uq);
+                    vorpd(vmm_dst | k_mask, vmm_dst, v_ones);
+                } else {
+                    // Raw bitwise OR; store_dst_vector() normalizes the result afterwards.
+                    uni_vorpd(vmm_dst, vmm_dst, vmm_src);
+                }
+                break;
+            case Algorithm::ReduceProd:
+                if (isa == x64::avx512_core) {
+                    mul_emitter->emit_code({dst_idx, src_idx}, {dst_idx});
+                } else {
+                    auto vmm_aux_0 = getVmm();
+                    auto vmm_aux_1 = getVmm();
+                    mul_emitter->emit_code({dst_idx, src_idx}, {dst_idx}, {vmm_aux_0.getIdx(), vmm_aux_1.getIdx()});
+                }
+                break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+    }
+}
+
+// Scalar (tail) counterpart of reduce_kernel(): emits the instructions that fold
+// `xmm_src` into the accumulator `xmm_dst` for jcp.reduce_mode and exec_el_type.
+//
+// xmm_src  - source register; used as scratch and overwritten by several modes.
+// xmm_dst  - running accumulator, updated in place.
+//
+// Throws (IE_THROW) for a reduce mode that has no case in the active type branch.
+// Nothing is emitted when exec_el_type is none of f32 / f64 / i64.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::reduce_kernel_scalar(const Xmm& xmm_src, const Xmm& xmm_dst) {
+    if (exec_el_type == ov::element::f32) {
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd: {
+                    // src becomes an all-ones/all-zeros mask that gates dst.
+                    auto xmm_zero = Xmm(v_zero.getIdx());
+                    uni_cmpneqps(xmm_src, xmm_src, xmm_zero);
+                    uni_vandps(xmm_dst, xmm_dst, xmm_src);
+                } break;
+            case Algorithm::ReduceL1: {
+                    auto xmm_abs_mask = Xmm(v_abs_mask.getIdx());
+                    uni_vandps(xmm_src, xmm_src, xmm_abs_mask);
+                    uni_vaddps(xmm_dst, xmm_dst, xmm_src);
+                } break;
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceSum:
+                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceMax:
+                uni_vmaxps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceMin:
+                uni_vminps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceSumSquare:
+                uni_vmulps(xmm_src, xmm_src, xmm_src);
+                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceLogSumExp:
+                exp_injector->compute_vector_range(xmm_src.getIdx(), xmm_src.getIdx() + 1);
+                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceOr:
+                uni_vorps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceProd:
+                uni_vmulps(xmm_dst, xmm_dst, xmm_src);
+                break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+    } else if (exec_el_type == ov::element::f64) {
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd: {
+                    auto xmm_zero = Xmm(v_zero.getIdx());
+                    uni_vcmppd(xmm_src, xmm_src, xmm_zero, _cmp_neq_uq);
+                    uni_vandpd(xmm_dst, xmm_dst, xmm_src);
+                } break;
+            case Algorithm::ReduceL1: {
+                    auto xmm_abs_mask = Xmm(v_abs_mask.getIdx());
+                    uni_vandpd(xmm_src, xmm_src, xmm_abs_mask);
+                    // Accumulate |src| (the mask register itself must not be added).
+                    uni_vaddpd(xmm_dst, xmm_dst, xmm_src);
+                } break;
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceSum:
+                uni_vaddpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceMax:
+                uni_vmaxpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceMin:
+                uni_vminpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceSumSquare:
+                uni_vmulpd(xmm_src, xmm_src, xmm_src);
+                uni_vaddpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceLogSumExp:
+                exp_injector->compute_vector_range(xmm_src.getIdx(), xmm_src.getIdx() + 1);
+                uni_vaddpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceOr:
+                uni_vorpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceProd:
+                uni_vmulpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+    } else if (exec_el_type == ov::element::i64) {
+        switch (jcp.reduce_mode) {
+            case Algorithm::ReduceAnd: {
+                    // NOTE(review): integer lanes are tested with a packed-double compare;
+                    // confirm that is intended for all i64 bit patterns.
+                    auto xmm_zero = Xmm(v_zero.getIdx());
+                    uni_vcmppd(xmm_src, xmm_src, xmm_zero, _cmp_neq_uq);
+                    uni_vandpd(xmm_dst, xmm_dst, xmm_src);
+                } break;
+            case Algorithm::ReduceL1:
+                if (isa == x64::avx512_core) {
+                    vpabsq(xmm_src, xmm_src);
+                } else {
+                    // NOTE(review): bit masking is not a two's-complement absolute value;
+                    // verify the contents of the abs-mask table entry for i64.
+                    auto xmm_abs_mask = Xmm(v_abs_mask.getIdx());
+                    uni_vandpd(xmm_src, xmm_src, xmm_abs_mask);
+                }
+                uni_vpaddq(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceLogSum:
+            case Algorithm::ReduceMean:
+            case Algorithm::ReduceSum:
+                uni_vpaddq(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceMax: {
+                    const size_t src_idx = static_cast<size_t>(xmm_src.getIdx()), dst_idx = static_cast<size_t>(xmm_dst.getIdx());
+                    if (isa == x64::avx512_core) {
+                        max_emitter->emit_code({dst_idx, src_idx}, {dst_idx});
+                    } else {
+                        // The emitter is given one auxiliary vector register on this path.
+                        auto vmm_aux_0 = getVmm();
+                        max_emitter->emit_code({dst_idx, src_idx}, {dst_idx}, {vmm_aux_0.getIdx()});
+                    }
+                } break;
+            case Algorithm::ReduceMin: {
+                    const size_t src_idx = static_cast<size_t>(xmm_src.getIdx()), dst_idx = static_cast<size_t>(xmm_dst.getIdx());
+                    if (isa == x64::avx512_core) {
+                        min_emitter->emit_code({dst_idx, src_idx}, {dst_idx});
+                    } else {
+                        auto vmm_aux_0 = getVmm();
+                        min_emitter->emit_code({dst_idx, src_idx}, {dst_idx}, {vmm_aux_0.getIdx()});
+                    }
+                } break;
+            case Algorithm::ReduceL2:
+            case Algorithm::ReduceSumSquare: {
+                    // src = src * src, then accumulate.
+                    const size_t src_idx = static_cast<size_t>(xmm_src.getIdx());
+                    if (isa == x64::avx512_core) {
+                        mul_emitter->emit_code({src_idx, src_idx}, {src_idx});
+                    } else {
+                        // The emitter is given two auxiliary vector registers on this path.
+                        auto vmm_aux_0 = getVmm();
+                        auto vmm_aux_1 = getVmm();
+                        mul_emitter->emit_code({src_idx, src_idx}, {src_idx}, {vmm_aux_0.getIdx(), vmm_aux_1.getIdx()});
+                    }
+                    uni_vpaddq(xmm_dst, xmm_dst, xmm_src);
+                } break;
+            case Algorithm::ReduceLogSumExp:
+                exp_injector->compute_vector_range(xmm_src.getIdx(), xmm_src.getIdx() + 1);
+                uni_vpaddq(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceOr:
+                uni_vorpd(xmm_dst, xmm_dst, xmm_src);
+                break;
+            case Algorithm::ReduceProd: {
+                    const size_t src_idx = static_cast<size_t>(xmm_src.getIdx()), dst_idx = static_cast<size_t>(xmm_dst.getIdx());
+                    if (isa == x64::avx512_core) {
+                        mul_emitter->emit_code({dst_idx, src_idx}, {dst_idx});
+                    } else {
+                        auto vmm_aux_0 = getVmm();
+                        auto vmm_aux_1 = getVmm();
+                        mul_emitter->emit_code({dst_idx, src_idx}, {dst_idx}, {vmm_aux_0.getIdx(), vmm_aux_1.getIdx()});
+                    }
+                } break;
+            default:
+                IE_THROW() << "Unsupported reduce mode '" << algToString(jcp.reduce_mode) << "'";
+        }
+    }
+}
+
+// Loads the accumulator from the destination buffer into v_dst.
+// On SSE4.1 a second register (v_dst_aux) is filled from the address four
+// destination elements further on.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::load_dst_vector() {
+    const auto& dst_type = jcp.dst_el_type;
+
+    load_vector(v_dst, ptr[reg_dst], exec_el_type, dst_type);
+    if (isa != x64::sse41) {
+        return;
+    }
+    load_vector(v_dst_aux, ptr[reg_dst + 4 * dst_type.size()], exec_el_type, dst_type);
+}
+
+// Writes the accumulator(s) back to the destination buffer.
+//
+// For ReduceOr on non-AVX512 ISAs the accumulator holds a raw bitwise OR of the
+// inputs, so it is first normalized lane-wise to "one" (non-zero) or zero.
+// The normalization uses packed-single or packed-double instructions depending on
+// exec_el_type, and is applied identically to v_dst and (on SSE4.1) to v_dst_aux.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::store_dst_vector() {
+    if (jcp.reduce_mode == Algorithm::ReduceOr && isa != x64::avx512_core) {
+        // lane = (lane != 0) ? ones : 0, with lane width matching the execution type.
+        const auto normalize = [&](const Vmm& vmm) {
+            if (exec_el_type == ov::element::f32) {
+                uni_cmpneqps(vmm, vmm, v_zero);
+                uni_vandps(vmm, vmm, v_ones);
+            } else if (exec_el_type == ov::element::f64 || exec_el_type == ov::element::i64) {
+                uni_vcmppd(vmm, vmm, v_zero, _cmp_neq_uq);
+                uni_vandpd(vmm, vmm, v_ones);
+            }
+        };
+
+        normalize(v_dst);
+        if (isa == x64::sse41) {
+            normalize(v_dst_aux);
+        }
+    }
+    store_vector(ptr[reg_dst], v_dst, jcp.dst_el_type, exec_el_type);
+    if (isa == x64::sse41) {
+        // NOTE(review): the second half is addressed four destination elements further on,
+        // independently of the execution element width - confirm for 64-bit execution types.
+        store_vector(ptr[reg_dst + 4 * jcp.dst_el_type.size()], v_dst_aux, jcp.dst_el_type, exec_el_type);
+    }
+}
+
+// Emits the constant table referenced through l_table.
+// The table is 64-byte aligned and consists of six consecutive entries; each entry is
+// one constant repeated (vlen / exec_el_type.size()) times. Which constants are written
+// depends on exec_el_type; nothing but the label is emitted for other types.
+template <x64::cpu_isa_t isa>
+void JitReduceKernel<isa>::prepare_aux_table() {
+    const auto lanes = vlen / exec_el_type.size();
+
+    // Repeat a 32-bit constant across one table entry.
+    const auto fill_dwords = [&](uint32_t value) {
+        for (size_t lane = 0; lane < lanes; ++lane) {
+            dd(value);
+        }
+    };
+    // Repeat a 64-bit constant across one table entry.
+    const auto fill_qwords = [&](uint64_t value) {
+        for (size_t lane = 0; lane < lanes; ++lane) {
+            dq(value);
+        }
+    };
+
+    align(64);
+    L(l_table);
+
+    if (exec_el_type == ov::element::f32) {
+        fill_dwords(aux_vals.float_one);
+        fill_dwords(aux_vals.float_abs);
+        fill_dwords(aux_vals.float_min);
+        fill_dwords(aux_vals.float_max);
+        fill_dwords(aux_vals.float_int32_min);
+        fill_dwords(aux_vals.float_int32_max);
+        return;
+    }
+    if (exec_el_type == ov::element::f64) {
+        fill_qwords(aux_vals.double_one);
+        fill_qwords(aux_vals.double_abs);
+        fill_qwords(aux_vals.double_min);
+        fill_qwords(aux_vals.double_max);
+        fill_qwords(aux_vals.double_int64_min);
+        fill_qwords(aux_vals.double_int64_max);
+        return;
+    }
+    if (exec_el_type == ov::element::i64) {
+        fill_qwords(aux_vals.int64_one);
+        fill_qwords(aux_vals.int64_abs);
+        fill_qwords(aux_vals.int64_min);
+        fill_qwords(aux_vals.int64_max);
+        // The last two entries repeat min/max so the layout keeps six entries.
+        fill_qwords(aux_vals.int64_min);
+        fill_qwords(aux_vals.int64_max);
+    }
+}
+
+///////////////////////////////
+///// JitReducePostKernel /////
+///////////////////////////////
+
+// Builds the post-processing kernel description.
+// Records whether a post-reduce mapping step is needed (L2 / Mean / LogSum / LogSumExp),
+// whether post-ops are attached, the vector loop step (doubled on SSE4.1), and creates
+// the helper injector/emitter required by the selected reduce mode.
+template <x64::cpu_isa_t isa>
+JitReducePostKernel<isa>::JitReducePostKernel(const JitReduceConfigParams& jcp, const dnnl_primitive_attr& attr)
+        : JitReduceKernelBase<JitReducePostCallArgs>(jit_name(), jcp, isa), attr(attr) {
+    post_reduce = one_of(jcp.reduce_mode, Algorithm::ReduceL2, Algorithm::ReduceMean, Algorithm::ReduceLogSum, Algorithm::ReduceLogSumExp);
+    post_ops_fusing = attr.post_ops_.len() != 0;
+
+    // Elements per vector register; SSE4.1 processes two registers per iteration.
+    loop_step = (vlen / exec_el_type.size()) * (isa == x64::sse41 ? 2 : 1);
+
+    switch (jcp.reduce_mode) {
+        case Algorithm::ReduceLogSum:
+        case Algorithm::ReduceLogSumExp:
+            log_injector = std::make_shared<x64::jit_uni_eltwise_injector_f32<isa>>(this, dnnl::impl::alg_kind::eltwise_log, 0.f, 0.f, 1.f);
+            break;
+        case Algorithm::ReduceMean:
+            division_emitter = std::make_shared<ov::intel_cpu::jit_divide_emitter>(this, isa, InferenceEngine::details::convertPrecision(exec_el_type));
+            division_emitter->second_is_float = true;
+            break;
+        case Algorithm::ReduceL2:
+            sqrt_emitter = std::make_shared<ov::intel_cpu::jit_sqrt_emitter>(this, isa, InferenceEngine::details::convertPrecision(exec_el_type));
+            sqrt_emitter->rounding_type = jit_emitter::RoundType::truncation;
+            break;
+        default:
+            break;
+    }
+}
+
+// Generates the whole post-processing kernel:
+//   1. creates the register pool and the injectors for every attached post-op,
+//   2. emits the preamble and loads the call arguments from reg_params,
+//   3. emits the main/tail processing according to jcp.layout,
+//   4. emits the postamble followed by the data tables of the helpers that were created.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::generate() {
+    // NOTE(review): the register list passed here is presumably the set excluded from the pool - confirm.
+    registersPool = RegistersPool::create(isa, {rax, rcx, rsp, rdi, k0});
+
+    // One injector per post-op entry; the registers they need are taken from the pool
+    // lazily, only once, on the first post-op that requires them.
+    const auto &p = attr.post_ops_;
+    for (int i = 0; i < p.len(); i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(std::make_shared<x64::jit_uni_eltwise_injector_f32<isa>>(
+                    this, post_op.eltwise.alg, post_op.eltwise.alpha, post_op.eltwise.beta, post_op.eltwise.scale));
+        } else if (post_op.is_depthwise()) {
+           if (!reg_d_weights.isInitialized()) {
+               reg_d_weights = getReg64();
+           }
+            depthwise_injectors.push_back(std::make_shared<x64::jit_uni_depthwise_injector_f32<isa>>(
+                    this, post_op));
+        } else if (post_op.is_quantization()) {
+           if (!reg_d_weights.isInitialized()) {
+               reg_d_weights = getReg64();
+           }
+           if (!reg_d_bias.isInitialized()) {
+               reg_d_bias = getReg64();
+           }
+           if (!v_d_weights.isInitialized()) {
+               v_d_weights = getVmm();
+           }
+           if (!v_d_bias.isInitialized()) {
+               v_d_bias = getVmm();
+           }
+            quantization_injectors.push_back(std::make_shared<x64::jit_uni_quantization_injector_f32<isa>>(
+                    this, post_op, v_d_weights, v_d_bias, reg_d_weights, reg_d_bias));
+        }
+    }
+
+    this->preamble();
+
+    // Mandatory arguments: destination pointer and number of elements to process.
+    reg_dst         = getReg64();
+    reg_work_amount = getReg64();
+    mov(reg_dst, ptr[reg_params + GET_OFF_POST(dst)]);
+    mov(reg_work_amount, ptr[reg_params + GET_OFF_POST(work_amount)]);
+
+    v_dst = getVmm();
+
+    // Optional arguments, loaded only when the configuration uses them.
+    if (!planar_layout) {
+        reg_reduce_c = getReg64();
+        mov(reg_reduce_c, ptr[reg_params + GET_OFF_POST(reduce_c)]);
+    }
+    if (post_ops_fusing) {
+        reg_oc_off        = getReg64();
+        reg_post_ops_data = getReg64();
+        mov(reg_post_ops_data, ptr[reg_params + GET_OFF_POST(post_op_data)]);
+        mov(reg_oc_off, ptr[reg_params + GET_OFF_POST(oc_off)]);
+    }
+    if (jcp.reduce_mode == Algorithm::ReduceMean) {
+        v_divisor = getVmm();
+        reg_divisor = getReg64();
+        mov(reg_divisor, ptr[reg_params + GET_OFF_POST(divisor)]);
+    }
+    if (jcp.fuse_low_precision) {
+        reg_src = getReg64();
+        mov(reg_src, ptr[reg_params + GET_OFF_POST(src)]);
+    }
+
+    // Blocked layout: vector part only. nspc with post-ops: process the buffer one channel
+    // row at a time so that reg_oc_off restarts from zero for every row. Otherwise: vector
+    // part followed by the scalar tail.
+    if (jcp.layout == ReduceLayoutType::reduce_blocked) {
+        reduce_post_main();
+    } else if (jcp.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing) {
+        auto reg_channel_size      = getReg64();
+        auto reg_total_work_amount = getReg64();
+        // the tail of channel dimension should always be concerned during post ops fusing for nspc layout
+        Label reduce_nspc_loop_label;
+        Label reduce_nspc_loop_end_label;
+        mov(reg_channel_size, ptr[reg_params + GET_OFF_POST(channel_size)]);
+        mov(reg_total_work_amount, reg_work_amount);
+        L(reduce_nspc_loop_label);
+        {
+            cmp(reg_total_work_amount, 0);
+            jle(reduce_nspc_loop_end_label, T_NEAR);
+
+            mov(reg_oc_off, 0);
+            mov(reg_work_amount, reg_channel_size);
+            reduce_post_main();
+            reduce_post_tail();
+
+            sub(reg_total_work_amount, reg_channel_size);
+            jmp(reduce_nspc_loop_label, T_NEAR);
+        }
+        L(reduce_nspc_loop_end_label);
+    } else {
+        reduce_post_main();
+        reduce_post_tail();
+    }
+
+    // Release the pool before the postamble is emitted.
+    registersPool.reset();
+
+    this->postamble();
+
+    // Data sections of the helpers go after the executable code; each is emitted only
+    // if the corresponding helper object exists.
+    if (vcvtneps2bf16) {
+        vcvtneps2bf16->emit_data();
+    }
+    if (max_emitter) {
+        max_emitter->emit_data();
+    }
+    if (min_emitter) {
+        min_emitter->emit_data();
+    }
+    if (mul_emitter) {
+        mul_emitter->emit_data();
+    }
+    if (division_emitter) {
+        division_emitter->emit_data();
+    }
+    if (sqrt_emitter) {
+        sqrt_emitter->emit_data();
+    }
+    // log_injector is created in the constructor for exactly these two modes.
+    if (one_of(jcp.reduce_mode, Algorithm::ReduceLogSum, Algorithm::ReduceLogSumExp)) {
+        log_injector->prepare_table();
+    }
+    for (auto& inj : eltwise_injectors) {
+        inj->prepare_table();
+    }
+}
+
+// Emits the vectorized part of the post-processing kernel. It has two stages:
+//   1. (non-planar layouts, executed at run time only when reg_reduce_c == 1) a horizontal
+//      reduction of every loaded vector, stored back through horiz_reduce_store_*;
+//   2. the "map" stage that applies the post-reduce function (L2 / Mean / LogSum /
+//      LogSumExp) and/or the fused post-ops to the values in dst.
+// Both stages consume reg_work_amount in chunks of loop_step elements; what is left is
+// handled by reduce_post_tail().
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::reduce_post_main() {
+    Label reduce_map_label;
+    if (planar_layout) {
+        jmp(reduce_map_label, T_NEAR);
+    } else {
+        cmp(reg_reduce_c, 1);
+        jne(reduce_map_label, T_NEAR);
+    }
+
+    // further reduce channel block since reduce channel batch has already been reduced
+    // (X1, X2, X3, X4, X5, X6, X7, X8) -> (Y1, N/A, N/A, N/A, N/A, N/A, N/A, N/A)
+    // cases: [blocked layout reducing channel dimensions]
+    {
+        Label reduce_loop_label;
+        Label reduce_loop_end_label;
+        RegistersPool::Reg<Vmm> v_dst_aux;
+        if (isa == x64::sse41) {
+            v_dst_aux = getVmm();
+        }
+
+        L(reduce_loop_label);
+        {
+            cmp(reg_work_amount, loop_step);
+            jl(reduce_loop_end_label, T_NEAR);
+
+            // load
+            wrap_load_vector(v_dst, exec_el_type, jcp.dst_el_type, 0);
+            if (isa == x64::sse41) {
+                wrap_load_vector(v_dst_aux, exec_el_type, jcp.dst_el_type, 4);
+            }
+
+            // reduce and store
+            // NOTE(review): only f32 and i64 execution types are handled here; nothing is
+            // emitted for f64 - confirm that combination cannot reach this path.
+            if (exec_el_type == ov::element::f32) {
+                horiz_reduce_store_ps<isa>(v_dst, jcp.dst_el_type);
+            } else if (exec_el_type == ov::element::i64) {
+                horiz_reduce_store_qq<isa>(v_dst, jcp.dst_el_type);
+            }
+            if (isa == x64::sse41) {
+                if (exec_el_type == ov::element::f32) {
+                    horiz_reduce_store_ps<isa>(v_dst_aux, jcp.dst_el_type, true);
+                } else if (exec_el_type == ov::element::i64) {
+                    horiz_reduce_store_qq<isa>(v_dst_aux, jcp.dst_el_type, true);
+                }
+            }
+
+            add(reg_dst, loop_step * jcp.dst_el_type.size());
+            if (jcp.fuse_low_precision) {
+                add(reg_src, loop_step * sizeof(float)); // TODO i64 fusing
+            }
+            sub(reg_work_amount, loop_step);
+
+            jmp(reduce_loop_label, T_NEAR);
+        }
+        L(reduce_loop_end_label);
+
+        // The map stage below walks the same buffer again, so rewind the pointers and
+        // the counter to their initial values.
+        if (post_reduce || post_ops_fusing) {
+            mov(reg_dst, ptr[reg_params + GET_OFF_POST(dst)]);
+            if (jcp.fuse_low_precision)
+                mov(reg_src, ptr[reg_params + GET_OFF_POST(src)]);
+            mov(reg_work_amount, ptr[reg_params + GET_OFF_POST(work_amount)]);
+        }
+    }
+
+    // reduce map for value in dst memory
+    // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean]
+    L(reduce_map_label);
+    {
+        if (post_reduce) {
+            if (jcp.reduce_mode == Algorithm::ReduceMean) {
+                // Broadcast the divisor with the lane width of the execution type.
+                if (exec_el_type.size() == 4) {
+                    uni_vbroadcastss(v_divisor, ptr[reg_divisor]);
+                } else if (exec_el_type.size() == 8) {
+                    uni_vbroadcastsd(v_divisor, ptr[reg_divisor]);
+                }
+            }
+
+            Label reduce_loop_label;
+            Label reduce_loop_end_label;
+
+            L(reduce_loop_label);
+            {
+                cmp(reg_work_amount, loop_step);
+                jl(reduce_loop_end_label, T_NEAR);
+
+                wrap_load_vector(v_dst, exec_el_type, jcp.dst_el_type, 0);
+                reduce_map_kernel(v_dst);
+                if (post_ops_fusing) {
+                    apply_post_ops(jcp.dst_el_type, jcp.layout == ReduceLayoutType::reduce_ncsp);
+                }
+                store_vector(ptr[reg_dst], v_dst, jcp.dst_el_type, exec_el_type);
+
+                if (isa == x64::sse41) {
+                    // Second half of the step; the channel offset is shifted only for the
+                    // duration of the post-ops call.
+                    wrap_load_vector(v_dst, exec_el_type, jcp.dst_el_type, 4);
+                    reduce_map_kernel(v_dst);
+                    if (post_ops_fusing) {
+                        if (jcp.layout != ReduceLayoutType::reduce_ncsp) {
+                            add(reg_oc_off, 4 * exec_el_type.size());
+                        }
+                        apply_post_ops(jcp.dst_el_type, jcp.layout == ReduceLayoutType::reduce_ncsp);
+                        if (jcp.layout != ReduceLayoutType::reduce_ncsp) {
+                            sub(reg_oc_off, 4 * exec_el_type.size());
+                        }
+                    }
+                    store_vector(ptr[reg_dst + 4 * jcp.dst_el_type.size()], v_dst, jcp.dst_el_type, exec_el_type);
+                }
+
+                add(reg_dst, loop_step * jcp.dst_el_type.size());
+                // Keep reg_src in step with reg_dst, as every other loop of this kernel does
+                // when low-precision fusing is enabled.
+                if (jcp.fuse_low_precision) {
+                    add(reg_src, loop_step * sizeof(float)); // TODO i64
+                }
+                if (jcp.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing) {
+                    add(reg_oc_off, loop_step * exec_el_type.size());
+                }
+                sub(reg_work_amount, loop_step);
+
+                jmp(reduce_loop_label, T_NEAR);
+            }
+            L(reduce_loop_end_label);
+        } else {
+            if (post_ops_fusing) {
+                Label reduce_loop_label;
+                Label reduce_loop_end_label;
+
+                L(reduce_loop_label);
+                {
+                    cmp(reg_work_amount, loop_step);
+                    jl(reduce_loop_end_label, T_NEAR);
+
+                    // NOTE(review): this loop reads through load_vector from reg_dst while the
+                    // other loops use wrap_load_vector; verify which one is intended when
+                    // jcp.fuse_low_precision is set.
+                    load_vector(v_dst, ptr[reg_dst], exec_el_type, jcp.dst_el_type);
+                    apply_post_ops(jcp.dst_el_type, jcp.layout == ReduceLayoutType::reduce_ncsp);
+                    store_vector(ptr[reg_dst], v_dst, jcp.dst_el_type, exec_el_type);
+
+                    if (isa == x64::sse41) {
+                        load_vector(v_dst, ptr[reg_dst + 4 * jcp.dst_el_type.size()], exec_el_type, jcp.dst_el_type);
+                        if (jcp.layout != ReduceLayoutType::reduce_ncsp) {
+                            add(reg_oc_off, 4 * exec_el_type.size());
+                        }
+                        apply_post_ops(jcp.dst_el_type, jcp.layout == ReduceLayoutType::reduce_ncsp);
+                        if (jcp.layout != ReduceLayoutType::reduce_ncsp) {
+                            sub(reg_oc_off, 4 * exec_el_type.size());
+                        }
+                        store_vector(ptr[reg_dst + 4 * jcp.dst_el_type.size()], v_dst, jcp.dst_el_type, exec_el_type);
+                    }
+
+                    add(reg_dst, loop_step * jcp.dst_el_type.size());
+                    if (jcp.fuse_low_precision) {
+                        add(reg_src, loop_step * sizeof(float)); //TODO i64
+                    }
+                    if (jcp.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing) {
+                        add(reg_oc_off, loop_step * exec_el_type.size());
+                    }
+                    sub(reg_work_amount, loop_step);
+
+                    jmp(reduce_loop_label, T_NEAR);
+                }
+                L(reduce_loop_end_label);
+            }
+        }
+    }
+}
+
+// Emits the scalar tail loop: the elements left in reg_work_amount after
+// reduce_post_main() are processed one at a time.
+// Per element: load, optional post-reduce mapping (ReduceL2 / ReduceMean / ReduceLogSum /
+// ReduceLogSumExp), optional fused post-ops, store. Nothing is emitted when neither a
+// mapping nor post-ops are required.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::reduce_post_tail() {
+    const bool needs_map =
+        one_of(jcp.reduce_mode, Algorithm::ReduceL2, Algorithm::ReduceMean, Algorithm::ReduceLogSum, Algorithm::ReduceLogSumExp);
+    if (!needs_map && !post_ops_fusing) {
+        return;
+    }
+
+    auto xmm_dst = Xmm(v_dst.getIdx());
+
+    if (jcp.reduce_mode == Algorithm::ReduceMean) {
+        // Reload the divisor with the lane width of the execution type.
+        auto xmm_divisor = Xmm(v_divisor.getIdx());
+        if (exec_el_type.size() == 4) {
+            uni_vbroadcastss(xmm_divisor, ptr[reg_divisor]);
+        } else if (exec_el_type.size() == 8) {
+            // NOTE(review): this is the plain AVX mnemonic on a Ymm alias - confirm the
+            // 8-byte ReduceMean path is never generated for the SSE4.1 kernel.
+            auto ymm_aux = Ymm(xmm_divisor.getIdx());
+            vbroadcastsd(ymm_aux, ptr[reg_divisor]);
+        }
+    }
+
+    const int step = 1;
+    Label tail_loop_label;
+    Label tail_done_label;
+
+    L(tail_loop_label);
+    {
+        cmp(reg_work_amount, step);
+        jl(tail_done_label, T_NEAR);
+
+        wrap_load_scalar(xmm_dst, exec_el_type, jcp.dst_el_type, 0);
+
+        if (needs_map) {
+            reduce_map_kernel_scalar(xmm_dst);
+        }
+        if (post_ops_fusing) {
+            apply_post_ops(jcp.dst_el_type, jcp.layout == ReduceLayoutType::reduce_ncsp);
+        }
+        store_scalar(ptr[reg_dst], xmm_dst, jcp.dst_el_type, exec_el_type);
+
+        // Advance the pointers/offsets that are in use for this configuration.
+        add(reg_dst, step * jcp.dst_el_type.size());
+        if (jcp.fuse_low_precision) {
+            add(reg_src, step * sizeof(float)); // TODO i64
+        }
+        if (jcp.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing) {
+            add(reg_oc_off, step * exec_el_type.size());
+        }
+        sub(reg_work_amount, step);
+
+        jmp(tail_loop_label, T_NEAR);
+    }
+    L(tail_done_label);
+}
+// Emits the fused post-ops listed in attr.post_ops_ on v_dst, in order; each injector kind has its own running index.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::apply_post_ops(const ov::element::Type& dst_el_type, bool is_broadcast) {
+    const auto &p = attr.post_ops_;
+    int eltwise_inj_idx = 0;
+    int depthwise_inj_idx = 0;
+    int quantization_inj_idx = 0;
+    int post_ops_data_offset = 0;  // displacement from reg_post_ops_data; advanced by memoryStep() of depthwise/quantization injectors only
+    for (int i = 0; i < p.len(); i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors[eltwise_inj_idx]->compute_vector_range(v_dst.getIdx(), v_dst.getIdx() + 1);
+            eltwise_inj_idx++;
+        } else if (post_op.is_depthwise()) {
+            mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]);  // value stored for this post-op (presumably its data pointer)
+            add(reg_d_weights, reg_oc_off);
+            // NOTE(review): reg_d_weights is passed for both register arguments below - confirm against the injector API.
+            depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+                    v_dst.getIdx(), v_dst.getIdx() + 1, reg_d_weights, reg_d_weights, is_broadcast);
+
+            post_ops_data_offset += depthwise_injectors[depthwise_inj_idx]->memoryStep();
+            depthwise_inj_idx++;
+        } else if (post_op.is_quantization()) {
+            bool do_dequantization = post_op.quantization.alg == dnnl::impl::alg_kind::quantization_quantize_dequantize;
+            bool do_rounding = do_dequantization || isFloatCompatible(dst_el_type) || i != p.len() - 1;
+            // do_rounding: set when dequantizing, when dst is float-compatible, or when this is not the last post-op.
+            int s_idx = v_dst.getIdx();
+
+            quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
+            quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast);
+
+            quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
+            quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast);
+
+            if (do_dequantization) {  // output scale/shift is emitted only for quantize_dequantize
+                quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
+                quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast);
+            }
+
+            post_ops_data_offset += quantization_injectors[quantization_inj_idx]->memoryStep();
+            quantization_inj_idx++;
+        }
+    }
+}
+// Emits the per-mode finishing step on vmm_dst in place; reduce modes not listed below emit no code.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::reduce_map_kernel(const Vmm& vmm_dst) {
+    if (jcp.reduce_mode == Algorithm::ReduceMean) {
+        division_emitter->emit_code({ vmm_dst.getIdx(), v_divisor.getIdx() }, { vmm_dst.getIdx() });  // inputs {dst, v_divisor}, output dst
+    } else if (jcp.reduce_mode == Algorithm::ReduceL2) {
+        sqrt_emitter->emit_code({ vmm_dst.getIdx() }, { vmm_dst.getIdx() });  // input and output are the same register
+    } else if (one_of(jcp.reduce_mode, Algorithm::ReduceLogSum, Algorithm::ReduceLogSumExp)) {
+        log_injector->compute_vector_range(vmm_dst.getIdx(), vmm_dst.getIdx() + 1);  // range [idx, idx + 1) covers vmm_dst only
+    }
+}
+// Scalar-path finishing step: same emitters/injector as the vector path, addressed by xmm_dst's register index.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::reduce_map_kernel_scalar(const Xmm& xmm_dst) {
+    if (jcp.reduce_mode == Algorithm::ReduceMean) {
+        division_emitter->emit_code({ xmm_dst.getIdx(), v_divisor.getIdx() }, { xmm_dst.getIdx() });  // inputs {dst, v_divisor}, output dst
+    } else if (jcp.reduce_mode == Algorithm::ReduceL2) {
+        sqrt_emitter->emit_code({ xmm_dst.getIdx() }, { xmm_dst.getIdx() });  // input and output are the same register
+    } else if (one_of(jcp.reduce_mode, Algorithm::ReduceLogSum, Algorithm::ReduceLogSumExp)) {
+        log_injector->compute_vector_range(xmm_dst.getIdx(), xmm_dst.getIdx() + 1);  // range [idx, idx + 1) covers xmm_dst only
+    }
+}
+// Loads one vector into vmm_val via load_vector(dst_dt, src_dt): from reg_src when fuse_low_precision is set, else from reg_dst.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::wrap_load_vector(const Vmm& vmm_val, const element::Type& dst_dt, const element::Type& src_dt, size_t offset) {
+    if (jcp.fuse_low_precision) {
+        load_vector(vmm_val, ptr[reg_src + offset * sizeof(float)], dst_dt, src_dt); // TODO i64 fusing; NOTE(review): float-sized stride while src_dt is forwarded as the memory type - confirm
+    } else {
+        load_vector(vmm_val, ptr[reg_dst + offset * src_dt.size()], dst_dt, src_dt); // offset counts elements of the in-memory type (src_dt), not of the register type (dst_dt)
+    }
+}
+// Loads one scalar into xmm_val via load_scalar(dst_dt, src_dt): from reg_src when fuse_low_precision is set, else from reg_dst.
+template <x64::cpu_isa_t isa>
+void JitReducePostKernel<isa>::wrap_load_scalar(const Xmm& xmm_val, const element::Type& dst_dt, const element::Type& src_dt, size_t offset) {
+    if (jcp.fuse_low_precision) {
+        load_scalar(xmm_val, ptr[reg_src + offset * sizeof(float)], dst_dt, src_dt); // TODO i64 fusing; NOTE(review): float-sized stride while src_dt is forwarded as the memory type - confirm
+    } else {
+        load_scalar(xmm_val, ptr[reg_dst + offset * src_dt.size()], dst_dt, src_dt); // offset counts elements of the in-memory type (src_dt), not of the register type (dst_dt)
+    }
+}
+
+// Explicit instantiations of both kernel templates for avx512_core, avx2 and sse41.
+template class JitReduceKernel<x64::avx512_core>;
+template class JitReduceKernel<x64::avx2>;
+template class JitReduceKernel<x64::sse41>;
+
+template class JitReducePostKernel<x64::avx512_core>;
+template class JitReducePostKernel<x64::avx2>;
+template class JitReducePostKernel<x64::sse41>;
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.hpp
new file mode 100644
index 00000000000000..0dc30f24cd1bbc
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/reduce.hpp
@@ -0,0 +1,246 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "jit_kernel_base.hpp"
+#include <cpu/x64/injectors/jit_uni_depthwise_injector.hpp>
+#include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
+#include <cpu/x64/injectors/jit_uni_eltwise_injector.hpp>
+#include <emitters/x64/jit_eltwise_emitters.hpp>
+
+namespace ov {
+namespace intel_cpu {
+namespace kernel {
+
+enum ReduceLayoutType {  // layout tag stored in JitReduceConfigParams::layout
+    reduce_ncsp,  // presumably the planar layout - TODO confirm
+    reduce_nspc,  // presumably the channels-last layout - TODO confirm
+    reduce_blocked  // presumably the channel-blocked layout - TODO confirm
+};
+
+struct JitReduceConfigParams {  // kernel configuration passed to both JitReduceKernel and JitReducePostKernel constructors
+    ReduceLayoutType layout;
+    Algorithm reduce_mode;  // reduction algorithm, e.g. Algorithm::ReduceMean / ReduceL2 / ReduceLogSum
+    bool fuse_low_precision;  // NOTE(review): when set, the post kernel's wrap_load_* read through reg_src with a float-sized stride - confirm intent
+    element::Type src_el_type;
+    element::Type dst_el_type;
+};
+
+struct JitReduceCallArgs {  // call-argument struct of JitReduceKernel; only reduce_w has a default member initializer
+    const void* src;
+    const void* idx;        // NOTE(review): purpose not visible in this header - confirm against the kernel
+    void* dst;
+    size_t work_amount;
+    size_t work_batch;
+    size_t reduce_w = 2;    // only used in planar layout  [1: reduce width dimension]   [0: reduce other dimension] [other value: N/A]
+    size_t reduce_stride;   // only used in planar layout while reducing dimensions except for width
+    size_t can_divide;      // if apply division in reduce_kernel [1: Yes] [0: No]
+    const void* divisor;    // mean = sum / divisor
+};
+
+struct JitReducePostCallArgs {  // call-argument struct of JitReducePostKernel; only reduce_c has a default member initializer
+    const void *src;
+    void *dst;
+    size_t work_amount;
+    size_t reduce_c = 2;    // only used in blocked layout [1: reduce channel dimension] [0: reduce other dimension] [other value: N/A]
+    size_t oc_off;          // offset in byte along channel on output tensor
+    size_t channel_size;    // only for post ops fusion of nspc layout
+    const void *divisor;    // mean = sum / divisor
+    const void** post_op_data;  // presumably the table the post-op injectors read through reg_post_ops_data - TODO confirm
+};
+
+// Common base of the reduce JIT kernels, parameterized by the call-argument struct of the derived kernel.
+template<typename CallArgs>
+class JitReduceKernelBase : public JitKernel<JitReduceConfigParams, CallArgs> {
+public:
+    explicit JitReduceKernelBase(const char* name, const JitReduceConfigParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t isa);
+
+    virtual ~JitReduceKernelBase() = default;
+
+    const element::Type &get_exec_prc() const {  // returns exec_el_type (presumably the in-register computation type - TODO confirm)
+        return exec_el_type;
+    }
+
+protected:
+    void horiz_ps(const Xbyak::Xmm& xmm, const Xbyak::Operand& op);
+
+    template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+    void horiz_qq(const Xbyak::Xmm& xmm, const Xbyak::Operand& op);
+
+    template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+    void horiz_reduce_store_ps(const Xbyak::Xmm& vmm_dst, const element::Type& dst_dt, bool load_embedded = false);
+
+    template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+    void horiz_reduce_store_qq(const Xbyak::Xmm& vmm_dst, const element::Type& dst_dt, bool load_embedded = false);
+
+    // General-purpose register handles of type RegistersPool::Reg, inherited by the derived kernels.
+    RegistersPool::Reg<Xbyak::Reg64> reg_src;
+    RegistersPool::Reg<Xbyak::Reg64> reg_dst;
+    RegistersPool::Reg<Xbyak::Reg64> reg_work_amount;
+
+    element::Type exec_el_type;
+    bool post_reduce = false;
+    bool post_ops_fusing = false;
+    bool planar_layout = false;
+    int loop_step = 1;
+
+    std::shared_ptr<jit_maximum_emitter>  max_emitter;
+    std::shared_ptr<jit_minimum_emitter>  min_emitter;
+    std::shared_ptr<jit_multiply_emitter> mul_emitter;
+};
+
+// Reduce kernel generator for one ISA; its call-argument struct is JitReduceCallArgs.
+template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+struct JitReduceKernel : public JitReduceKernelBase<JitReduceCallArgs> {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(JitReduceKernel)
+
+    explicit JitReduceKernel(const JitReduceConfigParams &jcp);
+
+    void generate() override;
+
+private:
+    using Vmm = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::sse41, Xbyak::Xmm,
+                                                         isa == dnnl::impl::cpu::x64::avx2,  Xbyak::Ymm,
+                                                                                             Xbyak::Zmm>::type;  // Xmm for sse41, Ymm for avx2, Zmm otherwise
+    const size_t vlen = dnnl::impl::cpu::x64::cpu_isa_traits<isa>::vlen;
+
+    Xbyak::Address table_val(int index) { return ptr[reg_table + index * vlen]; }  // address of the index-th vlen-sized slot from reg_table
+
+    const Xbyak::Reg64 reg_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]);  // first ABI argument register
+
+    RegistersPool::Reg<Xbyak::Reg64> reg_reduce_w;
+    RegistersPool::Reg<Xbyak::Reg64> reg_reduce_stride;
+    RegistersPool::Reg<Xbyak::Reg64> reg_work_batch;
+    RegistersPool::Reg<Xbyak::Reg64> reg_table;
+
+    RegistersPool::Reg<Vmm> v_src;
+    RegistersPool::Reg<Vmm> v_dst;
+    RegistersPool::Reg<Vmm> v_zero;
+    RegistersPool::Reg<Vmm> v_dst_aux;
+    RegistersPool::Reg<Vmm> v_idx;
+    RegistersPool::Reg<Vmm> v_ones;
+    RegistersPool::Reg<Vmm> v_abs_mask;
+
+    const Xbyak::Opmask &k_mask = k1;
+
+    Xbyak::Label l_table;
+
+    std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<isa>> exp_injector;
+
+    void reduce_main();
+
+    void reduce_tail();
+
+    void init_reg_reduce_stride();
+
+    void reduce_kernel();
+
+    void reduce_once();
+
+    void reduce_batch();
+
+    void reduce_gather(const Vmm& vmm_dst, int64_t offset);
+
+    void pack_gathered_vector(const Vmm& vmm_val, const Vmm& vmm_index, int64_t offset, const element::Type& src_dt);
+
+    void reduce_kernel_tail();
+
+    void reduce_once_tail();
+
+    void reduce_batch_tail();
+
+    void reduce_main_loop();
+
+    void reduce_kernel(const Vmm& vmm_src, const Vmm& vmm_dst);
+
+    void reduce_kernel_scalar(const Xbyak::Xmm& xmm_src, const Xbyak::Xmm& xmm_dst);
+
+    void load_dst_vector();
+
+    void store_dst_vector();
+
+    void prepare_aux_table();
+
+    const struct aux_vals_type {  // constants stored as raw bit patterns of the type named in each field
+        uint32_t float_one = 0x3f800000; // 1.0f
+        uint32_t float_abs = 0x7fffffff; // mask to make positive (clears the sign bit)
+        uint32_t float_min = 0xff7fffff; // float lowest
+        uint32_t float_max = 0x7f7fffff; // float maximum
+        uint32_t float_int32_min = 0xcf000000; // -2^31 presented in float
+        uint32_t float_int32_max = 0x4effffff; // 2147483520.0f (2^31 - 128): largest float below 2^31
+
+        uint64_t double_one = 0x3ff0000000000000; // 1.0
+        uint64_t double_abs = 0x7fffffffffffffff; // mask to make positive (clears the sign bit)
+        uint64_t double_min = 0xffefffffffffffff; // double lowest
+        uint64_t double_max = 0x7fefffffffffffff; // double maximum
+        uint64_t double_int64_min = 0xc3e0000000000000; // lowest int64 (-2^63) presented in double
+        uint64_t double_int64_max = 0x43dfffffffffffff; // 2^63 - 1024: largest double below 2^63
+
+        uint64_t int64_one = 0x0000000000000001; // 1
+        uint64_t int64_abs = 0x7fffffffffffffff; // sign-bit-clearing mask; NOTE(review): AND-ing a negative two's-complement value with it is not abs() - confirm use
+        // 0x8000000000000000 below is the two's-complement bit pattern of the lowest int64.
+        uint64_t int64_min = 0x8000000000000000; // lowest int64
+        uint64_t int64_max = 0x7fffffffffffffff; // max int64
+    } aux_vals;
+};
+// Post-reduce kernel generator for one ISA; its call-argument struct is JitReducePostCallArgs.
+template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+struct JitReducePostKernel : public JitReduceKernelBase<JitReducePostCallArgs> {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(JitReducePostKernel)
+
+    explicit JitReducePostKernel(const JitReduceConfigParams& jcp, const dnnl_primitive_attr& attr);
+
+    void generate() override;
+
+private:
+    const dnnl_primitive_attr &attr;  // reference member: the referenced attr object must outlive this kernel
+
+    using Vmm = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::sse41, Xbyak::Xmm,
+                                                         isa == dnnl::impl::cpu::x64::avx2,  Xbyak::Ymm,
+                                                                                             Xbyak::Zmm>::type;  // Xmm for sse41, Ymm for avx2, Zmm otherwise
+    const size_t vlen = dnnl::impl::cpu::x64::cpu_isa_traits<isa>::vlen;
+
+    const Xbyak::Reg64 reg_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]);  // first ABI argument register
+
+    RegistersPool::Reg<Xbyak::Reg64> reg_divisor;
+    RegistersPool::Reg<Xbyak::Reg64> reg_reduce_c;
+    RegistersPool::Reg<Xbyak::Reg64> reg_oc_off;
+    RegistersPool::Reg<Xbyak::Reg64> reg_d_weights;
+    RegistersPool::Reg<Xbyak::Reg64> reg_d_bias;
+    RegistersPool::Reg<Xbyak::Reg64> reg_post_ops_data;
+
+    RegistersPool::Reg<Vmm> v_dst;
+    RegistersPool::Reg<Vmm> v_d_weights;
+    RegistersPool::Reg<Vmm> v_d_bias;
+    RegistersPool::Reg<Vmm> v_divisor;
+
+    std::shared_ptr<jit_divide_emitter>  division_emitter;  // emitted by reduce_map_kernel* for Algorithm::ReduceMean
+    std::shared_ptr<jit_sqrt_emitter>    sqrt_emitter;  // emitted by reduce_map_kernel* for Algorithm::ReduceL2
+    std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<isa>> log_injector;  // used for ReduceLogSum / ReduceLogSumExp
+
+    std::vector<std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;  // indexed per kind, in post-op order, by apply_post_ops
+    std::vector<std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;
+    std::vector<std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_quantization_injector_f32<isa>>> quantization_injectors;
+
+    void reduce_post_main();
+
+    void reduce_post_tail();
+
+    void apply_post_ops(const element::Type& dst_dt, bool is_broadcast);
+
+    void reduce_map_kernel(const Vmm& vmm_dst);
+
+    void reduce_map_kernel_scalar(const Xbyak::Xmm& xmm_dst);
+
+    void wrap_load_vector(const Vmm& vmm_val, const element::Type& dst_dt, const element::Type& src_dt, size_t offset);
+
+    void wrap_load_scalar(const Xbyak::Xmm& xmm_val, const element::Type& dst_dt, const element::Type& src_dt, size_t offset);
+
+    void horiz_store(const Xbyak::Xmm& xmm_dst, const element::Type& dst_dt, bool load_embedded);
+};  // JitReducePostKernel
+
+}   // namespace kernel
+}   // namespace intel_cpu
+}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp
index 1c1b6218b87bd1..743ead12eb52d6 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp
@@ -4,11 +4,8 @@
 
 #pragma once
 
-#include "cpu/x64/jit_generator.hpp"
-#include <dnnl_types.h>
-#include "ie_common.h"
+#include "cpu/x64/cpu_isa_traits.hpp"
 #include "utils/cpu_utils.hpp"
-#include <utility>
 
 namespace ov {
 namespace intel_cpu {
diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.cpp b/src/plugins/intel_cpu/src/nodes/mathematics.cpp
index 926e09fd9770d6..5f0002e734258e 100644
--- a/src/plugins/intel_cpu/src/nodes/mathematics.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mathematics.cpp
@@ -6,7 +6,7 @@
 #include <vector>
 #include <string>
 
-#include <ngraph/ops.hpp>
+#include <openvino/opsets/opset1.hpp>
 #include "ie_parallel.hpp"
 #include "mathematics.h"
 #include "utils/general_utils.h"
@@ -18,16 +18,16 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool Math::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Math::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (initializers.find(op->get_type_info()) == initializers.end()) {
             errorMessage = "Unsupported Math layer type.";
             return false;
         }
 
-        if (one_of(op->get_type_info(), ngraph::op::v0::HardSigmoid::get_type_info_static(), ngraph::op::v0::Selu::get_type_info_static())) {
-            auto firstConst = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
-            auto secondConst = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
+        if (one_of(op->get_type_info(), op::v0::HardSigmoid::get_type_info_static(), op::v0::Selu::get_type_info_static())) {
+            auto firstConst = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(1));
+            auto secondConst = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(2));
             if (!firstConst || !secondConst) {
                 errorMessage = "Constant expected as the second and third inputs.";
                 return false;
@@ -39,7 +39,7 @@ bool Math::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, s
     return true;
 }
 
-Math::Math(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+Math::Math(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
     : Node(op, context, PassThroughShapeInferFactory()),
       alpha(0.f),
       beta(0.f),
@@ -201,66 +201,66 @@ bool Math::created() const {
     return getType() == Type::Math;
 }
 
-std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, Math& node)>> Math::initializers {
-        {ngraph::op::v0::Abs::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+std::map<const ov::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ov::Node>&, Math& node)>> Math::initializers {
+        {op::v0::Abs::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAbs;
         }},
-        {ngraph::op::v0::Acos::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Acos::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAcos;
         }},
-        {ngraph::op::v3::Acosh::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v3::Acosh::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAcosh;
         }},
-        {ngraph::op::v0::Asin::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Asin::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAsin;
         }},
-        {ngraph::op::v3::Asinh::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v3::Asinh::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAsinh;
         }},
-        {ngraph::op::v0::Atan::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Atan::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAtan;
         }},
-        {ngraph::op::v0::Ceiling::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Ceiling::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathCeiling;
         }},
-        {ngraph::op::v0::Cos::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Cos::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathCos;
         }},
-        {ngraph::op::v0::Cosh::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Cosh::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathCosh;
         }},
-        {ngraph::op::v0::Floor::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Floor::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathFloor;
         }},
-        {ngraph::op::v0::HardSigmoid::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::HardSigmoid::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathHardSigmoid;
-            node.alpha = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<float>()[0];
-            node.beta = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<float>()[0];
+            node.alpha = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<float>()[0];
+            node.beta = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<float>()[0];
         }},
-        {ngraph::op::v0::Negative::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Negative::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathNegative;
         }},
-        {ngraph::op::v0::Selu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Selu::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathSelu;
-            node.alpha = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<float>()[0];
-            node.gamma = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<float>()[0];
+            node.alpha = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<float>()[0];
+            node.gamma = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<float>()[0];
         }},
-        {ngraph::op::v0::Sign::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Sign::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathSign;
         }},
-        {ngraph::op::v0::Sin::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Sin::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathSin;
         }},
-        {ngraph::op::v0::Sinh::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Sinh::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathSinh;
         }},
-        {ngraph::op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathSoftPlus;
         }},
-        {ngraph::op::v0::Tan::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v0::Tan::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathTan;
         }},
-        {ngraph::op::v3::Atanh::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Math& node) {
+        {op::v3::Atanh::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Math& node) {
             node.algorithm = Algorithm::MathAtanh;
         }}
 };
diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.h b/src/plugins/intel_cpu/src/nodes/mathematics.h
index 88235fb54e8b78..e8289cfb1f2117 100644
--- a/src/plugins/intel_cpu/src/nodes/mathematics.h
+++ b/src/plugins/intel_cpu/src/nodes/mathematics.h
@@ -13,7 +13,7 @@ namespace node {
 
 class Math : public Node {
 public:
-    Math(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Math(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
@@ -23,10 +23,10 @@ class Math : public Node {
     bool needPrepareParams() const override { return false; };
     void executeDynamicImpl(dnnl::stream strm) override;
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
 private:
-    static std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, Math& node)>> initializers;
+    static std::map<const ov::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ov::Node>&, Math& node)>> initializers;
 
     float alpha = 0.0f;
     float beta = 0.0f;
diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp
index cbb0b134211359..1ff6a5ae02c012 100644
--- a/src/plugins/intel_cpu/src/nodes/non_zero.cpp
+++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp
@@ -7,7 +7,7 @@
 #include <nodes/common/cpu_memcpy.h>
 
 #include <ie_parallel.hpp>
-#include <ngraph/opsets/opset3.hpp>
+#include <openvino/op/non_zero.hpp>
 #include <utils/bfloat16.hpp>
 #include <utils/shape_inference/shape_inference_internal_dyn.hpp>
 
@@ -20,9 +20,9 @@ namespace node {
 static constexpr int blockSize = dnnl::impl::cpu::platform::get_cache_line_size() * 2;
 static constexpr int elementsStride = blockSize / sizeof(int);
 
-bool NonZero::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool NonZero::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (op->get_type_info() != ngraph::op::v3::NonZero::get_type_info_static()) {
+        if (op->get_type_info() != op::v3::NonZero::get_type_info_static()) {
             errorMessage = "Node is not an instance of NonZero from the operation set v3.";
             return false;
         }
@@ -32,38 +32,37 @@ bool NonZero::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op
     return true;
 }
 
-NonZero::NonZero(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
-    : Node(op, context, InternalDynShapeInferFactory()) {
+NonZero::NonZero(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
+        : Node(op, context, InternalDynShapeInferFactory()) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        errorPrefix = "NonZero layer with name '" + getName() + "' ";
-    } else {
+    if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
-    if (op->get_output_element_type(0) != ngraph::element::i32) {
-        IE_THROW() << errorPrefix << "doesn't support demanded output precision";
-    }
 }
 
 void NonZero::getSupportedDescriptors() {
     if (getParentEdges().size() != 1)
-        IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size();
+        THROW_CPU_NODE_ERR << "has incorrect number of input edges: " << getParentEdges().size();
     if (!getChildEdges().size())
-        IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size();
+        THROW_CPU_NODE_ERR << "has incorrect number of output edges: " << getChildEdges().size();
 }
 
 void NonZero::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    const auto &inPrc = getOriginalInputPrecisionAtPort(0);
-    if (!one_of(inPrc, Precision::FP32, Precision::BF16, Precision::I32, Precision::U32, Precision::I8,  Precision::U8)) {
+    const auto inPrc = getOriginalInputPrecisionAtPort(0);
+    if (!one_of(inPrc, Precision::FP32, Precision::BF16, Precision::I64, Precision::I32, Precision::U32, Precision::I8,  Precision::U8)) {
         IE_THROW() << "Can't create primitive descriptor for NonZero layer with name: " << getName() << " doesn't support "
                    << inPrc.name() << " precision on 0 port";
     }
+    auto outPrc = getOriginalOutputPrecisionAtPort(0);
+    if (!one_of(outPrc, /*Precision::I64,*/ Precision::I32)) {
+        outPrc = Precision::I32;
+    }
 
     addSupportedPrimDesc({{LayoutType::ncsp}},
-                         {{LayoutType::ncsp, Precision::I32}},
+                         {{LayoutType::ncsp, outPrc}},
                          impl_desc_type::ref);
 }
 
@@ -123,7 +122,8 @@ void NonZero::execute(dnnl::stream strm) {
     OV_SWITCH(intel_cpu, NonZeroExecute, ctx, inputPrec,
               OV_CASE(Precision::FP32, float),
               OV_CASE(Precision::BF16, bfloat16_t),
-              OV_CASE(Precision::I32, int),
+              OV_CASE(Precision::I64, int64_t),
+              OV_CASE(Precision::I32, int32_t),
               OV_CASE(Precision::U32, uint32_t),
               OV_CASE(Precision::I8, int8_t),
               OV_CASE(Precision::U8, uint8_t))
diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.h b/src/plugins/intel_cpu/src/nodes/non_zero.h
index 57f2683cd56eff..0e9d1fb3255703 100644
--- a/src/plugins/intel_cpu/src/nodes/non_zero.h
+++ b/src/plugins/intel_cpu/src/nodes/non_zero.h
@@ -18,7 +18,7 @@ namespace node {
 
 class NonZero : public Node {
 public:
-  NonZero(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+  NonZero(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -27,13 +27,12 @@ class NonZero : public Node {
     bool needShapeInfer() const override {return false;};
     bool needPrepareParams() const override {return false;};
     void executeDynamicImpl(dnnl::stream strm) override;
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
     bool isExecutable() const override { return true; }
 
 private:
     int threadsCount = 1;
-    std::string errorPrefix;
     template <typename inputType>
     void executeSpecified();
     template<typename T>
diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.cpp b/src/plugins/intel_cpu/src/nodes/one_hot.cpp
index 5eefbf0131324c..2cfd84a6134cb7 100644
--- a/src/plugins/intel_cpu/src/nodes/one_hot.cpp
+++ b/src/plugins/intel_cpu/src/nodes/one_hot.cpp
@@ -2,18 +2,14 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <vector>
-#include <string>
-#include <dnnl_types.h>
+#include "one_hot.h"
+
 #include "ie_parallel.hpp"
 #include <selective_build.h>
-#include "one_hot.h"
-#include <nodes/common/blocked_desc_creator.h>
-#include <ngraph/opsets/opset1.hpp>
-#include <ie_ngraph_utils.hpp>
-#include <utils/shape_inference/static_shape.hpp>
-#include <utils/shape_inference/shape_inference.hpp>
-#include "common/cpu_memcpy.h"
+#include <openvino/opsets/opset1.hpp>
+
+#include <string>
+#include <vector>
 
 using namespace InferenceEngine;
 
@@ -51,9 +47,9 @@ class OneHotShapeInfer : public ShapeInferEmptyPads {
 
 class OneHotShapeInferFactory : public ShapeInferFactory {
 public:
-    OneHotShapeInferFactory(std::shared_ptr<ov::Node> op) : m_op(op) {}
+    OneHotShapeInferFactory(const std::shared_ptr<ov::Node> &op) : m_op(op) {}
     ShapeInferPtr makeShapeInfer() const override {
-        auto oneHot = ov::as_type_ptr<const ngraph::opset1::OneHot>(m_op);
+        auto oneHot = ov::as_type_ptr<const ov::opset1::OneHot>(m_op);
         if (!oneHot) {
             IE_THROW() << "Unexpected op type in OneHot shape inference factory: " << m_op->get_type_name();
         }
@@ -73,18 +69,17 @@ class OneHotShapeInferFactory : public ShapeInferFactory {
 
 } // namespace
 
-bool OneHot::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool OneHot::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        const auto oneHot = std::dynamic_pointer_cast<const ngraph::opset1::OneHot>(op);
-        if (!oneHot) {
+        if (op->get_type_info() != ov::opset1::OneHot::get_type_info_static()) {
             errorMessage = "Only opset1 OneHot operation is supported";
             return false;
         }
-        if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(ON_VALUE_ID)) == nullptr) {
+        if (std::dynamic_pointer_cast<const ov::opset1::Constant>(op->get_input_node_shared_ptr(ON_VALUE_ID)) == nullptr) {
             errorMessage = "Only const 'on_value' input is supported";
             return false;
         }
-        if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(OFF_VALUEAXES_ID)) == nullptr) {
+        if (std::dynamic_pointer_cast<const ov::opset1::Constant>(op->get_input_node_shared_ptr(OFF_VALUEAXES_ID)) == nullptr) {
             errorMessage = "Only const 'off_value' input is supported";
             return false;
         }
@@ -94,27 +89,26 @@ bool OneHot::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op,
     return true;
 }
 
-OneHot::OneHot(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
-    : Node(op, context, OneHotShapeInferFactory(op)) {
+OneHot::OneHot(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
+        : Node(op, context, OneHotShapeInferFactory(op)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    errorPrefix = "OneHot layer with name '" + op->get_friendly_name() + "'";
-    const auto oneHot = std::dynamic_pointer_cast<const ngraph::opset1::OneHot>(op);
-    const auto depthNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(DEPTH_ID));
+    const auto oneHot = std::dynamic_pointer_cast<const ov::opset1::OneHot>(op);
+    const auto depthNode = std::dynamic_pointer_cast<const ov::opset1::Constant>(oneHot->get_input_node_shared_ptr(DEPTH_ID));
     if (depthNode) {
         depth = depthNode->cast_vector<uint32_t>()[0];
     }
     axis = oneHot->get_axis();
 
     VectorDims srcDims = getInputShapeAtPort(INDICES_ID).getDims();
-    if (ngraph::is_scalar(srcDims)) {
+    if (ov::is_scalar(srcDims)) {
         srcDims = SizeVector{1};
     }
     VectorDims dstDims = getOutputShapeAtPort(0).getDims();
-    if (ngraph::is_scalar(dstDims)) {
+    if (ov::is_scalar(dstDims)) {
         dstDims = SizeVector{1};
     }
 
@@ -123,12 +117,12 @@ OneHot::OneHot(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr
         axis += output_dims_size;
     }
     if (axis < 0 || axis >= output_dims_size) {
-        IE_THROW() << errorPrefix << " has unsupported 'axis' attribute: " << oneHot->get_axis();
+        THROW_CPU_NODE_ERR << " has unsupported 'axis' attribute: " << oneHot->get_axis();
     }
 
     if (!(((1 + srcDims.size()) == dstDims.size()) ||
             (depthNode && (srcDims.size() == 1 && dstDims.size() == 1 && dstDims[0] == depth && srcDims[0] == 1))))
-        IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!";
+        THROW_CPU_NODE_ERR << " has incorrect number of input/output dimensions!";
 }
 
 bool OneHot::needShapeInfer() const {
@@ -146,23 +140,22 @@ void OneHot::initSupportedPrimitiveDescriptors() {
         return;
 
     // check a precision of the input tensor
-    auto input_precision = getOriginalInputPrecisionAtPort(INDICES_ID);
-    if (input_precision != Precision::I32) {
-        IE_THROW() << errorPrefix << " has incorrect input precision for the input. Only I32 is supported!";
+    inputPrecision = getOriginalInputPrecisionAtPort(INDICES_ID);
+    if (!one_of(inputPrecision, Precision::I32, Precision::I64)) {
+        THROW_CPU_NODE_ERR << " has incorrect input precision for the input. Only I32 and I64 are supported!";
     }
-    output_precision = getOriginalOutputPrecisionAtPort(0);
+    outputPrecision = getOriginalOutputPrecisionAtPort(0);
 
-    addSupportedPrimDesc({{LayoutType::ncsp, input_precision},
-                          {LayoutType::ncsp, input_precision},
-                          {LayoutType::ncsp, output_precision},
-                          {LayoutType::ncsp, output_precision}},
-                         {{LayoutType::ncsp, output_precision}},
+    addSupportedPrimDesc({{LayoutType::ncsp, inputPrecision},
+                          {LayoutType::ncsp, inputPrecision},
+                          {LayoutType::ncsp, outputPrecision},
+                          {LayoutType::ncsp, outputPrecision}},
+                         {{LayoutType::ncsp, outputPrecision}},
                          impl_desc_type::ref_any);
 }
 
 template<typename out_type>
 void OneHot::one_hot(size_t prefix_size, size_t suffix_size) {
-    const auto *src_data = reinterpret_cast<const in_type *>(getParentEdgeAt(0)->getMemoryPtr()->getData());
     auto *dst_data = reinterpret_cast<out_type *>(getChildEdgeAt(0)->getMemoryPtr()->getData());
 
     const out_type on_value = reinterpret_cast<const out_type *>(getParentEdgeAt(2)->getMemoryPtr()->getData())[0];
@@ -174,16 +167,31 @@ void OneHot::one_hot(size_t prefix_size, size_t suffix_size) {
 
     // set on_value at needed locations
     auto on_val = on_value;
-    parallel_for(prefix_size, [&](std::size_t prefix_idx) {
-        const in_type* src_dataPtr = &src_data[prefix_idx * suffix_size];
-        out_type* dst_dataPtr = &dst_data[prefix_idx * depth * suffix_size];
-        for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; ++suffix_idx, ++src_dataPtr, ++dst_dataPtr) {
-            auto v = static_cast<std::size_t>(*src_dataPtr);
-            if (v < depth) {
-                dst_dataPtr[v * suffix_size] = on_val;
+    if (inputPrecision == Precision::I64) {  // indices are read as int64_t here and as int32_t in the else branch
+        const auto *src_data = reinterpret_cast<const int64_t *>(getParentEdgeAt(0)->getMemoryPtr()->getData());
+        parallel_for(prefix_size, [&](std::size_t prefix_idx) {
+            auto src_dataPtr = &src_data[prefix_idx * suffix_size];  // first index of this prefix slice
+            out_type *dst_dataPtr = &dst_data[prefix_idx * depth * suffix_size];  // start of the matching depth x suffix output slab
+            for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; ++suffix_idx, ++src_dataPtr, ++dst_dataPtr) {
+                auto v = static_cast<std::size_t>(*src_dataPtr);  // a negative index wraps to a large unsigned value
+                if (v < depth) {  // indices outside [0, depth) write nothing
+                    dst_dataPtr[v * suffix_size] = on_val;
+                }
             }
-        }
-    });
+        });
+    } else {  // same loop as above, reading the indices as int32_t
+        const auto *src_data = reinterpret_cast<const int32_t *>(getParentEdgeAt(0)->getMemoryPtr()->getData());
+        parallel_for(prefix_size, [&](std::size_t prefix_idx) {
+            auto src_dataPtr = &src_data[prefix_idx * suffix_size];
+            out_type *dst_dataPtr = &dst_data[prefix_idx * depth * suffix_size];
+            for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; ++suffix_idx, ++src_dataPtr, ++dst_dataPtr) {
+                auto v = static_cast<std::size_t>(*src_dataPtr);  // a negative index wraps to a large unsigned value
+                if (v < depth) {  // indices outside [0, depth) write nothing
+                    dst_dataPtr[v * suffix_size] = on_val;
+                }
+            }
+        });
+    }
 }
 
 void OneHot::executeDynamicImpl(dnnl::stream strm) {
@@ -201,7 +209,8 @@ void OneHot::execute(dnnl::stream strm) {
     std::size_t suffix_size = getParentEdgeAt(0)->getMemory().getShape().getElementsCount() / prefix_size;
 
     OneHotContext ctx = {this, prefix_size, suffix_size};
-    OV_SWITCH(intel_cpu, OneHotExecute, ctx, output_precision.size(),
+    OV_SWITCH(intel_cpu, OneHotExecute, ctx, outputPrecision.size(),
+              OV_CASE(sizeof(uint64_t), uint64_t),
               OV_CASE(sizeof(uint32_t), uint32_t),
               OV_CASE(sizeof(uint16_t), uint16_t),
               OV_CASE(sizeof(uint8_t), uint8_t))
diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.h b/src/plugins/intel_cpu/src/nodes/one_hot.h
index 9db0a066c76f8c..731b9c7da0ac28 100644
--- a/src/plugins/intel_cpu/src/nodes/one_hot.h
+++ b/src/plugins/intel_cpu/src/nodes/one_hot.h
@@ -4,12 +4,11 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
+
 #include <memory>
+#include <string>
 #include <vector>
-#include <ie_blob.h>
 
 namespace ov {
 namespace intel_cpu {
@@ -17,7 +16,7 @@ namespace node {
 
 class OneHot : public Node {
 public:
-    OneHot(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    OneHot(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
@@ -29,11 +28,9 @@ class OneHot : public Node {
     bool needPrepareParams() const override { return false; };
     void executeDynamicImpl(dnnl::stream strm) override;
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
 private:
-    typedef InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type in_type;
-
     struct OneHotContext {
         OneHot* nodePtr;
         size_t prefix_size;
@@ -50,9 +47,8 @@ class OneHot : public Node {
     mutable Dim depth = Shape::UNDEFINED_DIM;
     int32_t axis = -1;
 
-    InferenceEngine::Precision output_precision;
-
-    std::string errorPrefix;
+    InferenceEngine::Precision inputPrecision;
+    InferenceEngine::Precision outputPrecision;
 
     static const size_t INDICES_ID = 0;
     static const size_t DEPTH_ID = 1;
diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp
index 1bd288697bef34..e98f1e2741454e 100644
--- a/src/plugins/intel_cpu/src/nodes/pooling.cpp
+++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp
@@ -11,13 +11,13 @@
 #include <oneapi/dnnl/dnnl.hpp>
 #include <string>
 #include <vector>
-#include <onednn/dnnl.h>
 #include <dnnl_extension_utils.h>
-#include <utils/general_utils.h>
 #include <memory_desc/cpu_memory_desc_utils.h>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
 #include "nodes/node_config.h"
 #include <common/primitive_hashing_utils.hpp>
+#include <openvino/op/avg_pool.hpp>
+#include <openvino/op/max_pool.hpp>
 
 // to access and change C pooling primitive desc internal padding field
 #include <common/primitive_desc_iface.hpp>
diff --git a/src/plugins/intel_cpu/src/nodes/range.cpp b/src/plugins/intel_cpu/src/nodes/range.cpp
index c7b47e55449a21..35ab3867cd17dd 100644
--- a/src/plugins/intel_cpu/src/nodes/range.cpp
+++ b/src/plugins/intel_cpu/src/nodes/range.cpp
@@ -3,7 +3,7 @@
 //
 
 #include <string>
-#include <ngraph/opsets/opset1.hpp>
+#include <openvino/opsets/opset1.hpp>
 #include "ie_parallel.hpp"
 #include "range.h"
 #include <utils/general_utils.h>
@@ -15,9 +15,9 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool Range::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Range::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!one_of(op->get_type_info(), ngraph::op::v0::Range::get_type_info_static(), ngraph::op::v4::Range::get_type_info_static())) {
+        if (!one_of(op->get_type_info(), ov::op::v0::Range::get_type_info_static(), ov::op::v4::Range::get_type_info_static())) {
             errorMessage = "Only opset1 and opset4 Range operation is supported";
             return false;
         }
@@ -27,7 +27,7 @@ bool Range::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op,
     return true;
 }
 
-Range::Range(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+Range::Range(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
     : Node(op, context, InternalDynShapeInferFactory()) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
@@ -40,15 +40,15 @@ Range::Range(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr c
         IE_THROW() << errorPrefix << " has incorrect number of input/output edges!";
 
     SizeVector start_dims = op->get_input_shape(RANGE_START);
-    if (ngraph::shape_size(start_dims) != 1)
+    if (ov::shape_size(start_dims) != 1)
         IE_THROW() << errorPrefix << " has start scalar with more than 1 value";
 
     SizeVector limit_dims = op->get_input_shape(RANGE_LIMIT);
-    if (ngraph::shape_size(limit_dims) != 1)
+    if (ov::shape_size(limit_dims) != 1)
         IE_THROW() << errorPrefix << " has limit scalar with more than 1 value";
 
     SizeVector delta_dims = op->get_input_shape(RANGE_DELTA);
-    if (ngraph::shape_size(delta_dims) != 1)
+    if (ov::shape_size(delta_dims) != 1)
         IE_THROW() << errorPrefix << " has delta scalar with more than 1 value";
 
     size_t dstRank = op->get_output_partial_shape(0).size();
diff --git a/src/plugins/intel_cpu/src/nodes/range.h b/src/plugins/intel_cpu/src/nodes/range.h
index e0b424e0e06ae9..4cefbe04811e22 100644
--- a/src/plugins/intel_cpu/src/nodes/range.h
+++ b/src/plugins/intel_cpu/src/nodes/range.h
@@ -13,7 +13,7 @@ namespace node {
 
 class Range : public Node {
 public:
-    Range(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Range(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp
index bb3992f98cb38e..2026b074cd0883 100644
--- a/src/plugins/intel_cpu/src/nodes/reduce.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp
@@ -4,33 +4,18 @@
 
 #include "reduce.h"
 
-#include "fake_quantize.h"
 #include "eltwise.h"
-#include <string>
-#include <vector>
-#include <set>
-#include <onednn/dnnl.h>
-#include <dnnl_extension_utils.h>
-#include "utils/bfloat16.hpp"
-#include "emitters/x64/jit_bf16_emitters.hpp"
+#include "fake_quantize.h"
 #include "ie_parallel.hpp"
-#include <algorithm>
-
-#include <cpu/x64/jit_generator.hpp>
-#include <cpu/x64/jit_uni_eltwise.hpp>
-#include <cpu/x64/injectors/jit_uni_depthwise_injector.hpp>
-#include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
-#include <cpu/x64/injectors/jit_uni_eltwise_injector.hpp>
-#include <ngraph/opsets/opset1.hpp>
-#include <ngraph/opsets/opset4.hpp>
+#include "utils/bfloat16.hpp"
+#include <ie_ngraph_utils.hpp>
+
 #include <common/primitive_hashing_utils.hpp>
 
-using namespace dnnl;
+using namespace ov::intel_cpu::node;
+using namespace ov::intel_cpu::kernel;
 using namespace InferenceEngine;
-using namespace dnnl::impl;
-using namespace dnnl::impl::cpu::x64;
-using namespace dnnl::impl::utils;
-using namespace Xbyak;
+using namespace dnnl::impl::cpu;
 
 #define SET_SRC_DIM_VALUE(batch, channel, depth, height, width) IB = batch;   \
                                                                 IC = channel; \
@@ -43,8 +28,8 @@ using namespace Xbyak;
                                                                 OH = height;  \
                                                                 OW = width;
 
-#define GET_OFF(field) offsetof(jit_reduce_call_args, field)
-#define GET_OFF_POST(field) offsetof(jit_reduce_post_call_args, field)
+#define GET_OFF(field) offsetof(JitReduceCallArgs, field)
+#define GET_OFF_POST(field) offsetof(JitReducePostCallArgs, field)
 
 #define GET_PTR_N_PLN              const uint8_t    *in_ptr_n      = in_ptr       + src_data_size * ib * IC * ID * IH * IW;               \
                                          uint8_t    *out_ptr_n     = out_ptr      + dst_data_size * ob * OC * OD * OH * OW;
@@ -69,13 +54,10 @@ using namespace Xbyak;
 #define GET_PTR_NCD_BASE_PTR_N_BLK const uint8_t    *in_ptr_ncd    = in_ptr_n     + src_data_size * (icb * ID + id) * IH * IW * blk_size; \
                                          uint8_t    *out_ptr_ncd   = out_ptr_n    + dst_data_size * (ocb * OD + od) * OH * OW * blk_size;
 
-namespace ov {
-namespace intel_cpu {
-namespace node {
 namespace {
 
 struct ReduceKey {
-    jit_reduce_config_params jcp;
+    JitReduceConfigParams jcp;
     dnnl::post_ops postOps;
 
     size_t hash() const;
@@ -90,8 +72,8 @@ size_t ReduceKey::hash() const {
     seed = hash_combine(seed, jcp.layout);
     seed = hash_combine(seed, jcp.reduce_mode);
     seed = hash_combine(seed, jcp.fuse_low_precision);
-    seed = hash_combine(seed, jcp.src_dt);
-    seed = hash_combine(seed, jcp.dst_dt);
+    seed = hash_combine(seed, static_cast<ov::element::Type_t>(jcp.src_el_type));  // hash the element-type enum value
+    seed = hash_combine(seed, static_cast<ov::element::Type_t>(jcp.dst_el_type));  // hash the element-type enum value
     seed = get_post_op_hash(seed, *postOps.get());
 
     return seed;
@@ -100,1768 +82,129 @@ size_t ReduceKey::hash() const {
 bool ReduceKey::operator==(const ReduceKey &rhs) const {
     return jcp.layout == rhs.jcp.layout && jcp.reduce_mode == rhs.jcp.reduce_mode &&
            jcp.fuse_low_precision == rhs.jcp.fuse_low_precision &&
-           jcp.src_dt == rhs.jcp.src_dt && jcp.dst_dt == rhs.jcp.dst_dt && *postOps.get() == *rhs.postOps.get();
-}
-} // namespace
-
-#if defined(OPENVINO_ARCH_X86_64)
-
-// some utility functions
-static inline bool isFloatCompatible(memory::data_type type) {
-    return memory::data_type::f32 == type || memory::data_type::bf16 == type;
+           jcp.src_el_type == rhs.jcp.src_el_type && jcp.dst_el_type == rhs.jcp.dst_el_type && *postOps.get() == *rhs.postOps.get();
 }
 
-template <cpu_isa_t isa>
-struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_reduce_kernel_f32)
-
-    explicit jit_uni_reduce_kernel_f32(jit_reduce_config_params jcp)
-    : jit_uni_reduce_kernel(jcp), jit_generator(jit_name()) {}
-
-    void create_ker() override {
-        jit_generator::create_kernel();
-        ker_ = (decltype(ker_))jit_ker();
-    }
-
-    void generate() override {
-        if (jcp_.reduce_mode == Algorithm::ReduceLogSumExp) {
-            exp_injector = std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this, alg_kind::eltwise_exp, 0.f, 0.f, 1.f);
-        }
-
-        if (mayiuse(avx512_core))
-            uni_vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa);
-
-        this->preamble();
-
-        planar_layout = jcp_.layout == ReduceLayoutType::reduce_ncsp || jcp_.layout == ReduceLayoutType::reduce_nspc;
-
-        mov(reg_src, ptr[reg_params + GET_OFF(src)]);
-        mov(reg_dst, ptr[reg_params + GET_OFF(dst)]);
-        mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]);
-        mov(reg_work_batch, ptr[reg_params + GET_OFF(work_batch)]);
-        if (planar_layout)
-            mov(reg_reduce_w, ptr[reg_params + GET_OFF(reduce_w)]);
-
-        if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax ||
-            jcp_.reduce_mode == Algorithm::ReduceMin || jcp_.reduce_mode == Algorithm::ReduceProd || jcp_.reduce_mode == Algorithm::ReduceOr) {
-            mov(reg_table, l_table);
-        }
-
-        if (isa == cpu::x64::avx512_core || jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceOr)
-            uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-
-        if ((isa == cpu::x64::avx512_core && jcp_.reduce_mode == Algorithm::ReduceAnd) || jcp_.reduce_mode == Algorithm::ReduceOr) {
-            uni_vmovups(vmm_aux, table_val(0));
-        }
-
-        reduce_main();
-        reduce_tail();
-
-        this->postamble();
-
-        if (mayiuse(avx512_core))
-            uni_vcvtneps2bf16->emit_data();
-
-        if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax ||
-            jcp_.reduce_mode == Algorithm::ReduceMin || jcp_.reduce_mode == Algorithm::ReduceProd || jcp_.reduce_mode == Algorithm::ReduceOr) {
-            prepare_aux_table();
-        } else if (jcp_.reduce_mode == Algorithm::ReduceLogSumExp) {
-            exp_injector->prepare_table();
-        }
-    }
-
-private:
-    using Vmm = typename conditional3<isa == cpu::x64::sse41, Xbyak::Xmm, isa == cpu::x64::avx2,
-            Xbyak::Ymm, Xbyak::Zmm>::type;
-    size_t vlen = cpu_isa_traits<isa>::vlen;
-    bool planar_layout = false;
-
-    Xbyak::Address table_val(int index) { return ptr[reg_table + index * vlen]; }
-
-    Xbyak::Reg64 reg_src = r8;
-    Xbyak::Reg64 reg_dst = r9;
-    Xbyak::Reg64 reg_idx = rdx;
-    Xbyak::Reg64 reg_work_amount = r10;
-    Xbyak::Reg64 reg_reduce_w = r11;
-    Xbyak::Reg64 reg_reduce_stride = r12;
-    Xbyak::Reg64 reg_work_batch = r13;
-    Xbyak::Reg64 reg_table = r14;
-    Xbyak::Reg64 reg_params = abi_param1;
-
-    Xbyak::Reg8 reg_tmp_8 = r15b;
-    Xbyak::Reg32 reg_tmp_32 = r15d;
-    Xbyak::Reg64 reg_tmp_64 = r15;
-
-    Xbyak::Reg64 reg_src_aux = rax;
-    Xbyak::Reg64 reg_work_batch_aux = rbx;
-    Xbyak::Reg64 reg_can_divide = rbp;
-    Xbyak::Reg64 reg_divisor = reg_can_divide;
-
-    Vmm vmm_aux = Vmm(0);
-    Xmm xmm_aux = Xmm(0);
-    Vmm vmm_src = Vmm(1);
-    Xmm xmm_src = Xmm(1);
-    Vmm vmm_dst = Vmm(2);
-    Xmm xmm_dst = Xmm(2);
-    Vmm vmm_zero = Vmm(3);
-    Xmm xmm_zero = Xmm(3);
-    Vmm vmm_dst_aux = Vmm(4);
-    Xmm xmm_aux1 = Xmm(5);
-    Xmm xmm_aux2 = Xmm(6);
-    Xmm xmm_aux3 = Xmm(7);
-    Vmm vmm_idx = Vmm(8);
-    Vmm vmm_mask = Vmm(9);
-
-    const Xbyak::Opmask k_mask = Xbyak::Opmask(1);
-
-    Xbyak::Label l_table;
-
-    std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16;
-    std::shared_ptr<jit_uni_eltwise_injector_f32<isa>> exp_injector;
-
-    inline void reduce_main() {
-        // ================================================================
-        // ***isa: AVX512***
-        // ReduceAnd (Logical And)
-        // step 1: init dst 0x3f800000 (1.0f)
-        //              aux 0x3f800000 (1.0f)
-        //             zero 0x00000000 (0.0f)
-        // step 2: if src equals 0, set mask bit 0, else set mask bit 1
-        // step 3: src = mask bit == 0 ? zero : aux
-        // step 4: dst = dst & src
-        //                  src    mask_bit    new_src    dst    new_dst
-        //         case 1    ~0        1         1.0f     1.0f     1.0f
-        //         case 2     0        0         0.0f     1.0f     0.0f
-        //         case 3    ~0        1         1.0f     0.0f     0.0f
-        //         case 4     0        0         0.0f     0.0f     0.0f
-        // step 5: loop: offset src, and do step 2 and step 3
-        //
-        // ReduceOr (Logical Or)
-        // step 1: init dst 0x00000000 (0.0f)
-        //              aux 0x3f800000 (1.0f)
-        //             zero 0x00000000 (0.0f)
-        // step 2: if src equals 0, set mask bit 0, else set mask bit 1
-        // step 3: src = mask bit == 0 ? zero : aux
-        // step 4: dst = dst | src
-        //                  src    mask_bit    new_src    dst    new_dst
-        //         case 1     0        0         0.0f     0.0f     0.0f
-        //         case 2    ~0        1         1.0f     0.0f     1.0f
-        //         case 3     0        0         0.0f     1.0f     1.0f
-        //         case 4    ~0        1         1.0f     1.0f     1.0f
-        // step 5: loop: offset src, and do step 2 and step 3
-        // ================================================================
-        // ***isa: OTHER***
-        // ReduceAnd (Logical And)
-        // step 1: init dst 0x3f800000 (1.0f)
-        // step 2: if src equals 0, set it 0x00000000, else set 0xffffffff
-        // step 3: dst = dst & src
-        //         0x3f800000 = 0x3f800000 & 0xffffffff (result: 1.0f)
-        //         0x00000000 = 0x3f800000 & 0x00000000 (result: 0.0f)
-        //         0x00000000 = 0x00000000 & 0xffffffff (result: 0.0f)
-        //         0x00000000 = 0x00000000 & 0x00000000 (result: 0.0f)
-        // step 4: loop: offset src, and do step 2 and step 3
-        //
-        // ReduceOr (Logical Or)
-        // step 1: init dst 0x00000000 (0.0f)
-        //              aux 0x3f800000 (1.0f)
-        // step 2: dst = dst | src
-        //         0x00000000 = 0x00000000 | 0x00000000
-        //                  A = 0x00000000 | A
-        //                  A =          A | 0x00000000
-        //                  C =          A | B
-        // (A, B stand for number other than 0x00000000)
-        // step 3: loop: offset src, and do step 2
-        // step 4: if dst equals 0, set it 0x00000000, else set 0xffffffff
-        // step 5: dst = dst & aux
-        //         0x00000000 = 0x00000000 & 0x3f800000 (result: 0.0f)
-        //         0x3f800000 = 0xffffffff & 0x3f800000 (result: 1.0f)
-        // ================================================================
-        Xbyak::Label reduce_to_vector_label;
-        Xbyak::Label reduce_to_scalar_label;
-        Xbyak::Label reduce_to_gather_label;
-        Xbyak::Label reduce_main_end_label;
-        if (planar_layout) {
-            cmp(reg_work_batch, 0);
-            je(reduce_to_gather_label, T_NEAR);
-
-            cmp(reg_reduce_w, 1); // planar layout reducing W
-            je(reduce_to_scalar_label, T_NEAR);
-        }
-
-        // store vmm_dst directly into memory after reducing
-        // cases: [planar layout reducing other dimensions but W] [blocked layout]
-        L(reduce_to_vector_label);
-        {
-            int step = vlen / sizeof(float) < 8 ? 8 : vlen / sizeof(float);
-            cmp(reg_work_amount, step);
-            jl(reduce_main_end_label, T_NEAR); //avoid illegal loading and storing
-
-            if (jcp_.reduce_mode == Algorithm::ReduceL1) {
-                uni_vmovups(vmm_aux, table_val(1));
-            }
-
-            // load
-            load_dst_vector();
-
-            // reduce
-            reduce_kernel();
-
-            if (jcp_.reduce_mode == Algorithm::ReduceMean) {
-                Xbyak::Label reduce_divide_end_label;
-                mov(reg_can_divide, ptr[reg_params + GET_OFF(can_divide)]);
-                cmp(reg_can_divide, 0);
-                je(reduce_divide_end_label, T_NEAR);
-                {
-                    mov(reg_divisor, ptr[reg_params + GET_OFF(divisor)]);
-                    uni_vbroadcastss(vmm_aux, ptr[reg_divisor]);
-                    uni_vdivps(vmm_dst, vmm_dst, vmm_aux);
-                    if (isa == cpu::x64::sse41) {
-                        uni_vdivps(vmm_dst_aux, vmm_dst_aux, vmm_aux);
-                    }
-                }
-                L(reduce_divide_end_label);
-            }
-
-            // store
-            store_dst_vector();
-
-            jmp(reduce_main_end_label, T_NEAR);
-        }
-
-        // reduce vector in vmm_dst to be a scalar before store into memory
-        // cases: [planar layout reducing W]
-        L(reduce_to_scalar_label);
-        {
-            // init dst, dst loading is embedded in horiz_reduce_store
-            switch (jcp_.reduce_mode) {
-                case Algorithm::ReduceAnd:
-                case Algorithm::ReduceProd:
-                    uni_vmovups(vmm_dst, table_val(0));
-                    break;
-                case Algorithm::ReduceL1:
-                    uni_vmovups(vmm_aux, table_val(1));
-                    uni_vpxor(vmm_dst, vmm_dst, vmm_dst);
-                    break;
-                case Algorithm::ReduceL2:
-                case Algorithm::ReduceLogSum:
-                case Algorithm::ReduceLogSumExp:
-                case Algorithm::ReduceMean:
-                case Algorithm::ReduceOr:
-                case Algorithm::ReduceSum:
-                case Algorithm::ReduceSumSquare:
-                    uni_vpxor(vmm_dst, vmm_dst, vmm_dst);
-                    break;
-                case Algorithm::ReduceMax:
-                    if (isFloatCompatible(jcp_.dst_dt))
-                        uni_vmovups(vmm_dst, table_val(2));
-                    else
-                        uni_vmovups(vmm_dst, table_val(4));
-                    break;
-                case Algorithm::ReduceMin:
-                    if (isFloatCompatible(jcp_.dst_dt))
-                        uni_vmovups(vmm_dst, table_val(3));
-                    else
-                        uni_vmovups(vmm_dst, table_val(5));
-                    break;
-                default:
-                    assert(!"unsupported reduce mode");
-            }
-            // reduce
-            reduce_main_loop();
-            if (jcp_.reduce_mode == Algorithm::ReduceOr && isa != cpu::x64::avx512_core) {
-                uni_cmpneqps(vmm_dst, vmm_dst, vmm_zero);
-                uni_vandps(vmm_dst, vmm_dst, vmm_aux);
-            }
-            // store
-            // store after horizontal calculation and calculation with loaded original ptr[reg_dst]
-            horiz_reduce_store(vmm_dst, jcp_.dst_dt, true);
-
-            jmp(reduce_main_end_label, T_NEAR);
-        }
-
-        // load vmm_src with gather, then store vmm_dst directly into memory after reducing
-        // cases: [planar layout reducing small W]
-        L(reduce_to_gather_label);
-        {
-            int step = 1;
-            cmp(reg_work_amount, step);
-            jl(reduce_main_end_label, T_NEAR); //avoid illegal loading and storing
-
-            mov(reg_idx, ptr[reg_params + GET_OFF(idx)]);
-            uni_vmovdqu(vmm_idx, ptr[reg_idx]);
-
-            if (jcp_.reduce_mode == Algorithm::ReduceL1) {
-                uni_vmovups(vmm_aux, table_val(1));
-            }
-
-            // load
-            load_dst_vector();
-
-            // reduce
-            Xbyak::Label reduce_loop_label;
-            Xbyak::Label reduce_loop_end_label;
-            L(reduce_loop_label);
-            {
-                cmp(reg_work_amount, step);
-                jl(reduce_loop_end_label, T_NEAR);
-
-                reduce_gather(vmm_dst, 0);
-                if (isa == cpu::x64::sse41) {
-                    reduce_gather(vmm_dst_aux, 4 * jcp_.src_data_size);
-                }
-
-                add(reg_src, step * jcp_.src_data_size);
-                sub(reg_work_amount, step);
-                jmp(reduce_loop_label, T_NEAR);
-            }
-            L(reduce_loop_end_label);
-
-            // store
-            store_dst_vector();
-
-            jmp(reduce_main_end_label, T_NEAR);
-        }
-
-        L(reduce_main_end_label);
-    }
-
-    inline void reduce_tail() {
-        if (jcp_.reduce_mode == Algorithm::ReduceL1) {
-            uni_vmovups(xmm_aux, table_val(1));
-        }
-
-        Xbyak::Label tail_dst_shifted_label;
-        Xbyak::Label tail_dst_fixed_label;
-        Xbyak::Label reduce_tail_end_label;
-        if (planar_layout) {
-            cmp(reg_reduce_w, 1);  // planar layout reducing W
-            je(tail_dst_fixed_label, T_NEAR);
-        }
-
-        // each src scalar reduce to each dst scalar (X1, X2, X3, ...) -> (Y1, Y2, Y3, ...)
-        // cases: [planar layout reducing other dimensions but W] [blocked layout concern padding]
-        L(tail_dst_shifted_label);
-        {
-            reduce_kernel_tail();
-
-            jmp(reduce_tail_end_label, T_NEAR);
-        }
-
-        // each src scalar reduce to the same dst scalar (X1, X2, X3, ...) -> (Y1)
-        // cases: [planar layout reducing W]
-        L(tail_dst_fixed_label);
-        {
-            // load
-            load_scalar(xmm_dst, ptr[reg_dst], jcp_.dst_dt);
-
-            Xbyak::Label reduce_loop_label;
-            Xbyak::Label reduce_loop_end_label;
-
-            // reduce
-            int step = 1;
-            L(reduce_loop_label);
-            {
-                cmp(reg_work_amount, step);
-                jl(reduce_loop_end_label, T_NEAR);
-
-                load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt);
-
-                reduce_kernel_scalar(xmm_src, xmm_dst);
-                if (jcp_.reduce_mode == Algorithm::ReduceOr) {
-                    uni_cmpneqps(xmm_dst, xmm_dst, xmm_zero);
-                    uni_vandps(xmm_dst, xmm_dst, xmm_aux);
-                }
-
-                add(reg_src, step * jcp_.src_data_size);
-                sub(reg_work_amount, step);
-
-                jmp(reduce_loop_label, T_NEAR);
-            }
-            L(reduce_loop_end_label);
-
-            // store
-            store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt);
-        }
-
-        L(reduce_tail_end_label);
-    }
-
-    inline void init_reg_reduce_stride() {
-        mov(reg_reduce_stride, ptr[reg_params + GET_OFF(reduce_stride)]);
-        mul_by_const(reg_reduce_stride, reg_tmp_64, jcp_.src_data_size);
-    }
-
-    inline void reduce_kernel() {
-        Xbyak::Label reduce_label;
-        Xbyak::Label reduce_end_label;
-        Xbyak::Label reduce_batch_label;
-        Xbyak::Label reduce_batch_end_label;
-
-        int step = vlen / sizeof(float) < 8 ? 8 : vlen / sizeof(float);
-        cmp(reg_work_batch, 1);
-        je(reduce_label, T_NEAR);
-
-        init_reg_reduce_stride();
-
-        L(reduce_batch_label);
-        {
-            cmp(reg_work_amount, step);
-            jl(reduce_end_label, T_NEAR);
-
-            reduce_batch();
-
-            add(reg_src, step * jcp_.src_data_size);
-            sub(reg_work_amount, step);
-            jmp(reduce_batch_label, T_NEAR);
-        }
-        L(reduce_batch_end_label);
-
-        L(reduce_label);
-        {
-            cmp(reg_work_amount, step);
-            jl(reduce_end_label, T_NEAR);
-
-            reduce_once();
-
-            add(reg_src, step * jcp_.src_data_size);
-            sub(reg_work_amount, step);
-            jmp(reduce_label, T_NEAR);
-        }
-        L(reduce_end_label);
-    }
-
-    inline void reduce_once() {
-        load_vector(vmm_src, ptr[reg_src], jcp_.src_dt);
-        reduce_kernel(vmm_src, vmm_dst);
-
-        if (isa == cpu::x64::sse41) {
-            load_vector(vmm_src, ptr[reg_src + 4 * jcp_.src_data_size], jcp_.src_dt);
-            reduce_kernel(vmm_src, vmm_dst_aux);
-        }
-    }
-
-    inline void reduce_batch() {
-        mov(reg_src_aux, reg_src);
-        mov(reg_work_batch_aux, reg_work_batch);
-
-        Xbyak::Label reduce_batch_loop_label;
-        Xbyak::Label reduce_batch_loop_end_label;
-        L(reduce_batch_loop_label);
-        {
-            cmp(reg_work_batch_aux, 1);
-            jl(reduce_batch_loop_end_label, T_NEAR);
-
-            load_vector(vmm_src, ptr[reg_src_aux], jcp_.src_dt);
-            reduce_kernel(vmm_src, vmm_dst);
-            if (isa == cpu::x64::sse41) {
-                load_vector(vmm_src, ptr[reg_src_aux + 4 * jcp_.src_data_size], jcp_.src_dt);
-                reduce_kernel(vmm_src, vmm_dst_aux);
-            }
-
-            add(reg_src_aux, reg_reduce_stride);
-            sub(reg_work_batch_aux, 1);
-            jmp(reduce_batch_loop_label, T_NEAR);
-        }
-        L(reduce_batch_loop_end_label);
-    }
-
-    inline void reduce_gather(Vmm vmm_dst, int offset) {
-        switch (jcp_.src_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                if (isa == cpu::x64::avx512_core) {
-                    kxnord(k_mask, k_mask, k_mask);
-                    if (jcp_.src_dt == memory::data_type::f32) {
-                        vgatherdps(vmm_src | k_mask, ptr[reg_src + offset + vmm_idx]);
-                    } else {
-                        vpgatherdd(vmm_src | k_mask, ptr[reg_src + offset + vmm_idx]);
-                        uni_vcvtdq2ps(vmm_src, vmm_src);
-                    }
-                } else if (isa == cpu::x64::avx2) {
-                    uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask);
-                    if (jcp_.src_dt == memory::data_type::f32) {
-                        vgatherdps(vmm_src, ptr[reg_src + offset + vmm_idx], vmm_mask);
-                    } else {
-                        vpgatherdd(vmm_src, ptr[reg_src + offset + vmm_idx], vmm_mask);
-                        uni_vcvtdq2ps(vmm_src, vmm_src);
-                    }
-                } else {
-                    pack_gathered_vector(vmm_src, vmm_idx, offset, jcp_.src_dt);
-                }
-                break;
-            case memory::data_type::bf16:
-            case memory::data_type::s8:
-            case memory::data_type::u8:
-                pack_gathered_vector(vmm_src, vmm_idx, offset, jcp_.src_dt);
-                break;
-            default:
-                assert(!"unknown src_dt");
-        }
-        reduce_kernel(vmm_src, vmm_dst);
-    }
-
-    inline void pack_gathered_vector(Vmm vmm_val, Vmm vmm_index, int offset, memory::data_type src_dt) {
-        sub(rsp, vlen);
-        uni_vmovdqu(ptr[rsp], vmm_index);
-        size_t repeats = vlen / sizeof(float);
-        for (size_t i = 0; i < repeats; i++) {
-            mov(reg_tmp_64.cvt32(), ptr[rsp + i * sizeof(int)]);
-            Xbyak::Address table_idx = ptr[reg_src + offset + reg_tmp_64];
-            switch (src_dt) {
-                case memory::data_type::f32:
-                case memory::data_type::s32:
-                    mov(reg_tmp_64.cvt32(), table_idx);
-                    mov(ptr[rsp + i * sizeof(int)], reg_tmp_64.cvt32());
-                    break;
-                case memory::data_type::bf16:
-                    mov(reg_tmp_64.cvt16(), table_idx);
-                    mov(ptr[rsp + i * sizeof(ov::intel_cpu::bfloat16_t)], reg_tmp_64.cvt16());
-                    break;
-                case memory::data_type::s8:
-                case memory::data_type::u8:
-                    mov(reg_tmp_64.cvt8(), table_idx);
-                    mov(ptr[rsp + i * sizeof(char)], reg_tmp_64.cvt8());
-                    break;
-                default:
-                    assert(!"unknown src_dt");
-            }
-        }
-
-        switch (src_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovups(vmm_val, ptr[rsp]);
-                break;
-            case memory::data_type::bf16:
-                uni_vpmovzxwd(vmm_val, ptr[rsp]);
-                uni_vpslld(vmm_val, vmm_val, 16);
-            break;
-            case memory::data_type::s8:
-                uni_vpmovsxbd(vmm_val, ptr[rsp]);
-                break;
-            case memory::data_type::u8:
-                uni_vpmovzxbd(vmm_val, ptr[rsp]);
-                break;
-            default:
-                assert(!"unknown src_dt");
-        }
-
-        if (!isFloatCompatible(src_dt))
-            uni_vcvtdq2ps(vmm_val, vmm_val);
-        add(rsp, vlen);
-    }
-
-    inline void reduce_kernel_tail() {
-        Xbyak::Label reduce_label;
-        Xbyak::Label reduce_end_label;
-        Xbyak::Label reduce_batch_label;
-        Xbyak::Label reduce_batch_end_label;
-
-        int step = 1;
-        cmp(reg_work_batch, 1);
-        je(reduce_label, T_NEAR);
-
-        init_reg_reduce_stride();
-
-        L(reduce_batch_label);
-        {
-            cmp(reg_work_amount, step);
-            jl(reduce_end_label, T_NEAR);
-
-            // load
-            load_scalar(xmm_dst, ptr[reg_dst], jcp_.dst_dt);
-
-            // reduce
-            reduce_batch_tail();
-
-            // store
-            store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt);
-
-            add(reg_dst, step * jcp_.dst_data_size);
-            add(reg_src, step * jcp_.src_data_size);
-            sub(reg_work_amount, step);
-
-            jmp(reduce_batch_label, T_NEAR);
-        }
-        L(reduce_batch_end_label);
-
-        L(reduce_label);
-        {
-            cmp(reg_work_amount, step);
-            jl(reduce_end_label, T_NEAR);
-
-            // load
-            load_scalar(xmm_dst, ptr[reg_dst], jcp_.dst_dt);
-
-            // reduce
-            reduce_batch_tail();
-
-            // store
-            store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt);
-
-            add(reg_dst, step * jcp_.dst_data_size);
-            add(reg_src, step * jcp_.src_data_size);
-            sub(reg_work_amount, step);
-
-            jmp(reduce_label, T_NEAR);
-        }
-        L(reduce_end_label);
-    }
-
-    inline void reduce_once_tail() {
-        load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt);
-        reduce_kernel_scalar(xmm_src, xmm_dst);
-        if (jcp_.reduce_mode == Algorithm::ReduceOr) {
-            uni_cmpneqps(xmm_dst, xmm_dst, xmm_zero);
-            uni_vandps(xmm_dst, xmm_dst, xmm_aux);
-        }
-    }
-
-    inline void reduce_batch_tail() {
-        mov(reg_src_aux, reg_src);
-        mov(reg_work_batch_aux, reg_work_batch);
-
-        Xbyak::Label reduce_batch_loop_label;
-        Xbyak::Label reduce_batch_loop_end_label;
-        L(reduce_batch_loop_label);
-        {
-            cmp(reg_work_batch_aux, 1);
-            jl(reduce_batch_loop_end_label, T_NEAR);
-
-            load_scalar(xmm_src, ptr[reg_src_aux], jcp_.src_dt);
-            reduce_kernel_scalar(xmm_src, xmm_dst);
-            if (jcp_.reduce_mode == Algorithm::ReduceOr) {
-                uni_cmpneqps(xmm_dst, xmm_dst, xmm_zero);
-                uni_vandps(xmm_dst, xmm_dst, xmm_aux);
-            }
-
-            add(reg_src_aux, reg_reduce_stride);
-            sub(reg_work_batch_aux, 1);
-            jmp(reduce_batch_loop_label, T_NEAR);
-        }
-        L(reduce_batch_loop_end_label);
-    }
-
-    inline void reduce_main_loop() {
-        Xbyak::Label reduce_loop_label;
-        Xbyak::Label reduce_loop_end_label;
-
-        int step = vlen / sizeof(float) < 8 ? 8 : vlen / sizeof(float);
-        L(reduce_loop_label);
-        {
-            cmp(reg_work_amount, step);
-            jl(reduce_loop_end_label, T_NEAR);
-
-            load_vector(vmm_src, ptr[reg_src], jcp_.src_dt);
-            reduce_kernel(vmm_src, vmm_dst);
-
-            if (isa == cpu::x64::sse41) {
-                load_vector(vmm_src, ptr[reg_src + 4 * jcp_.src_data_size], jcp_.src_dt);
-                reduce_kernel(vmm_src, vmm_dst);
-            }
-
-            add(reg_src, step * jcp_.src_data_size);
-            sub(reg_work_amount, step);
-
-            jmp(reduce_loop_label, T_NEAR);
-        }
-        L(reduce_loop_end_label);
-    }
-
-    inline void reduce_kernel(Vmm vmm_src, Vmm vmm_dst) {
-        switch (jcp_.reduce_mode) {
-            case Algorithm::ReduceAnd:
-                if (isa == cpu::x64::avx512_core) {
-                    vcmpps(k_mask, vmm_src, vmm_zero, _cmp_neq_uq);
-                    vblendmps(vmm_src | k_mask, vmm_zero, vmm_aux);
-                } else {
-                    uni_cmpneqps(vmm_src, vmm_src, vmm_zero);
-                }
-                uni_vandps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceL1:
-                uni_vandps(vmm_src, vmm_src, vmm_aux);
-                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceLogSum:
-            case Algorithm::ReduceMean:
-            case Algorithm::ReduceSum:
-                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceMax:
-                uni_vmaxps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceMin:
-                uni_vminps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceL2:
-            case Algorithm::ReduceSumSquare:
-                uni_vmulps(vmm_src, vmm_src, vmm_src);
-                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceLogSumExp:
-                exp_injector->compute_vector_range(vmm_src.getIdx(), vmm_src.getIdx() + 1);
-                uni_vaddps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceOr:
-                if (isa == cpu::x64::avx512_core) {
-                    vcmpps(k_mask, vmm_src, vmm_zero, _cmp_neq_uq);
-                    vblendmps(vmm_src | k_mask, vmm_zero, vmm_aux);
-                }
-                uni_vorps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            case Algorithm::ReduceProd:
-                uni_vmulps(vmm_dst, vmm_dst, vmm_src);
-                break;
-            default:
-                assert(!"unsupported reduce mode");
-        }
-    }
-
-    inline void reduce_kernel_scalar(Xmm xmm_src, Xmm xmm_dst) {
-        switch (jcp_.reduce_mode) {
-            case Algorithm::ReduceAnd:
-                uni_cmpneqps(xmm_src, xmm_src, xmm_zero);
-                uni_vandps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceL1:
-                uni_vandps(xmm_src, xmm_src, xmm_aux);
-                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceLogSum:
-            case Algorithm::ReduceMean:
-            case Algorithm::ReduceSum:
-                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceMax:
-                uni_vmaxps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceMin:
-                uni_vminps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceL2:
-            case Algorithm::ReduceSumSquare:
-                uni_vmulps(xmm_src, xmm_src, xmm_src);
-                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceLogSumExp:
-                exp_injector->compute_vector_range(xmm_src.getIdx(), xmm_src.getIdx() + 1);
-                uni_vaddps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceOr:
-                uni_vorps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            case Algorithm::ReduceProd:
-                uni_vmulps(xmm_dst, xmm_dst, xmm_src);
-                break;
-            default:
-                assert(!"unsupported reduce mode");
-        }
-    }
-
-    inline void load_dst_vector() {
-        load_vector(vmm_dst, ptr[reg_dst], jcp_.dst_dt);
-        if (isa == cpu::x64::sse41)
-            load_vector(vmm_dst_aux, ptr[reg_dst + 4 * jcp_.dst_data_size], jcp_.dst_dt);
-    }
-
-    inline void store_dst_vector() {
-        if (jcp_.reduce_mode == Algorithm::ReduceOr && isa != cpu::x64::avx512_core) {
-            uni_cmpneqps(vmm_dst, vmm_dst, vmm_zero);
-            uni_vandps(vmm_dst, vmm_dst, vmm_aux);
-
-            if (isa == cpu::x64::sse41) {
-                uni_cmpneqps(vmm_dst_aux, vmm_dst_aux, vmm_zero);
-                uni_vandps(vmm_dst_aux, vmm_dst_aux, vmm_aux);
-            }
-        }
-        store_vector(ptr[reg_dst], vmm_dst, jcp_.dst_dt);
-        if (isa == cpu::x64::sse41)
-            store_vector(ptr[reg_dst + 4 * jcp_.dst_data_size], vmm_dst_aux, jcp_.dst_dt);
-    }
-
-    inline void load_vector(Vmm vmm_src, const Xbyak::Address &op, memory::data_type src_dt) {
-        switch (src_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovups(vmm_src, op);
-                break;
-            case memory::data_type::bf16:
-                uni_vpmovzxwd(vmm_src, op);
-                uni_vpslld(vmm_src, vmm_src, 16);
-                break;
-            case memory::data_type::s8:
-                uni_vpmovsxbd(vmm_src, op);
-                break;
-            case memory::data_type::u8:
-                uni_vpmovzxbd(vmm_src, op);
-                break;
-            default:
-                assert(!"unknown src_dt");
-        }
-
-        if (!isFloatCompatible(src_dt))
-            uni_vcvtdq2ps(vmm_src, vmm_src);
-    }
-
-    inline void load_scalar(Xmm xmm_src, const Xbyak::Address &op, memory::data_type src_dt) {
-        switch (src_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovss(xmm_src, op);
-                break;
-            case memory::data_type::bf16:
-                uni_vpinsrw(xmm_src, xmm_src, op, 0x0);
-                uni_vpslld(xmm_src, xmm_src, 16);
-                break;
-            case memory::data_type::s8:
-                movsx(reg_tmp_32, op);
-                uni_vmovq(xmm_src, reg_tmp_64);
-                break;
-            case memory::data_type::u8:
-                movzx(reg_tmp_32, op);
-                uni_vmovq(xmm_src, reg_tmp_64);
-                break;
-            default:
-                assert(!"unknown src_dt");
-        }
-
-        if (!isFloatCompatible(src_dt)) {
-            uni_vcvtdq2ps(xmm_src, xmm_src);
-        }
-    }
-
-    inline void store_vector(const Xbyak::Address &op, Vmm vmm_dst, memory::data_type dst_dt) {
-        Xmm xmm_dst = Xmm(vmm_dst.getIdx());
-        Ymm ymm_dst = Ymm(vmm_dst.getIdx());
-
-        if (!isFloatCompatible(dst_dt)) {
-            uni_vcvtps2dq(vmm_dst, vmm_dst);
-        }
-
-        switch (dst_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovups(op, vmm_dst);
-                break;
-            case memory::data_type::bf16:
-                uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
-                vmovdqu16(op, ymm_dst);
-                break;
-            case memory::data_type::s8:
-                if (isa == cpu::x64::avx512_core) {
-                    vpmovsdb(op, vmm_dst);
-                } else {
-                    uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vpermq(ymm_dst, ymm_dst, 0x08);
-                    uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vmovq(op, xmm_dst);
-                    else
-                        uni_vmovd(op, xmm_dst);
-                }
-                break;
-            case memory::data_type::u8:
-                if (isa == cpu::x64::avx512_core) {
-                    vpmaxsd(vmm_dst, vmm_zero, vmm_dst);
-                    vpmovusdb(op, vmm_dst);
-                } else {
-                    uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vpermq(ymm_dst, ymm_dst, 0x08);
-                    uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vmovq(op, xmm_dst);
-                    else
-                        uni_vmovd(op, xmm_dst);
-                }
-                break;
-            default:
-                assert(!"unknown dst_dt");
-        }
-    }
-
-    inline void store_scalar(const Xbyak::Address &op, Xmm xmm_dst, memory::data_type dst_dt) {
-        if (!isFloatCompatible(dst_dt)) {
-            uni_vcvtps2dq(xmm_dst, xmm_dst);
-        }
-
-        switch (dst_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovss(op, xmm_dst);
-                break;
-            case memory::data_type::bf16:
-                uni_vpsrld(xmm_dst, xmm_dst, 16);
-                uni_vpextrw(op, xmm_dst, 0x0);
-                break;
-            case memory::data_type::s8:
-                uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst);
-                uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
-                uni_vmovq(reg_tmp_64, xmm_dst);
-                mov(op, reg_tmp_8);
-                break;
-            case memory::data_type::u8:
-                uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst);
-                uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst);
-                uni_vmovq(reg_tmp_64, xmm_dst);
-                mov(op, reg_tmp_8);
-                break;
-            default:
-                assert(!"unknown dst_dt");
-        }
-    }
-
-    inline void horiz_reduce_store(Vmm vmm_dst, memory::data_type dst_dt, bool load_embedded = false) {
-        if (isa == cpu::x64::sse41) {
-            horiz_store(vmm_dst, dst_dt, load_embedded);
-        } else if (isa == cpu::x64::avx2) {
-            Xbyak::Ymm ymm_dst = Xbyak::Ymm(vmm_dst.getIdx());
-            vextractf128(xmm_aux1, ymm_dst, 0);
-            vextractf128(xmm_aux2, ymm_dst, 1);
-            horiz_ps(xmm_aux1, xmm_aux2);
-            horiz_store(xmm_aux1, dst_dt, load_embedded);
-        } else {
-            Xbyak::Zmm zmm_dst = Xbyak::Zmm(vmm_dst.getIdx());
-            vextractf32x4(xmm_aux1, zmm_dst, 0);
-            vextractf32x4(xmm_aux2, zmm_dst, 1);
-            horiz_ps(xmm_aux1, xmm_aux2);
-            vextractf32x4(xmm_aux2, zmm_dst, 2);
-            vextractf32x4(xmm_aux3, zmm_dst, 3);
-            horiz_ps(xmm_aux2, xmm_aux3);
-            horiz_ps(xmm_aux1, xmm_aux2);
-            horiz_store(xmm_aux1, dst_dt, load_embedded);
-        }
-    }
-
-    inline void horiz_store(Xbyak::Xmm xmm_dst, memory::data_type dst_dt, bool load_embedded) {
-        uni_vmovshdup(xmm_aux3, xmm_dst);          // dst:1,2,3,4; aux3:2,2,4,4
-        horiz_ps(xmm_dst, xmm_aux3);               // dst:f(1,2),f(2,2),f(3,4),f(4,4)
-        uni_vmovhlps(xmm_aux3, xmm_aux3, xmm_dst); // aux3:f(3,4),f(4,4),4,4
-        horiz_ps(xmm_dst, xmm_aux3);               // dst:f(1,2,3,4),...
-        if (load_embedded) {
-            load_scalar(xmm_aux3, ptr[reg_dst], dst_dt);
-            horiz_ps(xmm_dst, xmm_aux3);
-        }
-        store_scalar(ptr[reg_dst], xmm_dst, dst_dt);
-    }
-
-    inline void horiz_ps(const Xmm& xmm, const Operand& op) {
-        switch (jcp_.reduce_mode) {
-            case Algorithm::ReduceAnd:
-                uni_vandps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceL1:
-            case Algorithm::ReduceL2:
-            case Algorithm::ReduceLogSum:
-            case Algorithm::ReduceMean:
-            case Algorithm::ReduceSum:
-            case Algorithm::ReduceSumSquare:
-            case Algorithm::ReduceLogSumExp:
-                uni_vaddps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceMax:
-                uni_vmaxps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceMin:
-                uni_vminps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceOr:
-                uni_vorps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceProd:
-                uni_vmulps(xmm, xmm, op);
-                break;
-            default:
-                assert(!"unsupported reduce mode");
-        }
-    }
-
-    void prepare_aux_table() {
-        auto broadcast_int = [&](int val) {
-            for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-                dd(val);
-            }
-        };
-
-        align(64);
-        L(l_table);
-
-        broadcast_int(aux_vals.float_one);
-        broadcast_int(aux_vals.float_abs);
-        broadcast_int(aux_vals.float_min);
-        broadcast_int(aux_vals.float_max);
-        broadcast_int(aux_vals.int32_min);
-        broadcast_int(aux_vals.int32_max);
-    }
-
-    const struct aux_vals_type {
-        int float_one = 0x3f800000; // 1.0f
-        int float_abs = 0x7fffffff; // mask to make positive
-        int float_min = 0xff7fffff; // float minimum
-        int float_max = 0x7f7fffff; // float maximum
-        int int32_min = 0xcf000000; // -2^31 presented in float
-        int int32_max = 0x4effffff; // 2^31-1 presented in float
-    } aux_vals;
-};
-
-template <cpu_isa_t isa>
-struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_reduce_post_kernel_f32)
-
-    explicit jit_uni_reduce_post_kernel_f32(jit_reduce_config_params jcp, const dnnl_primitive_attr &attr)
-    : jit_uni_reduce_post_kernel(jcp, attr), jit_generator(jit_name()) {}
-
-    void create_ker() override {
-        jit_generator::create_kernel();
-        ker_ = (decltype(ker_))jit_ker();
-    }
-
-    void generate() override {
-        const auto &p = attr_.post_ops_;
-        for (int i = 0; i < p.len(); i++) {
-            auto &post_op = p.entry_[i];
-            if (post_op.is_eltwise()) {
-                eltwise_injectors.push_back(std::make_shared<jit_uni_eltwise_injector_f32<isa>>(
-                        this, post_op.eltwise.alg, post_op.eltwise.alpha, post_op.eltwise.beta, post_op.eltwise.scale));
-            } else if (post_op.is_depthwise()) {
-                depthwise_injectors.push_back(std::make_shared<jit_uni_depthwise_injector_f32<isa>>(
-                        this, post_op));
-            } else if (post_op.is_quantization()) {
-                quantization_injectors.push_back(std::make_shared<jit_uni_quantization_injector_f32<isa>>(
-                        this, post_op, vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias));
-            }
-        }
-
-        if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) {
-            log_injector = std::make_shared<jit_uni_eltwise_injector_f32<isa>>(this, alg_kind::eltwise_log, 0.f, 0.f, 1.f);
-        }
-
-        if (mayiuse(avx512_core))
-            uni_vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa);
-
-        this->preamble();
-
-        planar_layout = jcp_.layout == ReduceLayoutType::reduce_ncsp || jcp_.layout == ReduceLayoutType::reduce_nspc;
-        post_reduce = jcp_.reduce_mode == Algorithm::ReduceL2 || jcp_.reduce_mode == Algorithm::ReduceMean ||
-                      jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp;
-        post_ops_fusing = attr_.post_ops_.len() != 0;
-
-        mov(reg_dst, ptr[reg_params + GET_OFF_POST(dst)]);
-        mov(reg_work_amount, ptr[reg_params + GET_OFF_POST(work_amount)]);
-        mov(reg_channel_size, ptr[reg_params + GET_OFF_POST(channel_size)]);
-        mov(reg_divisor, ptr[reg_params + GET_OFF_POST(divisor)]);
-        if (jcp_.fuse_low_precision)
-            mov(reg_src, ptr[reg_params + GET_OFF_POST(src)]);
-        if (!planar_layout)
-            mov(reg_reduce_c, ptr[reg_params + GET_OFF_POST(reduce_c)]);
-        if (post_ops_fusing) {
-            mov(reg_post_ops_data, ptr[reg_params + GET_OFF_POST(post_op_data)]);
-            mov(reg_oc_off, ptr[reg_params + GET_OFF_POST(oc_off)]);
-        }
-
-        if (isa == cpu::x64::avx512_core)
-            uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-
-        if (jcp_.layout == ReduceLayoutType::reduce_blocked) {
-            reduce_post_main();
-        } else if (jcp_.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing) {
-            // the tail of channel dimension should always be concerned during post ops fusing for nspc layout
-            Xbyak::Label reduce_nspc_loop_label;
-            Xbyak::Label reduce_nspc_loop_end_label;
-            mov(reg_total_work_amount, reg_work_amount);
-            L(reduce_nspc_loop_label);
-            {
-                cmp(reg_total_work_amount, 0);
-                jle(reduce_nspc_loop_end_label, T_NEAR);
-
-                mov(reg_oc_off, 0);
-                mov(reg_work_amount, reg_channel_size);
-                reduce_post_main();
-                reduce_post_tail();
-
-                sub(reg_total_work_amount, reg_channel_size);
-                jmp(reduce_nspc_loop_label, T_NEAR);
-            }
-            L(reduce_nspc_loop_end_label);
-        } else {
-            reduce_post_main();
-            reduce_post_tail();
-        }
-
-        this->postamble();
-
-        if (mayiuse(avx512_core))
-            uni_vcvtneps2bf16->emit_data();
-
-        if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) {
-            log_injector->prepare_table();
-        }
-
-        for (auto& inj : eltwise_injectors)
-            inj->prepare_table();
-    }
-
-private:
-    using Vmm = typename conditional3<isa == cpu::x64::sse41, Xbyak::Xmm, isa == cpu::x64::avx2,
-            Xbyak::Ymm, Xbyak::Zmm>::type;
-    size_t vlen = cpu_isa_traits<isa>::vlen;
-    bool planar_layout = false;
-    bool post_reduce = true;
-    bool post_ops_fusing = false;
-
-    Xbyak::Reg64 reg_src = rbp;
-    Xbyak::Reg64 reg_dst = r8;
-    Xbyak::Reg64 reg_work_amount = r9;
-    Xbyak::Reg64 reg_total_work_amount = r10;
-    Xbyak::Reg64 reg_channel_size = r11;
-    Xbyak::Reg64 reg_divisor = r12;
-    Xbyak::Reg64 reg_reduce_c = r13;
-    Xbyak::Reg64 reg_params = abi_param1;
-
-    Xbyak::Reg8 reg_tmp_8 = r14b;
-    Xbyak::Reg32 reg_tmp_32 = r14d;
-    Xbyak::Reg64 reg_tmp_64 = r14;
-
-    Xbyak::Reg64 reg_oc_off = rax;
-    Xbyak::Reg64 reg_d_weights = rbx;
-    Xbyak::Reg64 reg_d_bias = rdx;
-    Xbyak::Reg64 reg_post_ops_data = r15;
-
-    Vmm vmm_aux = Vmm(0);
-    Xmm xmm_aux = Xmm(0);
-    Vmm vmm_dst = Vmm(1);
-    Xmm xmm_dst = Xmm(1);
-    Vmm vmm_zero = Vmm(2);
-    Vmm vmm_dst_aux = Vmm(3);
-    Xbyak::Xmm xmm_aux1 = Xbyak::Xmm(4);
-    Xbyak::Xmm xmm_aux2 = Xbyak::Xmm(5);
-    Xbyak::Xmm xmm_aux3 = Xbyak::Xmm(6);
-
-    Vmm vmm_d_weights = Vmm(7);
-    Vmm vmm_d_bias = Vmm(8);
-
-    std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16;
-    std::shared_ptr<jit_uni_eltwise_injector_f32<isa>> log_injector;
-
-    std::vector<std::shared_ptr<jit_uni_eltwise_injector_f32<isa>>> eltwise_injectors;
-    std::vector<std::shared_ptr<jit_uni_depthwise_injector_f32<isa>>> depthwise_injectors;
-    std::vector<std::shared_ptr<jit_uni_quantization_injector_f32<isa>>> quantization_injectors;
-
-    inline void reduce_post_main() {
-        Xbyak::Label reduce_channel_label;
-        Xbyak::Label reduce_map_label;
-        if (planar_layout) {
-            jmp(reduce_map_label, T_NEAR);
-        } else {
-            cmp(reg_reduce_c, 1);
-            jne(reduce_map_label, T_NEAR);
-        }
-
-        // further reduce channel block since reduce channel batch has already been reduced
-        // (X1, X2, X3, X4, X5, X6, X7, X8) -> (Y1, N/A, N/A, N/A, N/A, N/A, N/A, N/A)
-        // cases: [blocked layout reducing channel dimensions]
-        L(reduce_channel_label);
-        {
-            Xbyak::Label reduce_loop_label;
-            Xbyak::Label reduce_loop_end_label;
-
-            int step = vlen / sizeof(float) < 8 ? 8 : vlen / sizeof(float);
-            L(reduce_loop_label);
-            {
-                cmp(reg_work_amount, step);
-                jl(reduce_loop_end_label, T_NEAR);
-
-                // load
-                wrap_load_vector(vmm_dst, 0);
-                if (isa == cpu::x64::sse41)
-                    wrap_load_vector(vmm_dst_aux, 4);
-
-                // reduce and store
-                horiz_reduce_store(vmm_dst, jcp_.dst_dt);
-                if (isa == cpu::x64::sse41)
-                    horiz_reduce_store(vmm_dst_aux, jcp_.dst_dt, true);
-
-                add(reg_dst, step * jcp_.dst_data_size);
-                if (jcp_.fuse_low_precision)
-                    add(reg_src, step * sizeof(float));
-                sub(reg_work_amount, step);
-
-                jmp(reduce_loop_label, T_NEAR);
-            }
-            L(reduce_loop_end_label);
-
-            if (post_reduce || post_ops_fusing) {
-                mov(reg_dst, ptr[reg_params + GET_OFF_POST(dst)]);
-                if (jcp_.fuse_low_precision)
-                    mov(reg_src, ptr[reg_params + GET_OFF_POST(src)]);
-                mov(reg_work_amount, ptr[reg_params + GET_OFF_POST(work_amount)]);
-            }
-        }
-
-        // reduce map for value in dst memory
-        // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean]
-        L(reduce_map_label);
-        {
-            if (post_reduce) {
-                if (jcp_.reduce_mode == Algorithm::ReduceMean)
-                    uni_vbroadcastss(vmm_aux, ptr[reg_divisor]);
-
-                Xbyak::Label reduce_loop_label;
-                Xbyak::Label reduce_loop_end_label;
-
-                int step = vlen / sizeof(float) < 8 ? 8 : vlen / sizeof(float);
-                L(reduce_loop_label);
-                {
-                    cmp(reg_work_amount, step);
-                    jl(reduce_loop_end_label, T_NEAR);
-
-                    wrap_load_vector(vmm_dst, 0);
-                    reduce_map_kernel(vmm_dst);
-                    if (post_ops_fusing)
-                        apply_post_ops(jcp_.dst_dt, jcp_.layout == ReduceLayoutType::reduce_ncsp);
-                    store_vector(ptr[reg_dst], vmm_dst, jcp_.dst_dt);
-
-                    if (isa == cpu::x64::sse41) {
-                        wrap_load_vector(vmm_dst, 4);
-                        reduce_map_kernel(vmm_dst);
-                        if (post_ops_fusing) {
-                            if (jcp_.layout != ReduceLayoutType::reduce_ncsp)
-                                add(reg_oc_off, 4 * sizeof(float));
-                            apply_post_ops(jcp_.dst_dt, jcp_.layout == ReduceLayoutType::reduce_ncsp);
-                            if (jcp_.layout != ReduceLayoutType::reduce_ncsp)
-                                sub(reg_oc_off, 4 * sizeof(float));
-                        }
-                        store_vector(ptr[reg_dst + 4 * jcp_.dst_data_size], vmm_dst, jcp_.dst_dt);
-                    }
-
-                    add(reg_dst, step * jcp_.dst_data_size);
-                    if (jcp_.fuse_low_precision)
-                        add(reg_src, step * sizeof(float));
-                    if (jcp_.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing)
-                        add(reg_oc_off, step * sizeof(float));
-                    sub(reg_work_amount, step);
-
-                    jmp(reduce_loop_label, T_NEAR);
-                }
-                L(reduce_loop_end_label);
-            } else {
-                if (post_ops_fusing) {
-                    Xbyak::Label reduce_loop_label;
-                    Xbyak::Label reduce_loop_end_label;
-
-                    int step = vlen / sizeof(float) < 8 ? 8 : vlen / sizeof(float);
-                    L(reduce_loop_label);
-                    {
-                        cmp(reg_work_amount, step);
-                        jl(reduce_loop_end_label, T_NEAR);
-
-                        wrap_load_vector(vmm_dst, 0);
-                        apply_post_ops(jcp_.dst_dt, jcp_.layout == ReduceLayoutType::reduce_ncsp);
-                        store_vector(ptr[reg_dst], vmm_dst, jcp_.dst_dt);
-
-                        if (isa == cpu::x64::sse41) {
-                            wrap_load_vector(vmm_dst, 4);
-                            if (jcp_.layout != ReduceLayoutType::reduce_ncsp)
-                                add(reg_oc_off, 4 * sizeof(float));
-                            apply_post_ops(jcp_.dst_dt, jcp_.layout == ReduceLayoutType::reduce_ncsp);
-                            if (jcp_.layout != ReduceLayoutType::reduce_ncsp)
-                                sub(reg_oc_off, 4 * sizeof(float));
-                            store_vector(ptr[reg_dst + 4 * jcp_.dst_data_size], vmm_dst, jcp_.dst_dt);
-                        }
-
-                        add(reg_dst, step * jcp_.dst_data_size);
-                        if (jcp_.fuse_low_precision)
-                            add(reg_src, step * sizeof(float));
-                        if (jcp_.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing)
-                            add(reg_oc_off, step * sizeof(float));
-                        sub(reg_work_amount, step);
-
-                        jmp(reduce_loop_label, T_NEAR);
-                    }
-                    L(reduce_loop_end_label);
-                }
-            }
-        }
-    }
-
-    inline void reduce_post_tail() {
-        // reduce map for tail in dst memory
-        // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean] in planar layout
-        if (post_reduce) {
-            if (jcp_.reduce_mode == Algorithm::ReduceMean)
-                uni_vbroadcastss(xmm_aux, ptr[reg_divisor]);
-
-            Xbyak::Label reduce_loop_label;
-            Xbyak::Label reduce_loop_end_label;
-
-            int step = 1;
-            L(reduce_loop_label);
-            {
-                cmp(reg_work_amount, step);
-                jl(reduce_loop_end_label, T_NEAR);
-
-                // load
-                wrap_load_scalar(xmm_dst, 0);
-
-                // reduce
-                reduce_map_kernel_scalar(xmm_dst);
-
-                // store
-                if (post_ops_fusing)
-                    apply_post_ops(jcp_.dst_dt, jcp_.layout == ReduceLayoutType::reduce_ncsp);
-                store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt);
-
-                add(reg_dst, step * jcp_.dst_data_size);
-                if (jcp_.fuse_low_precision)
-                    add(reg_src, step * sizeof(float));
-                if (jcp_.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing)
-                    add(reg_oc_off, step * sizeof(float));
-                sub(reg_work_amount, step);
-
-                jmp(reduce_loop_label, T_NEAR);
-            }
-            L(reduce_loop_end_label);
-        } else {
-            if (post_ops_fusing) {
-                Xbyak::Label reduce_loop_label;
-                Xbyak::Label reduce_loop_end_label;
-
-                int step = 1;
-                L(reduce_loop_label);
-                {
-                    cmp(reg_work_amount, step);
-                    jl(reduce_loop_end_label, T_NEAR);
-
-                    // load
-                    wrap_load_scalar(xmm_dst, 0);
-
-                    // store
-                    apply_post_ops(jcp_.dst_dt, jcp_.layout == ReduceLayoutType::reduce_ncsp);
-                    store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt);
-
-                    add(reg_dst, step * jcp_.dst_data_size);
-                    if (jcp_.fuse_low_precision)
-                        add(reg_src, step * sizeof(float));
-                    if (jcp_.layout == ReduceLayoutType::reduce_nspc && post_ops_fusing)
-                        add(reg_oc_off, step * sizeof(float));
-                    sub(reg_work_amount, step);
-
-                    jmp(reduce_loop_label, T_NEAR);
-                }
-                L(reduce_loop_end_label);
-            }
-        }
-    }
-
-    void apply_post_ops(memory::data_type dst_dt, bool is_broadcast) {
-        const auto &p = attr_.post_ops_;
-        int eltwise_inj_idx = 0;
-        int depthwise_inj_idx = 0;
-        int quantization_inj_idx = 0;
-        int post_ops_data_offset = 0;
-        for (int i = 0; i < p.len(); i++) {
-            auto& post_op = p.entry_[i];
-            if (post_op.is_eltwise()) {
-                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(vmm_dst.getIdx(), vmm_dst.getIdx() + 1);
-                eltwise_inj_idx++;
-            } else if (post_op.is_depthwise()) {
-                mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]);
-                add(reg_d_weights, reg_oc_off);
-
-                depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
-                        vmm_dst.getIdx(), vmm_dst.getIdx() + 1, reg_d_weights, reg_d_weights, is_broadcast);
-
-                post_ops_data_offset += depthwise_injectors[depthwise_inj_idx]->memoryStep();
-                depthwise_inj_idx++;
-            } else if (post_op.is_quantization()) {
-                bool do_dequantization = post_op.quantization.alg == alg_kind::quantization_quantize_dequantize;
-                bool do_rounding = do_dequantization || isFloatCompatible(dst_dt) || i != p.len() - 1;
-
-                int s_idx = vmm_dst.getIdx();
-
-                quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
-                quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast);
-
-                quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
-                quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast);
-
-                if (do_dequantization) {
-                    quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
-                    quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast);
-                }
-
-                post_ops_data_offset += quantization_injectors[quantization_inj_idx]->memoryStep();
-                quantization_inj_idx++;
-            }
-        }
-    }
-
-    inline void reduce_map_kernel(Vmm vmm_dst) {
-        if (jcp_.reduce_mode == Algorithm::ReduceMean)
-            uni_vdivps(vmm_dst, vmm_dst, vmm_aux);
-        else if (jcp_.reduce_mode == Algorithm::ReduceL2)
-            uni_vsqrtps(vmm_dst, vmm_dst);
-        else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp)
-            log_injector->compute_vector_range(vmm_dst.getIdx(), vmm_dst.getIdx() + 1);
-    }
-
-    inline void reduce_map_kernel_scalar(Xmm xmm_dst) {
-        if (jcp_.reduce_mode == Algorithm::ReduceMean)
-            uni_vdivps(xmm_dst, xmm_dst, xmm_aux);
-        else if (jcp_.reduce_mode == Algorithm::ReduceL2)
-            uni_vsqrtps(xmm_dst, xmm_dst);
-        else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp)
-            log_injector->compute_vector_range(xmm_dst.getIdx(), xmm_dst.getIdx() + 1);
-    }
-
-    inline void wrap_load_vector(Vmm vmm_val, size_t offset) {
-        if (jcp_.fuse_low_precision)
-            load_vector(vmm_val, ptr[reg_src + offset * sizeof(float)], memory::data_type::f32);
-        else
-            load_vector(vmm_val, ptr[reg_dst + offset * jcp_.dst_data_size], jcp_.dst_dt);
-    }
-
-    inline void wrap_load_scalar(Xmm xmm_val, size_t offset) {
-        if (jcp_.fuse_low_precision)
-            load_scalar(xmm_val, ptr[reg_src + offset * sizeof(float)], memory::data_type::f32);
-        else
-            load_scalar(xmm_val, ptr[reg_dst + offset * jcp_.dst_data_size], jcp_.dst_dt);
-    }
-
-    inline void load_vector(Vmm vmm_src, const Xbyak::Address &op, memory::data_type src_dt) {
-        switch (src_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovups(vmm_src, op);
-                break;
-            case memory::data_type::bf16:
-                uni_vpmovzxwd(vmm_src, op);
-                uni_vpslld(vmm_src, vmm_src, 16);
-                break;
-            case memory::data_type::s8:
-                uni_vpmovsxbd(vmm_src, op);
-                break;
-            case memory::data_type::u8:
-                uni_vpmovzxbd(vmm_src, op);
-                break;
-            default:
-                assert(!"unknown src_dt");
-        }
-
-        if (!isFloatCompatible(src_dt))
-            uni_vcvtdq2ps(vmm_src, vmm_src);
-    }
-
-    inline void load_scalar(Xmm xmm_src, const Xbyak::Address &op, memory::data_type src_dt) {
-        switch (src_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovss(xmm_src, op);
-                break;
-            case memory::data_type::bf16:
-                uni_vpinsrw(xmm_src, xmm_src, op, 0x0);
-                uni_vpslld(xmm_src, xmm_src, 16);
-                break;
-            case memory::data_type::s8:
-                movsx(reg_tmp_32, op);
-                uni_vmovq(xmm_src, reg_tmp_64);
-                break;
-            case memory::data_type::u8:
-                movzx(reg_tmp_32, op);
-                uni_vmovq(xmm_src, reg_tmp_64);
-                break;
-            default:
-                assert(!"unknown src_dt");
-        }
-
-        if (!isFloatCompatible(src_dt)) {
-            uni_vcvtdq2ps(xmm_src, xmm_src);
-        }
-    }
-
-    inline void store_vector(const Xbyak::Address &op, Vmm vmm_dst, memory::data_type dst_dt) {
-        Xmm xmm_dst = Xmm(vmm_dst.getIdx());
-        Ymm ymm_dst = Ymm(vmm_dst.getIdx());
-
-        if (!isFloatCompatible(dst_dt)) {
-            uni_vcvtps2dq(vmm_dst, vmm_dst);
-        }
-
-        switch (dst_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovups(op, vmm_dst);
-                break;
-            case memory::data_type::bf16:
-                uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
-                vmovdqu16(op, ymm_dst);
-                break;
-            case memory::data_type::s8:
-                if (isa == cpu::x64::avx512_core) {
-                    vpmovsdb(op, vmm_dst);
-                } else {
-                    uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vpermq(ymm_dst, ymm_dst, 0x08);
-                    uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vmovq(op, xmm_dst);
-                    else
-                        uni_vmovd(op, xmm_dst);
-                }
-                break;
-            case memory::data_type::u8:
-                if (isa == cpu::x64::avx512_core) {
-                    vpmaxsd(vmm_dst, vmm_zero, vmm_dst);
-                    vpmovusdb(op, vmm_dst);
-                } else {
-                    uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vpermq(ymm_dst, ymm_dst, 0x08);
-                    uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst);
-                    if (isa != cpu::x64::sse41)
-                        vmovq(op, xmm_dst);
-                    else
-                        uni_vmovd(op, xmm_dst);
-                }
-                break;
-            default:
-                assert(!"unknown dst_dt");
-        }
-    }
-
-    inline void store_scalar(const Xbyak::Address &op, Xmm xmm_dst, memory::data_type dst_dt) {
-        if (!isFloatCompatible(dst_dt)) {
-            uni_vcvtps2dq(xmm_dst, xmm_dst);
-        }
-
-        switch (dst_dt) {
-            case memory::data_type::f32:
-            case memory::data_type::s32:
-                uni_vmovss(op, xmm_dst);
-                break;
-            case memory::data_type::bf16:
-                uni_vpsrld(xmm_dst, xmm_dst, 16);
-                uni_vpextrw(op, xmm_dst, 0x0);
-                break;
-            case memory::data_type::s8:
-                uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst);
-                uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
-                uni_vmovq(reg_tmp_64, xmm_dst);
-                mov(op, reg_tmp_8);
-                break;
-            case memory::data_type::u8:
-                uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst);
-                uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst);
-                uni_vmovq(reg_tmp_64, xmm_dst);
-                mov(op, reg_tmp_8);
-                break;
-            default:
-                assert(!"unknown dst_dt");
-        }
-    }
-
-    inline void horiz_reduce_store(Vmm vmm_dst, memory::data_type dst_dt, bool load_embedded = false) {
-        if (isa == cpu::x64::sse41) {
-            horiz_store(vmm_dst, dst_dt, load_embedded);
-        } else if (isa == cpu::x64::avx2) {
-            Xbyak::Ymm ymm_dst = Xbyak::Ymm(vmm_dst.getIdx());
-            vextractf128(xmm_aux1, ymm_dst, 0);
-            vextractf128(xmm_aux2, ymm_dst, 1);
-            horiz_ps(xmm_aux1, xmm_aux2);
-            horiz_store(xmm_aux1, dst_dt, load_embedded);
-        } else {
-            Xbyak::Zmm zmm_dst = Xbyak::Zmm(vmm_dst.getIdx());
-            vextractf32x4(xmm_aux1, zmm_dst, 0);
-            vextractf32x4(xmm_aux2, zmm_dst, 1);
-            horiz_ps(xmm_aux1, xmm_aux2);
-            vextractf32x4(xmm_aux2, zmm_dst, 2);
-            vextractf32x4(xmm_aux3, zmm_dst, 3);
-            horiz_ps(xmm_aux2, xmm_aux3);
-            horiz_ps(xmm_aux1, xmm_aux2);
-            horiz_store(xmm_aux1, dst_dt, load_embedded);
-        }
-    }
-
-    inline void horiz_store(Xbyak::Xmm xmm_dst, memory::data_type dst_dt, bool load_embedded) {
-        uni_vmovshdup(xmm_aux3, xmm_dst);          // dst:1,2,3,4; aux3:2,2,4,4
-        horiz_ps(xmm_dst, xmm_aux3);               // dst:f(1,2),f(2,2),f(3,4),f(4,4)
-        uni_vmovhlps(xmm_aux3, xmm_aux3, xmm_dst); // aux3:f(3,4),f(4,4),4,4
-        horiz_ps(xmm_dst, xmm_aux3);               // dst:f(1,2,3,4),...
-        if (jcp_.fuse_low_precision && (post_reduce || post_ops_fusing)) {
-            if (load_embedded) {
-                load_scalar(xmm_aux3, ptr[reg_src], memory::data_type::f32);
-                horiz_ps(xmm_dst, xmm_aux3);
-            }
-            store_scalar(ptr[reg_src], xmm_dst, memory::data_type::f32);
-        } else {
-            if (load_embedded) {
-                load_scalar(xmm_aux3, ptr[reg_dst], dst_dt);
-                horiz_ps(xmm_dst, xmm_aux3);
-            }
-            store_scalar(ptr[reg_dst], xmm_dst, dst_dt);
-        }
-    }
-
-    inline void horiz_ps(const Xmm& xmm, const Operand& op) {
-        switch (jcp_.reduce_mode) {
-            case Algorithm::ReduceAnd:
-                uni_vandps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceL1:
-            case Algorithm::ReduceL2:
-            case Algorithm::ReduceLogSum:
-            case Algorithm::ReduceMean:
-            case Algorithm::ReduceSum:
-            case Algorithm::ReduceSumSquare:
-            case Algorithm::ReduceLogSumExp:
-                uni_vaddps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceMax:
-                uni_vmaxps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceMin:
-                uni_vminps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceOr:
-                uni_vorps(xmm, xmm, op);
-                break;
-            case Algorithm::ReduceProd:
-                uni_vmulps(xmm, xmm, op);
-                break;
-            default:
-                assert(!"unsupported reduce mode");
-        }
-    }
-};
-
-#endif // OPENVINO_ARCH_X86_64
+} // namespace
 
-const std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, Reduce&)>> Reduce::initializers = {
-    {ngraph::opset4::ReduceL1::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+const std::map<const ov::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ov::Node>&, Reduce&)>> Reduce::initializers = {
+    {op::v4::ReduceL1::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceL1;
     }},
-    {ngraph::opset4::ReduceL2::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v4::ReduceL2::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceL2;
     }},
-    {ngraph::opset1::ReduceLogicalAnd::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceLogicalAnd::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceAnd;
     }},
-    {ngraph::opset1::ReduceLogicalOr::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceLogicalOr::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceOr;
     }},
-    {ngraph::opset1::ReduceMax::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceMax::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceMax;
     }},
-    {ngraph::opset1::ReduceMean::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceMean::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceMean;
     }},
-    {ngraph::opset1::ReduceMin::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceMin::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceMin;
     }},
-    {ngraph::opset1::ReduceProd::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceProd::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceProd;
     }},
-    {ngraph::opset1::ReduceSum::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, Reduce& node) {
+    {op::v1::ReduceSum::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Reduce& node) {
         node.algorithm = Algorithm::ReduceSum;
     }}
 };
 
-bool Reduce::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Reduce::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {  // errorMessage is not set when an exception is caught
     try {
-        if (std::dynamic_pointer_cast<const ngraph::op::util::ArithmeticReductionKeepDims>(op) == nullptr &&
-                std::dynamic_pointer_cast<const ngraph::op::util::LogicalReductionKeepDims>(op) == nullptr) {
+        if (!op->get_type_info().is_castable(op::util::ArithmeticReductionKeepDims::get_type_info_static()) &&
+                !op->get_type_info().is_castable(op::util::LogicalReductionKeepDims::get_type_info_static())) {
             errorMessage = "Reduce node with name " + op->get_friendly_name() + " is not derived from ArithmeticReductionKeepDims or LogicalReductionKeepDims";
             return false;
         }
-        if (const auto reduce = std::dynamic_pointer_cast<const ngraph::op::util::ArithmeticReductionKeepDims>(op)) {
-            auto reduceConst = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(reduce->get_input_node_shared_ptr(REDUCE_INDEXES));
-            if (!reduceConst) {
-                errorMessage = "Second tensor is not constant";
-                return false;
-            }
+        const auto idxIn = op->get_input_node_shared_ptr(REDUCE_INDEXES);  // node feeding the REDUCE_INDEXES port
+        if (idxIn->get_type_info() != op::v0::Constant::get_type_info_static()) {  // exact type comparison, not is_castable()
+            errorMessage = "Only const 'reduce_indexes' input is supported";
+            return false;
         }
-        if (const auto reduce = std::dynamic_pointer_cast<const ngraph::op::util::LogicalReductionKeepDims>(op)) {
-            auto reduceConst = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(reduce->get_input_node_shared_ptr(REDUCE_INDEXES));
-            if (!reduceConst) {
-                errorMessage = "Second tensor is not constant";
-                return false;
-            }
+        if (idxIn->get_element_type() != ov::element::i32 && idxIn->get_element_type() != ov::element::i64) {  // the constructor reads the axes only as i32 or i64
+            errorMessage = "Only i32 and i64 'reduce_indexes' input is supported";
+            return false;
         }
         if (initializers.find(op->get_type_info()) == initializers.end()) {
             errorMessage = "Doesn't support Reduce algorithm: " +  std::string(op->get_type_info().name);
             return false;
         }
-        if (std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(REDUCE_INDEXES)) == nullptr) {
-            errorMessage = "Only const 'reduce_indexes' input is supported";
-            return false;
-        }
     } catch (...) {
         return false;
     }
     return true;
 }
 
-Reduce::Reduce(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+Reduce::Reduce(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         : Node(op, context, NgraphShapeInferFactory(op, PortMask(REDUCE_INDEXES))) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        errorPrefix = "Reduce node with name '" + getName() + "'";
-        initializers.at(op->get_type_info())(op, *this);
-        if (const auto reduce = std::dynamic_pointer_cast<ngraph::op::util::ArithmeticReductionKeepDims>(op)) {
-            keep_dims = reduce->get_keep_dims();
-            auto reduceConst = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(reduce->get_input_node_shared_ptr(REDUCE_INDEXES));
-            if (!reduceConst)
-                IE_THROW() << errorPrefix << " second tensor is not constant!";
-            raw_axes = reduceConst->cast_vector<int>();
-        } else if (const auto reduce = std::dynamic_pointer_cast<ngraph::op::util::LogicalReductionKeepDims>(op)) {
-            keep_dims = reduce->get_keep_dims();
-            auto reduceConst = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(reduce->get_input_node_shared_ptr(REDUCE_INDEXES));
-            if (!reduceConst)
-                IE_THROW() << errorPrefix << " second tensor is not constant!";
-            raw_axes = reduceConst->cast_vector<int>();
-        }
-        set_use_aux_kernel = false;
-        fuse_low_precision = false;
-        vec_reduceDH_prc.clear();
-        vec_reduceCDW_prc.clear();
-        setJITBeyond5D();
-    } else {
+    if (!isSupportedOperation(op, errorMessage)) {  // guard clause: everything below assumes a supported op
         IE_THROW(NotImplemented) << errorMessage;
     }
+
+    initializers.at(op->get_type_info())(op, *this);  // the per-type callback sets 'algorithm'
+
+    if (const auto reduction = std::dynamic_pointer_cast<op::util::ReductionBase>(op)) {  // keep_dims is left untouched if the cast fails
+        keep_dims = reduction->get_keep_dims();
+    }
+    const auto idxIn = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(REDUCE_INDEXES));  // isSupportedOperation() already required a v0::Constant here
+    if (idxIn->get_element_type() == ov::element::i32) {
+        const auto tmpData = idxIn->get_vector<int32_t>();
+        raw_axes.assign(tmpData.begin(), tmpData.end());  // copy the i32 axes into raw_axes element by element
+    } else if (idxIn->get_element_type() == ov::element::i64) {  // other element types were rejected by isSupportedOperation()
+        raw_axes = idxIn->get_vector<int64_t>();
+    }
+
+    set_use_aux_kernel = false;
+    fuse_low_precision = false;
+    vec_reduceDH_prc.clear();
+    vec_reduceCDW_prc.clear();
+    setJITBeyond5D();
 }
 
 void Reduce::getSupportedDescriptors() {
-    if (getParentEdges().size() != 2)
-        IE_THROW() << errorPrefix << " gets incorrect number of input edges!";
-    if (getChildEdges().empty())
-        IE_THROW() << errorPrefix << " gets incorrect number of output edges!";
+    if (getParentEdges().size() != 2) {  // exactly two parent edges are required
+        THROW_CPU_NODE_ERR << " gets incorrect number of input edges!";
+    }
+    if (getChildEdges().empty()) {  // at least one child edge is required
+        THROW_CPU_NODE_ERR << " gets incorrect number of output edges!";
+    }
 
     if (getInputShapeAtPort(REDUCE_INDEXES).getRank() != 1) {
-        IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension.";
+        THROW_CPU_NODE_ERR << " gets incorrect index vector dimension! Index vector should be 1 dimension.";
     }
 
     if (keep_dims) {
         if (getInputShapeAtPort(REDUCE_DATA).getRank() != getOutputShapeAtPort(0).getRank())
-            IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!";
+            THROW_CPU_NODE_ERR << " gets incorrect number of input/output dimensions!";
     } else {
         // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d.
         // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases.
         bool is_emulated_0d_as_1d = getInputShapeAtPort(REDUCE_DATA).getRank() == 1 && getOutputShapeAtPort(0).getRank() == 1;
         if (getInputShapeAtPort(REDUCE_DATA).getRank() <= getOutputShapeAtPort(0).getRank() && !is_emulated_0d_as_1d)
-            IE_THROW() << errorPrefix << "gets incorrect number of input/output dimensions!";
+            THROW_CPU_NODE_ERR << " gets incorrect number of input/output dimensions!";  // leading space added to match the sibling messages above
     }
 }
 
 void Reduce::initSupportedPrimitiveDescriptors() {
-    if (!supportedPrimitiveDescriptors.empty())
-        return;
-
-    input_prec = getOriginalInputPrecisionAtPort(REDUCE_DATA);
+    const auto& input_prec_0 = getOriginalInputPrecisionAtPort(REDUCE_DATA);
+    auto input_prec_1 = getOriginalInputPrecisionAtPort(REDUCE_INDEXES);
     output_prec = getOriginalOutputPrecisionAtPort(0);
 
+    if (input_prec_1 == Precision::U64) {
+        input_prec_1 = Precision::I64;
+    } else if (!one_of(input_prec_1, Precision::I32, Precision::I64)) {
+        input_prec_1 = Precision::I32;
+    }
+
     if (!fusedWith.empty()) {
         // In jit mode we use the output memory as an intermediate accumulator for certain reduce modes.
         // If the post ops node has a lower precision for such modes, working buffer with original precision is needed,
@@ -1876,27 +219,27 @@ void Reduce::initSupportedPrimitiveDescriptors() {
         output_prec = fused_prec;
     }
 
-    jit_mode = canApplyJIT(input_prec, output_prec);
+    jit_mode = canApplyJIT(input_prec_0, output_prec);
 
     if (jit_mode) {
         // Since in jit mode we use the output memory as an intermediate accumulator for certain reduce modes, we can't use BF16 output precision due to
         // the possible accuracy loss. Therefore, for such mods, we will change the output precision to FP32.
         if (Precision::BF16 == output_prec) {
-            if (!mayiuse(avx512_core)) {
-                    output_prec = Precision::FP32;
+            if (!x64::mayiuse(x64::avx512_core)) {
+                output_prec = Precision::FP32;
             } else if (algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr &&
                        algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax) {
-                            output_prec = Precision::FP32;
+                output_prec = Precision::FP32;
             }
         }
     }
 
     intermediate_prec = fuse_low_precision ? Precision(Precision::FP32) : output_prec;
-    precision_change = input_prec != intermediate_prec;
+    precision_change = input_prec_0 != intermediate_prec;
     support_split = algorithm != Algorithm::ReduceL2 && algorithm != Algorithm::ReduceLogSumExp &&
                     algorithm != Algorithm::ReduceSumSquare;
 
-    src_data_size = input_prec.size();
+    src_data_size = input_prec_0.size();
     dst_data_size = output_prec.size();
     intermediate_data_size = intermediate_prec.size();
 
@@ -1912,11 +255,11 @@ void Reduce::initSupportedPrimitiveDescriptors() {
 
     auto& creatorsMap = BlockedDescCreator::getCommonCreators();
 
-    auto pushDesc = [&](LayoutType inFormat, LayoutType outFormat, InferenceEngine::Precision inPrecision,
-            InferenceEngine::Precision outPrecision, impl_desc_type impl_type, bool useAclExecutor = false) {
-        config.inConfs[REDUCE_DATA].setMemDesc(creatorsMap.at(inFormat)->createSharedDesc(inPrecision, getInputShapeAtPort(REDUCE_DATA)));
-        config.inConfs[REDUCE_INDEXES].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(InferenceEngine::Precision::I32,
-                                                                                                 getInputShapeAtPort(REDUCE_INDEXES)));
+    auto pushDesc = [&](const LayoutType &inFormat, const LayoutType &outFormat, const Precision& inPrecision0, const Precision& inPrecision1,
+                        const Precision& outPrecision, const impl_desc_type &impl_type, bool useAclExecutor = false) {
+        config.inConfs[REDUCE_DATA].setMemDesc(creatorsMap.at(inFormat)->createSharedDesc(inPrecision0, getInputShapeAtPort(REDUCE_DATA)));
+        config.inConfs[REDUCE_INDEXES].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(inPrecision1,
+                                                                                                     getInputShapeAtPort(REDUCE_INDEXES)));
         config.outConfs[0].setMemDesc(creatorsMap.at(outFormat)->createSharedDesc(outPrecision, getOutputShapeAtPort(0)));
 
         if (useAclExecutor) {
@@ -1957,35 +300,41 @@ void Reduce::initSupportedPrimitiveDescriptors() {
 
     if (jit_mode) {
         impl_desc_type impl_type = impl_desc_type::jit_sse42;
-        if (mayiuse(cpu::x64::avx512_core)) {
+        if (x64::mayiuse(x64::avx512_core)) {
             impl_type = impl_desc_type::jit_avx512;
-        } else if (mayiuse(cpu::x64::avx2)) {
+        } else if (x64::mayiuse(x64::avx2)) {
             impl_type = impl_desc_type::jit_avx2;
         }
 
-        pushDesc(LayoutType::ncsp, LayoutType::ncsp, input_prec, output_prec, impl_type);
+        pushDesc(LayoutType::ncsp, LayoutType::ncsp, input_prec_0, input_prec_1, output_prec, impl_type);
         if ((getInputShapeAtPort(REDUCE_DATA).getRank() == 4 || getInputShapeAtPort(REDUCE_DATA).getRank() == 5) &&
                 getInputShapeAtPort(REDUCE_DATA).getMinDims()[1] > 1) {
             if (keep_dims) {
-                if (mayiuse(cpu::x64::avx512_core)) {
-                    pushDesc(LayoutType::nspc, LayoutType::nspc, input_prec, output_prec, impl_type);
-                    pushDesc(LayoutType::nCsp16c, LayoutType::nCsp16c, input_prec, output_prec, impl_type);
-                } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) {
-                    pushDesc(LayoutType::nspc, LayoutType::nspc, input_prec, output_prec, impl_type);
-                    pushDesc(LayoutType::nCsp8c, LayoutType::nCsp8c, input_prec, output_prec, impl_type);
+                pushDesc(LayoutType::nspc, LayoutType::nspc, input_prec_0, input_prec_1, output_prec, impl_type);
+                if (x64::mayiuse(x64::avx512_core)) {
+                    if (src_data_size <= 4) {
+                        pushDesc(LayoutType::nCsp16c, LayoutType::nCsp16c, input_prec_0, input_prec_1, output_prec, impl_type);
+                    } else if (src_data_size == 8) {
+                        pushDesc(LayoutType::nCsp8c, LayoutType::nCsp8c, input_prec_0, input_prec_1, output_prec, impl_type);
+                    }
+                } else if (src_data_size <= 4) {
+                    pushDesc(LayoutType::nCsp8c, LayoutType::nCsp8c, input_prec_0, input_prec_1, output_prec, impl_type);
                 }
             } else {
-                if (mayiuse(cpu::x64::avx512_core)) {
-                    pushDesc(LayoutType::nspc, LayoutType::ncsp, input_prec, output_prec, impl_type);
-                    pushDesc(LayoutType::nCsp16c, LayoutType::ncsp, input_prec, output_prec, impl_type);
-                } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) {
-                    pushDesc(LayoutType::nspc, LayoutType::ncsp, input_prec, output_prec, impl_type);
-                    pushDesc(LayoutType::nCsp8c, LayoutType::ncsp, input_prec, output_prec, impl_type);
+                pushDesc(LayoutType::nspc, LayoutType::ncsp, input_prec_0, input_prec_1, output_prec, impl_type);
+                if (x64::mayiuse(x64::avx512_core)) {
+                    if (src_data_size <= 4) {
+                        pushDesc(LayoutType::nCsp16c, LayoutType::ncsp, input_prec_0, input_prec_1, output_prec, impl_type);
+                    } else if (src_data_size == 8) {
+                        pushDesc(LayoutType::nCsp8c, LayoutType::ncsp, input_prec_0, input_prec_1, output_prec, impl_type);
+                    }
+                } else if (src_data_size <= 4) {
+                    pushDesc(LayoutType::nCsp8c, LayoutType::ncsp, input_prec_0, input_prec_1, output_prec, impl_type);
                 }
             }
         }
     } else {
-        pushDesc(LayoutType::ncsp, LayoutType::ncsp, InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32, impl_desc_type::ref);
+        pushDesc(LayoutType::ncsp, LayoutType::ncsp, Precision::FP32, Precision::I32, Precision::FP32, impl_desc_type::ref);
     }
 }
 
@@ -2010,7 +359,7 @@ void Reduce::prepareParams() {
     }
 
     src_dims = getParentEdgesAtPort(REDUCE_DATA)[0]->getMemory().getDesc().getShape().getDims();
-    std::vector<int> reduce_axes;
+    std::vector<int64_t> reduce_axes;
     if (jit_mode && jit_beyond_5D) {
         reduce_axes = update_src_dims();
     } else {
@@ -2028,21 +377,22 @@ void Reduce::prepareParams() {
     apply_post_kernel = true;
     apply_division = false;
 
-    auto builder = [&](const ReduceKey& key) -> std::shared_ptr<jit_uni_reduce_post_kernel> {
-        std::shared_ptr<jit_uni_reduce_post_kernel> post_kernel;
+    auto builder = [&](const ReduceKey& key) -> std::shared_ptr<JitReduceKernelBase<JitReducePostCallArgs>> {
+        std::shared_ptr<JitReduceKernelBase<JitReducePostCallArgs>> postKernel;
 #if defined(OPENVINO_ARCH_X86_64)
-        if (mayiuse(cpu::x64::avx512_core)) {
-            post_kernel.reset(new jit_uni_reduce_post_kernel_f32<cpu::x64::avx512_core>(key.jcp, *attr.get()));
-        } else if (mayiuse(cpu::x64::avx2)) {
-            post_kernel.reset(new jit_uni_reduce_post_kernel_f32<cpu::x64::avx2>(key.jcp, *attr.get()));
-        } else if (mayiuse(cpu::x64::sse41)) {
-            post_kernel.reset(new jit_uni_reduce_post_kernel_f32<cpu::x64::sse41>(key.jcp, *attr.get()));
+        if (x64::mayiuse(x64::avx512_core)) {
+            postKernel.reset(new JitReducePostKernel<x64::avx512_core>(key.jcp, *attr.get()));
+        } else if (x64::mayiuse(x64::avx2)) {
+            postKernel.reset(new JitReducePostKernel<x64::avx2>(key.jcp, *attr.get()));
+        } else if (x64::mayiuse(x64::sse41)) {
+            postKernel.reset(new JitReducePostKernel<x64::sse41>(key.jcp, *attr.get()));
         }
 #endif // OPENVINO_ARCH_X86_64
-        if (post_kernel)
-            post_kernel->create_ker();
+        if (postKernel) {
+            postKernel->create_kernel();
+        }
 
-        return post_kernel;
+        return postKernel;
     };
 
     if (compile_post_kernel) {
@@ -2052,12 +402,26 @@ void Reduce::prepareParams() {
         auto cache = context->getParamsCache();
         auto result = cache->getOrCreate(key, builder);
         if (!result.first) {
-            IE_THROW() << errorPrefix << " has not found jit_uni_reduce_post_kernel_f32.";
+            THROW_CPU_NODE_ERR << " has not found JitReducePostKernel.";
         }
 
         reduce_post_kernel = result.first;
         jit_mode = jit_mode && reduce_post_kernel;
 
+        if (jit_mode) {
+            size_t divisor = IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW);
+            if (divisor == 0lu) {
+                divisor = 1lu;
+            }
+            if (reduce_post_kernel->get_exec_prc().size() == 4) {
+                in_out_divisor_f32 = static_cast<float>(divisor);
+                in_out_divisor = &in_out_divisor_f32;
+            } else if (reduce_post_kernel->get_exec_prc().size() == 8) {
+                in_out_divisor_f64 = static_cast<double>(divisor);
+                in_out_divisor = &in_out_divisor_f64;
+            }
+        }
+
         if (!isDynamicNode()) {
             compile_post_kernel = false;
         }
@@ -2071,11 +435,11 @@ void Reduce::createPrimitive() {
     auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
     auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr();
     if (!dstMemPtr || !dstMemPtr->isAllocated())
-        IE_THROW() << errorPrefix << " has not allocated destination memory.";
+        THROW_CPU_NODE_ERR << " has not allocated destination memory.";
     if (!srcMemPtr || !srcMemPtr->isAllocated())
-        IE_THROW() << errorPrefix << " has not allocate input memory.";
+        THROW_CPU_NODE_ERR << " has not allocate input memory.";
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor";
+        THROW_CPU_NODE_ERR << " has nullable preferable primitive descriptor";
 
     if (srcMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) {
         layout = ReduceLayoutType::reduce_ncsp;
@@ -2086,17 +450,15 @@ void Reduce::createPrimitive() {
     }
 
     // hybrid layout: nspc/blocked layout for input and ncsp for output
-    // !keep_dims is needed to avoid hybrid layout for cases eg. (A, B, C, D) reduce to (A, 1, 1, 1)
+    // !keep_dims is needed to avoid hybrid layout for cases, e.g., (A, B, C, D) reduced to (A, 1, 1, 1)
     if (!keep_dims && (layout == ReduceLayoutType::reduce_nspc || layout == ReduceLayoutType::reduce_blocked)) {
         is_hybrid_layout = dstMemPtr->getDesc().hasLayoutType(LayoutType::ncsp);
     }
 
     auto selectedPD = getSelectedPrimitiveDescriptor();
-    jcp = jit_reduce_config_params();
-    jcp.src_dt = DnnlExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].getMemDesc()->getPrecision());
-    jcp.dst_dt = DnnlExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].getMemDesc()->getPrecision());
-    jcp.src_data_size = DnnlExtensionUtils::sizeOfDataType(jcp.src_dt);
-    jcp.dst_data_size = DnnlExtensionUtils::sizeOfDataType(jcp.dst_dt);
+    jcp = JitReduceConfigParams();
+    jcp.src_el_type = details::convertPrecision(selectedPD->getConfig().inConfs[REDUCE_DATA].getMemDesc()->getPrecision());
+    jcp.dst_el_type = details::convertPrecision(selectedPD->getConfig().outConfs[0].getMemDesc()->getPrecision());
     jcp.layout = layout;
     jcp.reduce_mode = getAlgorithm();
     jcp.fuse_low_precision = fuse_low_precision;
@@ -2107,10 +469,11 @@ void Reduce::createPrimitive() {
     compile_post_kernel = false;
 #endif // OPENVINO_ARCH_X86_64
 
-    if (mayiuse(cpu::x64::avx512_core)) {
-        blk_size = 16;
+    size_t prcDiv = jcp.src_el_type.size() < 4 ? 4 : jcp.src_el_type.size();
+    if (x64::mayiuse(x64::avx512_core)) {
+        blk_size = 64 / prcDiv;
     } else {
-        blk_size = 8;
+        blk_size = 32 / prcDiv;
     }
 
     if (inputShapesDefined()) {
@@ -2120,8 +483,7 @@ void Reduce::createPrimitive() {
     }
 
     auto reduce_jcp = jcp;
-    reduce_jcp.dst_dt = fuse_low_precision ? DnnlExtensionUtils::IEPrecisionToDataType(intermediate_prec) : jcp.dst_dt;
-    jcp.dst_data_size = DnnlExtensionUtils::sizeOfDataType(reduce_jcp.dst_dt);
+    reduce_jcp.dst_el_type = fuse_low_precision ? details::convertPrecision(intermediate_prec) : jcp.dst_el_type;
     create_reduce_kernel(reduce_kernel, reduce_jcp);
 
     // set_use_aux_kernel being false means this is a dynamic case, and prepareParams() hasn't been invoked yet.
@@ -2139,31 +501,27 @@ void Reduce::createPrimitive() {
     // stage to reduce the rest dimensions.
     if (use_aux_kernel) {
         aux_jcp = reduce_jcp;
-        aux_jcp.src_dt = reduce_jcp.dst_dt;
-        aux_jcp.src_data_size = reduce_jcp.dst_data_size;
+        aux_jcp.src_el_type = reduce_jcp.dst_el_type;
         create_reduce_kernel(reduce_aux_kernel, aux_jcp);
     }
 }
 
-void Reduce::create_reduce_kernel(std::shared_ptr<jit_uni_reduce_kernel> &kernel, const jit_reduce_config_params &jcp) {
+void Reduce::create_reduce_kernel(std::shared_ptr<JitReduceKernelBase<kernel::JitReduceCallArgs>> &kernel, const JitReduceConfigParams &jcp) {
 #if defined(OPENVINO_ARCH_X86_64)
-    if (mayiuse(cpu::x64::avx512_core)) {
-        kernel.reset(new jit_uni_reduce_kernel_f32<cpu::x64::avx512_core>(jcp));
-    } else if (mayiuse(cpu::x64::avx2)) {
-        kernel.reset(new jit_uni_reduce_kernel_f32<cpu::x64::avx2>(jcp));
-    } else if (mayiuse(cpu::x64::sse41)) {
-        kernel.reset(new jit_uni_reduce_kernel_f32<cpu::x64::sse41>(jcp));
+    if (x64::mayiuse(x64::avx512_core)) {  // ISA checks run avx512_core -> avx2 -> sse41; the first match wins
+        kernel.reset(new JitReduceKernel<x64::avx512_core>(jcp));
+    } else if (x64::mayiuse(x64::avx2)) {
+        kernel.reset(new JitReduceKernel<x64::avx2>(jcp));
+    } else if (x64::mayiuse(x64::sse41)) {
+        kernel.reset(new JitReduceKernel<x64::sse41>(jcp));
     }
 #endif // OPENVINO_ARCH_X86_64
-    if (kernel)
-        kernel->create_ker();
+    if (kernel) {  // `kernel` is left as passed in when no ISA branch ran or the x86-64 section is compiled out
+        kernel->create_kernel();
+    }
     jit_mode = jit_mode && kernel;
 }
 
-void Reduce::executeDynamicImpl(dnnl::stream strm) {
-    execute(strm);
-}
-
 void Reduce::execute(dnnl::stream strm) {
     auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
     auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr();
@@ -2191,11 +549,15 @@ void Reduce::execute(dnnl::stream strm) {
             auto out_ptr = reinterpret_cast<float *>(dst_data);
             reduce_ref(in_ptr, out_ptr);
         } else {
-            IE_THROW() << errorPrefix << " supports only plain layout on machine w/o sse42.";
+            THROW_CPU_NODE_ERR << " supports only plain layout on machine w/o sse42.";
         }
     }
 }
 
+void Reduce::executeDynamicImpl(dnnl::stream strm) {
+    execute(strm);  // forwards to execute() with the same stream; no extra work is done here
+}
+
 void Reduce::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr) {
     reduce_stride = IW;
 
@@ -2214,9 +576,19 @@ void Reduce::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr) {
         auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
         out_ptr = reinterpret_cast<uint8_t *>(dstMemPtr->getData());
         if (layout == ReduceLayoutType::reduce_nspc) {
-            nspc2ncsp(proc_ptr, out_ptr);
+            switch (dst_data_size) {
+                case 1: nspc2ncsp<uint8_t>(proc_ptr, out_ptr); break;
+                case 2: nspc2ncsp<uint16_t>(proc_ptr, out_ptr); break;
+                case 4: nspc2ncsp<uint32_t>(proc_ptr, out_ptr); break;
+                case 8: nspc2ncsp<uint64_t>(proc_ptr, out_ptr); break;
+            }
         } else {
-            blocked2ncsp(proc_ptr, out_ptr);
+            switch (dst_data_size) {
+                case 1: blocked2ncsp<uint8_t>(proc_ptr, out_ptr); break;
+                case 2: blocked2ncsp<uint16_t>(proc_ptr, out_ptr); break;
+                case 4: blocked2ncsp<uint32_t>(proc_ptr, out_ptr); break;
+                case 8: blocked2ncsp<uint64_t>(proc_ptr, out_ptr); break;
+            }
         }
     }
 }
@@ -2242,20 +614,20 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) {
             size_t ob = ReduceN ? 0 : ib; GET_PTR_N_PLN;
             if (!ReduceC && !ReduceD && ReduceW) {
                 size_t work_amount = ReduceH ? IH * IW : IW;
-                if (work_amount < blk_size && mayiuse(cpu::x64::avx2)) {
+                if (work_amount < blk_size && x64::mayiuse(x64::avx2)) {
                     size_t outer_size = ReduceH ? IC * ID : IC * ID * IH;
                     size_t inner_size = ReduceH ? IH * IW : IW;
                     size_t output_inner_size = ReduceH ? OH * OW : OW;
                     size_t IK = outer_size / blk_size;
-                    std::vector<int> index_buf(blk_size);
+                    std::vector<int> indicesBuf(16, work_amount * src_data_size);
                     for (size_t i = 0; i < blk_size; i++) {
-                        index_buf[i] = i * work_amount * src_data_size;
+                        indicesBuf[i] *= i;
                     }
                     parallel_for(IK, [&](size_t ik) {
                         size_t ok = ik;
                         reduce_kernel_process(in_ptr_n + ik * blk_size * inner_size * src_data_size,
                                               out_ptr_n + ok * blk_size * output_inner_size * dst_data_size,
-                                              work_amount, 1, 0, static_cast<int *>(&index_buf[0]));
+                                              work_amount, 1, 0, static_cast<int *>(&indicesBuf[0]));
                     });
                     size_t tail_start = IK * blk_size;
                     size_t IT = outer_size - tail_start;
@@ -2334,15 +706,18 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) {
                 }
             } else if (!ReduceC && !ReduceD && ReduceH && !ReduceW) {
                 parallel_for2d(IC, ID, [&](size_t ic, size_t id) {
-                    size_t oc = ic, od = id; GET_PTR_NCD_BASE_PTR_N_PLN;
-                    parallel_for(IW / blk_size, [&](size_t ibw){
+                    size_t oc = ic, od = id;
+                    GET_PTR_NCD_BASE_PTR_N_PLN;
+                    parallel_for(IW / blk_size, [&](size_t ibw) {
                         size_t obw = ibw;
-                        reduce_kernel_process(in_ptr_ncd + ibw * blk_size * src_data_size, out_ptr_ncd + obw * blk_size * dst_data_size,
-                                              blk_size, 0, IH);
+                        reduce_kernel_process(in_ptr_ncd + ibw * blk_size * src_data_size,
+                                            out_ptr_ncd + obw * blk_size * dst_data_size,
+                                            blk_size, 0, IH);
                     });
                     size_t tail_start = IW / blk_size * blk_size;
-                    reduce_kernel_process(in_ptr_ncd + tail_start * src_data_size, out_ptr_ncd + tail_start * dst_data_size,
-                                          IW - tail_start, 0, IH);
+                    reduce_kernel_process(in_ptr_ncd + tail_start * src_data_size,
+                                        out_ptr_ncd + tail_start * dst_data_size,
+                                        IW - tail_start, 0, IH);
                 });
             } else if (!ReduceC && ReduceD && ReduceH && !ReduceW) {
                 size_t IWB = IW / blk_size;
@@ -2352,10 +727,10 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) {
                         // step1: !ReduceD && ReduceH && !ReduceW
                         uint8_t *prc_ptr_n = &vec_reduceDH_prc[0];
                         init_dst_data(prc_ptr_n, prc_size);
-                        parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){
+                        parallel_for2d(ID, IWB, [&](size_t id, size_t iwb) {
                             size_t pd = id, pwb = iwb;
                             reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size,
-                                                prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH);
+                                                  prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH);
                         });
                         // step2: ReduceD
                         reduce_stride = PW;
@@ -2372,7 +747,7 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) {
                     size_t tail_start = IWB * blk_size;
                     parallel_for(IW - tail_start, [&](size_t i_tail) {
                         reduce_kernel_process(in_ptr_n + (tail_start + i_tail) * src_data_size, out_ptr_n + (tail_start + i_tail) * dst_data_size,
-                                            1, 0, ID * IH);
+                                              1, 0, ID * IH);
                     });
                 } else {
                     parallel_for(IC, [&](size_t ic) {
@@ -2450,7 +825,8 @@ void Reduce::reduce_BLK(const uint8_t *in_ptr, uint8_t *out_ptr) {
                 apply_post_kernel = !apply_division;
             }
             parallel_for2d(ICB, ID, [&](size_t icb, size_t id) {
-                size_t ocb = icb, od = id; GET_PTR_NCD_BASE_PTR_N_BLK;
+                size_t ocb = icb, od = id;
+                GET_PTR_NCD_BASE_PTR_N_BLK;
                 reduce_kernel_process(in_ptr_ncd, out_ptr_ncd, IH * IW * blk_size);
             });
         } else if (ReduceC && ReduceD && ReduceH && ReduceW) {
@@ -2604,9 +980,9 @@ void Reduce::reduce_BLK_concern_padding(const uint8_t *in_ptr, uint8_t *out_ptr)
 }
 
 inline void Reduce::reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, size_t work_amount,
-                                                    size_t reduce_w, size_t work_batch, const int *tab_idx) {
-    const float divisor = apply_division ? static_cast<float>(IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW)) : 1;
-    auto arg = jit_reduce_call_args();
+                                          size_t reduce_w, size_t work_batch, const int *tab_idx) {
+    auto arg = JitReduceCallArgs();
+
     arg.src = static_cast<const void *>(in_p);
     arg.idx = tab_idx;
     arg.dst = static_cast<void *>(out_p);
@@ -2614,28 +990,28 @@ inline void Reduce::reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, s
     arg.work_batch = work_batch;
     arg.reduce_w = reduce_w;
     arg.reduce_stride = reduce_stride;
-    arg.can_divide = apply_division ? 1 : 0;
-    arg.divisor = &divisor;
+    arg.can_divide = apply_division ? 1lu : 0lu;
+    arg.divisor = in_out_divisor;
 
     (*reduce_kernel)(&arg);
 }
 
 inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) {
     const uint8_t *in_ptr = fuse_low_precision ? static_cast<uint8_t *>(&intermediate_buf[0]) : nullptr;
-    const size_t integerDivisor = IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW);
-    const float divisor = static_cast<float>(integerDivisor);
     if (layout == ReduceLayoutType::reduce_ncsp) {
+        const auto work_amount = OD * OH * OW;
         parallel_for2d(OB, OC, [&](size_t ob, size_t oc) {
             const uint8_t *in_p = in_ptr + (ob * OC + oc) * OD * OH * OW * intermediate_data_size;
-            uint8_t *out_p = out_ptr + (ob * OC + oc) * OD * OH * OW * dst_data_size;
-            auto arg = jit_reduce_post_call_args();
+            uint8_t *out_p = out_ptr + (ob * OC + oc) * work_amount * dst_data_size;
+            auto arg = JitReducePostCallArgs();
             arg.src = static_cast<const void *>(in_p);
             arg.dst = static_cast<void *>(out_p);
             arg.oc_off = oc * sizeof(float);
             arg.channel_size = OC;
-            arg.work_amount = OD * OH * OW;
-            arg.divisor = &divisor;
+            arg.work_amount = work_amount;
+            arg.divisor = in_out_divisor;
             arg.post_op_data = static_cast<const void **>(postOpsDataPtrs.data());
+
             (*reduce_post_kernel)(&arg);
         });
     } else if (layout == ReduceLayoutType::reduce_nspc) {
@@ -2643,33 +1019,38 @@ inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) {
         size_t OP = OB * OC >= num_threads ? OB * OC : OB * OC * OD;
         if (OP < num_threads && OW > blk_size)
             OP *= OH;
-        size_t work_amount = OB * OC * OD * OH * OW / OP;
+        const auto work_amount = OB * OC * OD * OH * OW / OP;
         parallel_for(OP, [&](size_t op) {
             const uint8_t *in_p = in_ptr + op * work_amount * intermediate_data_size;
             uint8_t *out_p = out_ptr + op * work_amount * dst_data_size;
-            auto arg = jit_reduce_post_call_args();
+            auto arg = JitReducePostCallArgs();
+
             arg.src = static_cast<const void *>(in_p);
             arg.dst = static_cast<void *>(out_p);
             arg.oc_off = 0;
             arg.channel_size = OW; // OW is related to nspc-ncsp dimension reinterpret
             arg.work_amount = work_amount;
-            arg.divisor = &divisor;
+            arg.divisor = in_out_divisor;
             arg.post_op_data = static_cast<const void **>(postOpsDataPtrs.data());
+
             (*reduce_post_kernel)(&arg);
         });
     } else {
         size_t OCB = div_up(OC, blk_size);
+        const auto work_amount = OD * OH * OW * blk_size;
         parallel_for2d(OB, OCB, [&](size_t ob, size_t ocb) {
             const uint8_t *in_p = in_ptr + (ob * OCB + ocb) * OD * OH * OW * blk_size * intermediate_data_size;
-            uint8_t *out_p = out_ptr + (ob * OCB + ocb) * OD * OH * OW * blk_size * dst_data_size;
-            auto arg = jit_reduce_post_call_args();
+            uint8_t *out_p = out_ptr + (ob * OCB + ocb) * work_amount * dst_data_size;
+            auto arg = JitReducePostCallArgs();
+
             arg.src = static_cast<const void *>(in_p);
             arg.dst = static_cast<void *>(out_p);
             arg.reduce_c = ReduceC ? 1 : 0;
             arg.oc_off = ocb * blk_size * sizeof(float);
-            arg.work_amount = OD * OH * OW * blk_size;
-            arg.divisor = &divisor;
+            arg.work_amount = work_amount;
+            arg.divisor = in_out_divisor;
             arg.post_op_data = static_cast<const void **>(postOpsDataPtrs.data());
+
             (*reduce_post_kernel)(&arg);
         });
     }
@@ -2708,6 +1089,7 @@ inline void Reduce::output_info_restore(uint8_t **out_ptr) {
     }
 }
 
+template<typename T>
 void Reduce::nspc2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) {
     // dimension reinterpret after nspc reusing routine reduce_PLN
     // demote -- nspc -- ncsp
@@ -2724,45 +1106,20 @@ void Reduce::nspc2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) {
     const size_t stride1 = DIM2 * DIM3 * DIM4;
     const size_t stride0 = stride1 * DIM1;
 
-    if (dst_data_size == 4) {
-        auto src_data = reinterpret_cast<const float *>(proc_ptr);
-        auto dst_data = reinterpret_cast<float *>(out_ptr);
-        parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
-            auto src_off = b * stride0 + j * DIM1;
-            auto dst_off = b * stride0 + j;
-            for (size_t dim1 = 0; dim1 < DIM1; dim1++) {
-                dst_data[dst_off] = src_data[src_off];
-                src_off++;
-                dst_off += stride1;
-            }
-        });
-    } else if (dst_data_size == 2) {
-        auto src_data = reinterpret_cast<const uint16_t *>(proc_ptr);
-        auto dst_data = reinterpret_cast<uint16_t *>(out_ptr);
-        parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
-            auto src_off = b * stride0 + j * DIM1;
-            auto dst_off = b * stride0 + j;
-            for (size_t dim1 = 0; dim1 < DIM1; dim1++) {
-                dst_data[dst_off] = src_data[src_off];
-                src_off++;
-                dst_off += stride1;
-            }
-        });
-    } else {
-        auto src_data = reinterpret_cast<const uint8_t *>(proc_ptr);
-        auto dst_data = reinterpret_cast<uint8_t *>(out_ptr);
-        parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
-            auto src_off = b * stride0 + j * DIM1;
-            auto dst_off = b * stride0 + j;
-            for (size_t dim1 = 0; dim1 < DIM1; dim1++) {
-                dst_data[dst_off] = src_data[src_off];
-                src_off++;
-                dst_off += stride1;
-            }
-        });
-    }
+    auto src_data = reinterpret_cast<const T *>(proc_ptr);
+    auto dst_data = reinterpret_cast<T *>(out_ptr);
+    parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
+        auto src_off = b * stride0 + j * DIM1;
+        auto dst_off = b * stride0 + j;
+        for (size_t dim1 = 0; dim1 < DIM1; dim1++) {
+            dst_data[dst_off] = src_data[src_off];
+            src_off++;
+            dst_off += stride1;
+        }
+    });
 }
 
+template<typename T>
 void Reduce::blocked2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) {
     const size_t DIM0 = OB;
     const size_t DIM1 = OC;
@@ -2773,70 +1130,26 @@ void Reduce::blocked2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) {
     const size_t src_stride0 = stride1 * div_up(OC, blk_size) * blk_size;
     const size_t dst_stride0 = stride1 * DIM1;
 
-    if (dst_data_size == 4) {
-        auto src_data = reinterpret_cast<const float *>(proc_ptr);
-        auto dst_data = reinterpret_cast<float *>(out_ptr);
-        parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
-            auto src_off = b * src_stride0 + j * blk_size;
-            auto dst_off = b * dst_stride0 + j;
-            for (size_t dim1 = 0; dim1 + blk_size <= DIM1; dim1 += blk_size) {
-                for (size_t k = 0; k < blk_size; k++) {
-                    dst_data[dst_off] = src_data[src_off];
-                    src_off++;
-                    dst_off += stride1;
-                }
-                src_off += (stride1 - 1) * blk_size;
-            }
-            size_t tail = DIM1 % blk_size;
-            for (size_t k = 0; k < tail; k++) {
-                dst_data[dst_off] = src_data[src_off];
-                src_off++;
-                dst_off += stride1;
-            }
-        });
-    } else if (dst_data_size == 2) {
-        auto src_data = reinterpret_cast<const uint16_t *>(proc_ptr);
-        auto dst_data = reinterpret_cast<uint16_t *>(out_ptr);
-        parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
-            auto src_off = b * src_stride0 + j * blk_size;
-            auto dst_off = b * dst_stride0 + j;
-            for (size_t dim1 = 0; dim1 + blk_size <= DIM1; dim1 += blk_size) {
-                for (size_t k = 0; k < blk_size; k++) {
-                    dst_data[dst_off] = src_data[src_off];
-                    src_off++;
-                    dst_off += stride1;
-                }
-                src_off += (stride1 - 1) * blk_size;
-            }
-            size_t tail = DIM1 % blk_size;
-            for (size_t k = 0; k < tail; k++) {
-                dst_data[dst_off] = src_data[src_off];
-                src_off++;
-                dst_off += stride1;
-            }
-        });
-    } else {
-        auto src_data = reinterpret_cast<const uint8_t *>(proc_ptr);
-        auto dst_data = reinterpret_cast<uint8_t *>(out_ptr);
-        parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
-            auto src_off = b * src_stride0 + j * blk_size;
-            auto dst_off = b * dst_stride0 + j;
-            for (size_t dim1 = 0; dim1 + blk_size <= DIM1; dim1 += blk_size) {
-                for (size_t k = 0; k < blk_size; k++) {
-                    dst_data[dst_off] = src_data[src_off];
-                    src_off++;
-                    dst_off += stride1;
-                }
-                src_off += (stride1 - 1) * blk_size;
-            }
-            size_t tail = DIM1 % blk_size;
-            for (size_t k = 0; k < tail; k++) {
+    auto src_data = reinterpret_cast<const T *>(proc_ptr);
+    auto dst_data = reinterpret_cast<T *>(out_ptr);
+    parallel_for2d(DIM0, stride1, [&](size_t b, size_t j) {
+        auto src_off = b * src_stride0 + j * blk_size;
+        auto dst_off = b * dst_stride0 + j;
+        for (size_t dim1 = 0; dim1 + blk_size <= DIM1; dim1 += blk_size) {
+            for (size_t k = 0; k < blk_size; k++) {
                 dst_data[dst_off] = src_data[src_off];
                 src_off++;
                 dst_off += stride1;
             }
-        });
-    }
+            src_off += (stride1 - 1) * blk_size;
+        }
+        size_t tail = DIM1 % blk_size;
+        for (size_t k = 0; k < tail; k++) {
+            dst_data[dst_off] = src_data[src_off];
+            src_off++;
+            dst_off += stride1;
+        }
+    });
 }
 
 inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
@@ -2853,7 +1166,13 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
             break;
         case Algorithm::ReduceAnd:
         case Algorithm::ReduceProd:
-            if (output_prec == Precision::FP32) {
+            if (output_prec == Precision::FP64) {  // ReduceAnd/ReduceProd: every output element is seeded with 1
+                auto out_p = reinterpret_cast<double *>(out_ptr);
+                parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<double>(1); });
+            } else if (output_prec == Precision::I64) {
+                auto out_p = reinterpret_cast<int64_t *>(out_ptr);
+                parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<int64_t>(1); });
+            } else if (output_prec == Precision::FP32) {
                 auto out_p = reinterpret_cast<float *>(out_ptr);
                 parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<float>(1); });
             } else if (output_prec == Precision::I32) {
@@ -2871,7 +1190,13 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
             }
             break;
         case Algorithm::ReduceMax:
-            if (output_prec == Precision::FP32) {
+            if (output_prec == Precision::FP64) {
+                auto out_p = reinterpret_cast<double *>(out_ptr);
+                parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<double>::lowest(); });
+            } else if (output_prec == Precision::I64) {
+                auto out_p = reinterpret_cast<int64_t *>(out_ptr);
+                parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<int64_t>::min(); });
+            } else if (output_prec == Precision::FP32) {
                 auto out_p = reinterpret_cast<float *>(out_ptr);
                 parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<float>::lowest(); });
             } else if (output_prec == Precision::I32) {
@@ -2889,7 +1214,13 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
             }
             break;
         case Algorithm::ReduceMin:
-            if (output_prec == Precision::FP32) {
+            if (output_prec == Precision::FP64) {
+                auto out_p = reinterpret_cast<double *>(out_ptr);
+                parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<double>::max(); });
+            } else if (output_prec == Precision::I64) {
+                auto out_p = reinterpret_cast<int64_t *>(out_ptr);
+                parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<int64_t>::max(); });
+            } else if (output_prec == Precision::FP32) {
                 auto out_p = reinterpret_cast<float *>(out_ptr);
                 parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<float>::max(); });
             } else if (output_prec == Precision::I32) {
@@ -2907,15 +1238,16 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
             }
             break;
         default:
-            IE_THROW() << errorPrefix << " gets unsupported reduce mode.";
+            THROW_CPU_NODE_ERR << " gets unsupported reduce mode.";
     }
 }
 
 inline void Reduce::create_hybrid_working_memory() {
     auto rank = getInputShapeAtPort(REDUCE_DATA).getRank();
-    memory::format_tag format = (layout == ReduceLayoutType::reduce_nspc) ? (rank == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc)
-                                        : (rank == 4 ? (mayiuse(cpu::x64::avx512_core) ? memory::format_tag::nChw16c : memory::format_tag::nChw8c)
-                                                     : (mayiuse(cpu::x64::avx512_core) ? memory::format_tag::nCdhw16c : memory::format_tag::nCdhw8c));
+    dnnl::memory::format_tag format =
+            (layout == ReduceLayoutType::reduce_nspc) ? (rank == 4 ? dnnl::memory::format_tag::nhwc : dnnl::memory::format_tag::ndhwc)
+                    : (rank == 4 ? (x64::mayiuse(x64::avx512_core) ? dnnl::memory::format_tag::nChw16c : dnnl::memory::format_tag::nChw8c)
+                                 : (x64::mayiuse(x64::avx512_core) ? dnnl::memory::format_tag::nCdhw16c : dnnl::memory::format_tag::nCdhw8c));
     auto prc_dims = rank == 4 ? std::vector<size_t>{OB, OC, OH, OW} : std::vector<size_t>{OB, OC, OD, OH, OW};
     auto desc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(prc_dims), DnnlExtensionUtils::IEPrecisionToDataType(output_prec), format);
     prc_mem = dnnl::memory(desc, getEngine());
@@ -2956,7 +1288,7 @@ inline void Reduce::create_opt_working_memory() {
     }
 }
 
-inline void Reduce::calc_process_dst_dims(std::vector<int> &reduce_axes, const SizeVector &dst_dims) {
+inline void Reduce::calc_process_dst_dims(std::vector<int64_t> &reduce_axes, const SizeVector &dst_dims) {
     std::set<size_t> axes;
     SizeVector out_dims;
     process_dst_dims.clear();
@@ -2965,7 +1297,7 @@ inline void Reduce::calc_process_dst_dims(std::vector<int> &reduce_axes, const S
         if (axis < 0)
             axis += src_dims.size();
         if (static_cast<size_t>(axis) > src_dims.size())
-            IE_THROW() << errorPrefix << " exceeds data tensor dimension on index to reduce";
+            THROW_CPU_NODE_ERR << " exceeds data tensor dimension on index to reduce";
         axes.insert(static_cast<size_t>(axis));
     }
     for (size_t i = 0; i < src_dims.size(); i++) {
@@ -2988,11 +1320,11 @@ inline void Reduce::calc_process_dst_dims(std::vector<int> &reduce_axes, const S
     if (jit_mode && jit_beyond_5D) {
         if (std::accumulate(out_dims.begin(), out_dims.end(), size_t(1), std::multiplies<size_t>()) !=
             std::accumulate(dst_dims.begin(), dst_dims.end(), size_t(1), std::multiplies<size_t>()))
-            IE_THROW() << errorPrefix << "gets incorrect number of output dimensions!";
+            THROW_CPU_NODE_ERR << "gets incorrect number of output dimensions!";
     } else {
         for (size_t i = 0; i < std::min(out_dims.size(), dst_dims.size()); i++) {
             if (out_dims[i] != dst_dims[i])
-                IE_THROW() << errorPrefix << "gets incorrect number of output dimensions!";
+                THROW_CPU_NODE_ERR << "gets incorrect number of output dimensions!";
         }
     }
 }
@@ -3098,8 +1430,8 @@ inline void Reduce::reduce_ref(const float *in_ptr, float *out_ptr) {
         case Algorithm::ReduceSumSquare:
             reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + y * y; });
             break;
-    default:
-        IE_THROW() << errorPrefix << "gets unsupported reduce mode.";
+        default:
+            THROW_CPU_NODE_ERR << "gets unsupported reduce mode.";
     }
 }
 
@@ -3186,7 +1518,7 @@ inline void Reduce::reduce_ref_map(float *out_ptr, size_t work_amount_dst, size_
             });
             break;
         default:
-            IE_THROW() << errorPrefix << "gets unsupported reduce mode.";
+            THROW_CPU_NODE_ERR << "gets unsupported reduce mode.";
     }
 }
 
@@ -3233,8 +1565,8 @@ void Reduce::setJITBeyond5D() {
     }
 }
 
-std::vector<int> Reduce::update_src_dims() {
-    std::vector<int> reduce_axes = raw_axes;
+std::vector<int64_t> Reduce::update_src_dims() {
+    std::vector<int64_t> reduce_axes = raw_axes;
 
     if (reduce_axes.size() < 1)
         return reduce_axes;
@@ -3267,6 +1599,7 @@ std::vector<int> Reduce::update_src_dims() {
 
 bool Reduce::canApplyJIT(const Precision &input_prec, const Precision &output_prec) const {
     static const Precision supportedPrecisions[] = {
+            Precision::I64,
             Precision::FP32,
             Precision::BF16,
             Precision::I32,
@@ -3274,7 +1607,7 @@ bool Reduce::canApplyJIT(const Precision &input_prec, const Precision &output_pr
             Precision::U8
     };
 
-    return (mayiuse(cpu::x64::sse41)) && (getInputShapeAtPort(REDUCE_DATA).getRank() <= 5 || jit_beyond_5D) &&
+    return (x64::mayiuse(x64::sse41)) && (getInputShapeAtPort(REDUCE_DATA).getRank() <= 5 || jit_beyond_5D) &&
            std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), input_prec) != std::end(supportedPrecisions) &&
            std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), output_prec) != std::end(supportedPrecisions);
 }
@@ -3297,19 +1630,19 @@ int Reduce::getFusingAxis() const {
 }
 
 bool Reduce::canFuse(const NodePtr& node) const {
-    Precision input_prec = getOriginalInputPrecisionAtPort(REDUCE_DATA);
-    Precision output_prec = getOriginalOutputPrecisionAtPort(0);
+    const auto& input_prec = getOriginalInputPrecisionAtPort(REDUCE_DATA);
+    const auto& output_prec = getOriginalOutputPrecisionAtPort(0);
     if (!canApplyJIT(input_prec, output_prec) || jit_beyond_5D || algorithm == Algorithm::ReduceAnd || algorithm == Algorithm::ReduceOr) {
         return false;
     }
 
+    if (one_of(8, input_prec.size(), output_prec.size())) {  // no fusing if either precision has size() == 8 (presumably the 64-bit types - confirm)
+        return false;
+    }
+
     return canFuseSimpleOperation(node);
 }
 
 bool Reduce::created() const {
     return getType() == Type::Reduce;
 }
-
-}   // namespace node
-}   // namespace intel_cpu
-}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/reduce.h b/src/plugins/intel_cpu/src/nodes/reduce.h
index 2f07cb196a7dfe..067ef41de7ffcb 100644
--- a/src/plugins/intel_cpu/src/nodes/reduce.h
+++ b/src/plugins/intel_cpu/src/nodes/reduce.h
@@ -4,92 +4,18 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
-#include <memory>
-#include <vector>
+#include "kernels/x64/reduce.hpp"
+
 #include "executors/reduce_list.hpp"
 
 namespace ov {
 namespace intel_cpu {
 namespace node {
 
-enum ReduceLayoutType {
-    reduce_ncsp,
-    reduce_nspc,
-    reduce_blocked
-};
-
-struct jit_reduce_config_params {
-    ReduceLayoutType layout;
-    Algorithm reduce_mode;
-    bool fuse_low_precision;
-    dnnl::memory::data_type src_dt;
-    dnnl::memory::data_type dst_dt;
-    int src_data_size;
-    int dst_data_size;
-};
-
-struct jit_reduce_call_args {
-    const void *src;
-    const int *idx;
-    void *dst;
-    size_t work_amount;
-    size_t work_batch;
-    size_t reduce_w = 2;    // only used in planar layout  [1: reduce width dimension]   [0: reduce other dimension] [other value: N/A]
-    size_t reduce_stride;   // only used in planar layout while reducing dimensions except for width
-    size_t can_divide;      // if apply division in reduce_kernel [1: Yes] [0: No]
-    const float *divisor;   // mean = sum / divisor
-};
-
-struct jit_reduce_post_call_args {
-    const void *src;
-    void *dst;
-    size_t work_amount;
-    size_t reduce_c = 2;    // only used in blocked layout [1: reduce channel dimension] [0: reduce other dimension] [other value: N/A]
-    size_t oc_off;          // offset in byte along channel on output tensor
-    size_t channel_size;    // only for post ops fusion of nspc layout
-    const float *divisor;   // mean = sum / divisor
-    const void** post_op_data;
-};
-
-struct jit_uni_reduce_kernel {
-    void (*ker_)(const jit_reduce_call_args *);
-
-    void operator()(const jit_reduce_call_args *args) {
-        assert(ker_);
-        ker_(args);
-    }
-
-    explicit jit_uni_reduce_kernel(jit_reduce_config_params jcp) : ker_(nullptr), jcp_(jcp) {}
-    virtual ~jit_uni_reduce_kernel() {}
-
-    virtual void create_ker() = 0;
-
-    jit_reduce_config_params jcp_;
-};
-
-struct jit_uni_reduce_post_kernel {
-    void (*ker_)(const jit_reduce_post_call_args *);
-
-    void operator()(const jit_reduce_post_call_args *args) {
-        assert(ker_);
-        ker_(args);
-    }
-
-    explicit jit_uni_reduce_post_kernel(jit_reduce_config_params jcp, const dnnl_primitive_attr &attr) : ker_(nullptr), jcp_(jcp), attr_(attr) {}
-    virtual ~jit_uni_reduce_post_kernel() {}
-
-    virtual void create_ker() = 0;
-
-    jit_reduce_config_params jcp_;
-    const dnnl_primitive_attr &attr_;
-};
-
 class Reduce : public Node {
 public:
-    Reduce(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Reduce(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -105,7 +31,7 @@ class Reduce : public Node {
     }
 
     bool isExecutable() const override;
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
 private:
     void reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr);
@@ -113,7 +39,7 @@ class Reduce : public Node {
     void reduce_BLK(const uint8_t *in_ptr, uint8_t *out_ptr);
     void reduce_BLK_concern_padding(const uint8_t *in_ptr, uint8_t *out_ptr);
     inline void reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, size_t work_amount,
-                                      size_t reduce_w = 2, size_t work_batch = 1, const int *tab_idx = NULL);
+                                    size_t reduce_w = 2, size_t work_batch = 1, const int *tab_idx = NULL);
     inline void reduce_kernel_post_process(uint8_t *out_ptr);
     inline void reduce_kernel_reassign();
     inline void reduce_kernel_restore();
@@ -122,22 +48,24 @@ class Reduce : public Node {
     inline void init_dst_data(uint8_t *out_ptr, size_t dst_size);
     inline void create_hybrid_working_memory();
     inline void create_opt_working_memory();
-    inline void calc_process_dst_dims(std::vector<int> &reduce_axes, const InferenceEngine::SizeVector &dst_dim);
+    inline void calc_process_dst_dims(std::vector<int64_t> &reduce_axes, const InferenceEngine::SizeVector &dst_dim);
     inline void set_reduce_dim_flags();
     inline void reduce_ref(const float *in_ptr, float *out_ptr);
     void reduce_ref_process(const float *in_ptr, float *out_ptr, float init_value, std::function<float(float, float)> func);
-    void create_reduce_kernel(std::shared_ptr<jit_uni_reduce_kernel> &kernel, const jit_reduce_config_params &jcp);
+    void create_reduce_kernel(std::shared_ptr<kernel::JitReduceKernelBase<kernel::JitReduceCallArgs>> &kernel, const kernel::JitReduceConfigParams &jcp);
     inline void reduce_ref_map(float *out_ptr, size_t work_amount_dst, size_t reduced_dims_work_amount);
+    template<typename T>
     void nspc2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr);
+    template<typename T>
     void blocked2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr);
     void setPostOps(dnnl::primitive_attr &attr, const VectorDims &postOpDims, bool initWeights = false);
     void setJITBeyond5D();
-    std::vector<int> update_src_dims();
+    std::vector<int64_t> update_src_dims();
     bool canApplyJIT(const InferenceEngine::Precision &input_prec, const InferenceEngine::Precision &output_prec) const;
 
     size_t blk_size;
-    static const size_t REDUCE_DATA = 0;
-    static const size_t REDUCE_INDEXES = 1;
+    static constexpr size_t REDUCE_DATA = 0;
+    static constexpr size_t REDUCE_INDEXES = 1;
     bool jit_beyond_5D = false;
     bool jit_mode = true;
     bool keep_dims = true;
@@ -161,16 +89,19 @@ class Reduce : public Node {
     size_t dst_size, prc_size, intermediate_size, tmp_size;
     size_t reduce_stride;
     uint8_t *tmp_ptr;
-    ReduceLayoutType layout;
+    kernel::ReduceLayoutType layout;
     InferenceEngine::Precision input_prec, output_prec, intermediate_prec, tmp_prec;
     InferenceEngine::SizeVector src_dims;
     InferenceEngine::SizeVector process_dst_dims;
     InferenceEngine::SizeVector axes_for_reduction;
-    std::vector<int> raw_axes;
+    std::vector<int64_t> raw_axes;
     std::vector<uint8_t> intermediate_buf;
+    float in_out_divisor_f32 = 1.f;
+    double in_out_divisor_f64 = 1.;
+    void* in_out_divisor;
 
-    jit_reduce_config_params jcp;
-    jit_reduce_config_params aux_jcp;
+    kernel::JitReduceConfigParams jcp;
+    kernel::JitReduceConfigParams aux_jcp;
 
     dnnl::primitive_attr attr;
 
@@ -180,12 +111,12 @@ class Reduce : public Node {
     std::vector<uint8_t> vec_reduceDH_prc;
     std::vector<uint8_t> vec_reduceCDW_prc;
 
-    std::shared_ptr<jit_uni_reduce_kernel> reduce_kernel;
-    std::shared_ptr<jit_uni_reduce_kernel> reduce_aux_kernel;
-    std::shared_ptr<jit_uni_reduce_kernel> reduce_tmp_kernel;
-    std::shared_ptr<jit_uni_reduce_post_kernel> reduce_post_kernel;
+    std::shared_ptr<kernel::JitReduceKernelBase<kernel::JitReduceCallArgs>> reduce_kernel;
+    std::shared_ptr<kernel::JitReduceKernelBase<kernel::JitReduceCallArgs>> reduce_aux_kernel;
+    std::shared_ptr<kernel::JitReduceKernelBase<kernel::JitReduceCallArgs>> reduce_tmp_kernel;
+    std::shared_ptr<kernel::JitReduceKernelBase<kernel::JitReducePostCallArgs>> reduce_post_kernel;
 
-    static const std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>& op, Reduce& node)>> initializers;
+    static const std::map<const ov::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ov::Node>& op, Reduce& node)>> initializers;
 
     std::string errorPrefix;
 
diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp
index 490610b4f0c822..5fcf00ff196f27 100644
--- a/src/plugins/intel_cpu/src/nodes/reference.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reference.cpp
@@ -8,10 +8,9 @@
 #include <dnnl_extension_utils.h>
 #include "openvino/runtime/tensor.hpp"
 #include "common/blocked_desc_creator.h"
-#include <ngraph/opsets/opset1.hpp>
+#include <openvino/opsets/opset1.hpp>
 #include "common/cpu_memcpy.h"
 
-using namespace dnnl;
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
 
@@ -19,11 +18,11 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-Reference::Reference(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context,
+Reference::Reference(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context,
                                          const std::string& errorMessage) :
         Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ngraphOp(op), additionalErrorMessage(errorMessage) {
     if (!op->has_evaluate()) {
-        IE_THROW(NotImplemented) << "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)";
+        IE_THROW(NotImplemented) << "Cannot fallback on ngraph reference implementation (ov::Node::evaluate() is not implemented)";
     }
     setType(Type::Reference);
     setTypeStr("Reference");
@@ -31,7 +30,7 @@ Reference::Reference(const std::shared_ptr<ngraph::Node>& op, const GraphContext
     // RandomUniform should generate new sequence each run even if all inputs are constants. So that method Node::IsConstant()
     // doesn't return 'True' for RandomUniform with all constant inputs and the node generates new values for each inference,
     // we set 'NoConst' value for 'ConstantType' in ctor
-    if (ov::is_type<ngraph::op::v8::RandomUniform>(ngraphOp)) {
+    if (ov::is_type<ov::op::v8::RandomUniform>(ngraphOp)) {
         constant = ConstantType::NoConst;
     }
 }
diff --git a/src/plugins/intel_cpu/src/nodes/reference.h b/src/plugins/intel_cpu/src/nodes/reference.h
index 4c2a8a1310806f..59e1036617b9cc 100644
--- a/src/plugins/intel_cpu/src/nodes/reference.h
+++ b/src/plugins/intel_cpu/src/nodes/reference.h
@@ -12,7 +12,7 @@ namespace node {
 
 class Reference : public Node {
 public:
-    Reference(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context, const std::string& errorMessage);
+    Reference(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context, const std::string& errorMessage);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -29,7 +29,7 @@ class Reference : public Node {
     ov::TensorVector prepareOutputs() const;
 
 private:
-    const std::shared_ptr<ngraph::Node> ngraphOp;
+    const std::shared_ptr<ov::Node> ngraphOp;
     const std::string additionalErrorMessage;
 };
 
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp
index f8a9de782c2c09..c802115aec45d6 100644
--- a/src/plugins/intel_cpu/src/nodes/reorder.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp
@@ -29,12 +29,12 @@ bool Reorder::isExecutable() const {
     return Node::isExecutable() && !isOptimized;
 }
 
-Reorder::Reorder(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+Reorder::Reorder(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context) :  // op-based creation is rejected: the body always throws
         Node(op, context, PassThroughShapeInferFactory()) {
     IE_THROW() << "Can't create reorder node from ngraph node";
 }
 
-Reorder::Reorder(const std::string& name, const GraphContext::CPtr context) :
+Reorder::Reorder(const std::string& name, const GraphContext::CPtr& context) :  // name-based creation; passes the literal "Reorder" and name to Node
         Node("Reorder", name, context) {}
 
 void Reorder::getSupportedDescriptors() {
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h
index ef8e508fa08123..8fc7c71a27f6aa 100644
--- a/src/plugins/intel_cpu/src/nodes/reorder.h
+++ b/src/plugins/intel_cpu/src/nodes/reorder.h
@@ -17,8 +17,8 @@ namespace node {
 
 class Reorder : public Node {
 public:
-    Reorder(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
-    Reorder(const std::string& name, const GraphContext::CPtr context);
+    Reorder(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
+    Reorder(const std::string& name, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp
index 58b59b0dbfa2ab..8a64c34839ee02 100644
--- a/src/plugins/intel_cpu/src/nodes/reshape.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp
@@ -3,30 +3,26 @@
 //
 
 #include "reshape.h"
-#include "utils.hpp"
-#include <string>
-#include <dnnl_types.h>
-#include <dnnl_extension_utils.h>
-#include <openvino/opsets/opset1.hpp>
-#include <ie_ngraph_utils.hpp>
-#include <utils/shape_inference/static_shape.hpp>
-#include <utils/shape_inference/shape_inference.hpp>
-#include "utils/shape_inference/shape_inference_cpu.hpp"
 
 #include "common/cpu_memcpy.h"
+#include <ie_ngraph_utils.hpp>
+#include <openvino/op/reshape.hpp>
+#include <openvino/op/squeeze.hpp>
+#include <openvino/op/unsqueeze.hpp>
+#include <utils.hpp>
 
-using namespace dnnl;
 using namespace InferenceEngine;
 
 namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool Reshape::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Reshape::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!std::dynamic_pointer_cast<const ov::opset1::Reshape>(op) &&
-            !std::dynamic_pointer_cast<const ov::opset1::Squeeze>(op) &&
-                !std::dynamic_pointer_cast<const ov::opset1::Unsqueeze>(op)) {
+        if (!one_of(op->get_type_info(),
+                    op::v1::Reshape::get_type_info_static(),
+                    op::v0::Squeeze::get_type_info_static(),
+                    op::v0::Unsqueeze::get_type_info_static())) {
             errorMessage = "Only opset1 Reshape, Squeeze, Unsqueeze operations are supported";
             return false;
         }
@@ -226,36 +222,49 @@ class ReshapeShapeInferFactory : public ShapeInferFactory {
 };
 } // namespace
 
-Reshape::Reshape(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+Reshape::Reshape(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) :  // throws NotImplemented for ops rejected by isSupportedOperation
         Node(op, context, ReshapeShapeInferFactory(op)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    errorPrefix = std::string(op->get_type_name()) + " node with name '" + getName() + "'";
-
     if (isDynamicNode()) {
-        auto checkSecondInput = [](const std::shared_ptr<ngraph::Node>& op, const std::string opType) {
+        auto checkSecondInput = [](const std::shared_ptr<ov::Node>& op, const std::string &opType) {  // throws if input 1 has a dynamic shape
             if (op->get_input_partial_shape(1).is_dynamic()) {
                 IE_THROW() << "CPU plug-in doesn't support " << opType << " node with non static second input";
             }
         };
 
-        if (std::dynamic_pointer_cast<const ov::opset1::Reshape>(op)) {
-            checkSecondInput(op, "Reshape");
-        } else if (std::dynamic_pointer_cast<const ov::opset1::Squeeze>(op)) {
+        if (op->get_type_info() == op::v1::Reshape::get_type_info_static()) {  // type-info equality test replaces the former dynamic_pointer_cast
+            checkSecondInput(op, getTypeStr());  // getTypeStr() replaces the hard-coded "Reshape" in the error text
+        } else if (op->get_type_info() == op::v0::Squeeze::get_type_info_static()) {
             if (op->get_input_size() == 1)
                 IE_THROW() << "CPU plug-in doesn't support Squeeze node with inputs num equal 1";
-            checkSecondInput(op, "Squeeze");
-        } else if (std::dynamic_pointer_cast<const ov::opset1::Unsqueeze>(op)) {
-            checkSecondInput(op, "Unsqueeze");
+            checkSecondInput(op, getTypeStr());
+        } else if (op->get_type_info() == op::v0::Unsqueeze::get_type_info_static()) {
+            checkSecondInput(op, getTypeStr());
         } else {
             IE_THROW() << "Unsupported operation type via reshape node";
         }
     }
 }
 
+template<typename T>
+bool Reshape::validateSecondInputValues(const void* inPtr) const {
+    // Returns true (after refreshing the whole cache from inPtr) when any cached second-input value differs; false otherwise.
+    const auto sndInput = reinterpret_cast<const T *>(inPtr);
+    for (size_t i = 0; i < lastSecondInputValues.size(); i++) {
+        if (lastSecondInputValues[i] != static_cast<int64_t>(sndInput[i])) {
+            for (size_t j = 0; j < lastSecondInputValues.size(); j++) {
+                lastSecondInputValues[j] = static_cast<int64_t>(sndInput[j]);  // copy each element, not just the mismatching one
+            }
+            return true;
+        }
+    }
+    return false;
+}
+
 bool Reshape::needShapeInfer() const {
     if (inputShapesModified()) {
         return true;
@@ -264,16 +273,12 @@ bool Reshape::needShapeInfer() const {
     if (lastSecondInputValues.empty()) {
         lastSecondInputValues.resize(mem.getStaticDims()[0], 0);
     }
-    const int32_t *sndInput = reinterpret_cast<const int32_t *>(mem.getData());
-    for (size_t i = 0; i < lastSecondInputValues.size(); i++) {
-        if (lastSecondInputValues[i] != sndInput[i]) {
-            for (size_t i = 0; i < lastSecondInputValues.size(); i++) {
-                lastSecondInputValues[i] = sndInput[i];
-            }
-            return true;
-        }
+
+    switch (mem.getDesc().getPrecision()) {
+        case Precision::I64: return validateSecondInputValues<int64_t>(mem.getData());
+        case Precision::I32: return validateSecondInputValues<int32_t>(mem.getData());
+        default: THROW_CPU_NODE_ERR << "has unsupported  second input data type.";
     }
-    return false;
 }
 
 void Reshape::getSupportedDescriptors() {
@@ -287,9 +292,12 @@ void Reshape::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    InferenceEngine::Precision inPrec = getOriginalInputPrecisionAtPort(0);
-    InferenceEngine::Precision outPrec = getOriginalOutputPrecisionAtPort(0);
-    InferenceEngine::Precision secondInPrc = InferenceEngine::Precision::I32;
+    auto inPrec = getOriginalInputPrecisionAtPort(0);
+    Precision secondInPrc = Precision::I32;
+    if (getOriginalInputPrecisions().size() > 1) {
+        secondInPrc = getOriginalInputPrecisionAtPort(1);
+    }
+    const auto &outPrec = getOriginalOutputPrecisionAtPort(0);
 
     // Current reshape implementation is simple memory reinterpret,
     // same precision on input and output is required
@@ -308,7 +316,7 @@ void Reshape::initSupportedPrimitiveDescriptors() {
     for (size_t i = 0; i < getParentEdges().size(); i++) {
         config.inConfs[i].inPlace(0 == i && canBeInPlace ? 0 : -1);
         config.inConfs[i].constant(false);
-        config.inConfs[i].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc((i > 0 ? secondInPrc : inPrec), getInputShapeAtPort(i)));
+        config.inConfs[i].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc((i == 0 ? inPrec : secondInPrc), getInputShapeAtPort(i)));
     }
     config.outConfs.resize(1);
     config.outConfs[0].inPlace(canBeInPlace ? 0 : -1);
diff --git a/src/plugins/intel_cpu/src/nodes/reshape.h b/src/plugins/intel_cpu/src/nodes/reshape.h
index e62253e99fa8a3..125cd6cd5c661e 100644
--- a/src/plugins/intel_cpu/src/nodes/reshape.h
+++ b/src/plugins/intel_cpu/src/nodes/reshape.h
@@ -4,12 +4,7 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
-#include <vector>
-#include <memory>
-#include "input.h"
 
 namespace ov {
 namespace intel_cpu {
@@ -17,7 +12,7 @@ namespace node {
 
 class Reshape : public Node {
 public:
-    Reshape(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Reshape(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -29,12 +24,13 @@ class Reshape : public Node {
     void executeDynamicImpl(dnnl::stream strm) override;
     void execute(dnnl::stream strm) override;
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    template<typename T>
+    bool validateSecondInputValues(const void* inPtr) const;
 
-private:
-    mutable std::vector<int> lastSecondInputValues;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
-    std::string errorPrefix;
+private:
+    mutable std::vector<int64_t> lastSecondInputValues;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp
index 004cccf763e90c..35c787981bd736 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp
@@ -17,8 +17,12 @@
 
 #include "ov_ops/augru_cell.hpp"
 #include "ov_ops/augru_sequence.hpp"
-
-#include <ngraph/node.hpp>
+#include <openvino/op/gru_cell.hpp>
+#include <openvino/op/gru_sequence.hpp>
+#include <openvino/op/lstm_cell.hpp>
+#include <openvino/op/lstm_sequence.hpp>
+#include <openvino/op/rnn_cell.hpp>
+#include <openvino/op/rnn_sequence.hpp>
 
 #include <oneapi/dnnl/dnnl.hpp>
 #include <string>
@@ -388,7 +392,7 @@ RNN::RNN(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         yIdx = 0; hoIdx = 1; coIdx = 2;
     }
 
-    auto rnnCellBase = std::dynamic_pointer_cast<ngraph::op::util::RNNCellBase>(op);
+    auto rnnCellBase = std::dynamic_pointer_cast<op::util::RNNCellBase>(op);
     if (!rnnCellBase)
         THROW_ERROR << "does not have original layer for RNNCell.";
 
diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp
index 27796341f786b9..171cea2e4887be 100644
--- a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp
+++ b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp
@@ -3,16 +3,13 @@
 //
 
 #include "scatter_update.h"
-#include <string>
-#include <vector>
-#include <onednn/dnnl.h>
+
 #include <dnnl_extension_utils.h>
 #include "ie_parallel.hpp"
-#include <algorithm>
 #include "common/cpu_memcpy.h"
-
-#include <ngraph/opsets/opset3.hpp>
-#include <ngraph/opsets/opset4.hpp>
+#include <openvino/op/scatter_elements_update.hpp>
+#include <openvino/op/scatter_nd_update.hpp>
+#include <openvino/op/scatter_update.hpp>
 
 using namespace dnnl;
 using namespace InferenceEngine;
@@ -21,11 +18,11 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool ScatterUpdate::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool ScatterUpdate::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        auto scatterElemUpd = ngraph::as_type_ptr<const ngraph::opset3::ScatterElementsUpdate>(op);
-        auto scatterUpd = ngraph::as_type_ptr<const ngraph::opset3::ScatterUpdate>(op);
-        auto scatterNdUpd = ngraph::as_type_ptr<const ngraph::opset4::ScatterNDUpdate>(op);
+        auto scatterElemUpd = ov::as_type_ptr<const op::v3::ScatterElementsUpdate>(op);
+        auto scatterUpd = ov::as_type_ptr<const op::v3::ScatterUpdate>(op);
+        auto scatterNdUpd = ov::as_type_ptr<const op::v3::ScatterNDUpdate>(op);
         if (!scatterElemUpd && !scatterUpd && !scatterNdUpd) {
             const std::string opType = op->get_type_name();
             errorMessage = "Only opset" + opType == "ScatterNDUpdate" ? "4 " : "3 " + opType + " operation is supported";
@@ -41,18 +38,17 @@ bool ScatterUpdate::isExecutable() const {
     return !isInputTensorAtPortEmpty(DATA_ID);
 }
 
-ScatterUpdate::ScatterUpdate(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+ScatterUpdate::ScatterUpdate(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)),
           dataSize(0lu), indicesSize(0lu), axisSize(0lu),
           dataPrec(Precision::UNSPECIFIED),
           indicesPrec(Precision::UNSPECIFIED),
           axisPrec(Precision::UNSPECIFIED) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        errorPrefix = std::string(op->get_type_name()) + " node with name '" + getName() + "'";
-    } else {
+    if (!isSupportedOperation(op, errorMessage)) {  // guard clause: unsupported ops are rejected before any further setup
         IE_THROW(NotImplemented) << errorMessage;
     }
+    errorPrefix = std::string(op->get_type_name()) + " node with name '" + getName() + "'";  // prepended to this node's IE_THROW messages
 }
 
 void ScatterUpdate::getSupportedDescriptors() {
@@ -291,7 +287,7 @@ void ScatterUpdate::execute(dnnl::stream strm) {
             size_t start = 0, end = 0;
             splitter(indicesBlockND[0], nthr, ithr, start, end);
             for (size_t i = start; i < end; i++) {
-                int64_t idxValue =  getIndicesValue(indicesPtr, i);
+                int64_t idxValue = getIndicesValue(indicesPtr, i);
                 if (idxValue >= static_cast<int64_t>(srcDimAxis) || idxValue < 0) {
                     IE_THROW() << errorPrefix
                     << " have indices value that points to non-existing output tensor element";
diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.h b/src/plugins/intel_cpu/src/nodes/scatter_update.h
index 835077b89778b5..512110ef2dc646 100644
--- a/src/plugins/intel_cpu/src/nodes/scatter_update.h
+++ b/src/plugins/intel_cpu/src/nodes/scatter_update.h
@@ -22,7 +22,7 @@ enum class ScatterUpdateMode {
 
 class ScatterUpdate : public Node {
 public:
-    ScatterUpdate(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    ScatterUpdate(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -36,7 +36,7 @@ class ScatterUpdate : public Node {
     void executeDynamicImpl(dnnl::stream strm) override;
 
     bool isExecutable() const override;
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
 
 private:
     void scatterUpdate(uint8_t *indicesPtr, uint8_t *updatePtr, int axis, uint8_t *dstDataPtr);
diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.cpp b/src/plugins/intel_cpu/src/nodes/shapeof.cpp
index e3b9a8bcc81640..b0d6c36c5f6295 100644
--- a/src/plugins/intel_cpu/src/nodes/shapeof.cpp
+++ b/src/plugins/intel_cpu/src/nodes/shapeof.cpp
@@ -41,11 +41,11 @@ class ShapeOfShapeInferFactory : public ShapeInferFactory {
 };
 } // namespace
 
-bool ShapeOf::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool ShapeOf::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (!one_of(op->get_type_info(),
-                    ngraph::op::v0::ShapeOf::get_type_info_static(),
-                    ngraph::op::v3::ShapeOf::get_type_info_static())) {
+                    ov::op::v0::ShapeOf::get_type_info_static(),
+                    ov::op::v3::ShapeOf::get_type_info_static())) {
             errorMessage = "Node is not an instance of ShapeOf form the operation set v1 or v3.";
             return false;
         }
@@ -55,35 +55,38 @@ bool ShapeOf::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op
     return true;
 }
 
-ShapeOf::ShapeOf(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
-    : Node(op, context, ShapeOfShapeInferFactory()) {
+ShapeOf::ShapeOf(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
+        : Node(op, context, ShapeOfShapeInferFactory()) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        errorPrefix = "ShapeOf layer with name '" + getName() + "' ";
-        if (op->get_input_partial_shape(0).size() == 0)
-            IE_THROW() << errorPrefix << "gets unsupported input 0D tensor (scalar)";
-    } else {
+    if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
+    if (op->get_input_partial_shape(0).size() == 0) {
+        THROW_CPU_NODE_ERR << "gets unsupported input 0D tensor (scalar)";
+    }
 }
 
 void ShapeOf::getSupportedDescriptors() {
     if (getParentEdges().size() != 1)
-        IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size();
+        THROW_CPU_NODE_ERR << "has incorrect number of input edges: " << getParentEdges().size();
     if (getChildEdges().empty())
-        IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size();
+        THROW_CPU_NODE_ERR << "has incorrect number of output edges: " << getChildEdges().size();
 }
 
 void ShapeOf::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    Precision precision = getOriginalInputPrecisionAtPort(0);
+    const auto inPrc = getOriginalInputPrecisionAtPort(0);
+    const auto &outPrc = getOriginalOutputPrecisionAtPort(0);
+    if (!one_of(outPrc, Precision::I32, Precision::I64)) {
+        THROW_CPU_NODE_ERR << "has unsupported output precision: " << outPrc;
+    }
 
     const LayoutType dataFormats[4] = { LayoutType::ncsp, LayoutType::nspc, LayoutType::nCsp16c, LayoutType::nCsp8c };
     for (const auto &df : dataFormats) {
-        addSupportedPrimDesc({{df, precision}},
-                             {{LayoutType::ncsp, Precision::I32}},
+        addSupportedPrimDesc({{df, inPrc}},
+                             {{LayoutType::ncsp, outPrc}},
                              impl_desc_type::ref);
     }
 }
@@ -96,14 +99,25 @@ void ShapeOf::execute(dnnl::stream strm) {
     auto inPtr = getParentEdgeAt(0)->getMemoryPtr();
     auto outPtr = getChildEdgeAt(0)->getMemoryPtr();
     auto inDims = inPtr->getStaticDims();
-    size_t dimsCount = inDims.size();
-    if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0])
-        IE_THROW() << errorPrefix << "has inconsistent input shape and output size";
-
-    auto *dst = reinterpret_cast<int *>(getChildEdgeAt(0)->getMemoryPtr()->getData());
+    const size_t dimsCount = inDims.size();
+    if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0]) {
+        THROW_CPU_NODE_ERR << "has inconsistent input shape and output size";
+    }
 
-    for (size_t i = 0; i < dimsCount; i++) {
-        dst[i] = inDims[i];
+    const auto execPrc = outPtr->getDesc().getPrecision();
+    if (execPrc == Precision::I64) {
+        auto dstData = reinterpret_cast<int64_t *>(outPtr->getData());
+        for (size_t i = 0; i < dimsCount; i++) {
+            dstData[i] = inDims[i];
+        }
+    } else if (execPrc == Precision::I32) {
+        auto dstData = reinterpret_cast<int32_t *>(outPtr->getData());
+        for (size_t i = 0; i < dimsCount; i++) {
+            dstData[i] = inDims[i];
+        }
+    } else {
+        // Fail loudly instead of leaving the output buffer unwritten for an unexpected precision.
+        THROW_CPU_NODE_ERR << "has unsupported output precision: " << execPrc;
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.h b/src/plugins/intel_cpu/src/nodes/shapeof.h
index e313d3449e5f98..a200ed80fb5a83 100644
--- a/src/plugins/intel_cpu/src/nodes/shapeof.h
+++ b/src/plugins/intel_cpu/src/nodes/shapeof.h
@@ -16,7 +16,7 @@ namespace node {
 
 class ShapeOf : public Node {
 public:
-    ShapeOf(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    ShapeOf(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -28,9 +28,6 @@ class ShapeOf : public Node {
     bool isExecutable() const override;
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
-
-private:
-    std::string errorPrefix;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp
index 84a74766875f63..1cb34dfc5e0581 100644
--- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp
+++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cmath>
 #include <common/primitive_hashing_utils.hpp>
+#include <openvino/op/shuffle_channels.hpp>
 
 #define THROW_SHCH_ERROR IE_THROW() << "ShuffleChannels layer with name '" << getName() << "' "
 
@@ -54,7 +55,7 @@ bool ShuffleChannels::ShuffleChannelsAttributes::operator==(const ShuffleChannel
 
 bool ShuffleChannels::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
-        auto shuffleChannels = ov::as_type_ptr<const ngraph::op::v0::ShuffleChannels>(op);
+        auto shuffleChannels = ov::as_type_ptr<const op::v0::ShuffleChannels>(op);
         if (!shuffleChannels) {
             errorMessage = "Only opset1 ShuffleChannels operation is supported";
             return false;
@@ -75,7 +76,7 @@ ShuffleChannels::ShuffleChannels(const std::shared_ptr<ngraph::Node>& op, const
     if (inputShapes.size() != 1 || outputShapes.size() != 1)
         THROW_SHCH_ERROR << "has incorrect number of input/output edges.";
 
-    auto shuffleChannels = ov::as_type_ptr<const ngraph::op::v0::ShuffleChannels>(op);
+    auto shuffleChannels = ov::as_type_ptr<const op::v0::ShuffleChannels>(op);
     attrs.group = shuffleChannels->get_group();
     attrs.axis = shuffleChannels->get_axis();
     attrs.dataRank = getInputShapeAtPort(0).getRank();
diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp
index 710abbfffba80f..55c419d25d2152 100644
--- a/src/plugins/intel_cpu/src/nodes/split.cpp
+++ b/src/plugins/intel_cpu/src/nodes/split.cpp
@@ -3,19 +3,14 @@
 //
 
 #include "split.h"
+
 #include "common/cpu_memcpy.h"
-#include "common/blocked_desc_creator.h"
-#include <vector>
-#include <dnnl_types.h>
-#include <dnnl_extension_utils.h>
 #include <ie_parallel.hpp>
-#include "utils/general_utils.h"
-#include <memory_desc/cpu_memory_desc_utils.h>
-#include "utils/ngraph_utils.hpp"
+#include <openvino/op/constant.hpp>
+#include <openvino/op/split.hpp>
+#include <openvino/op/variadic_split.hpp>
 #include <partitioned_mem_mgr.h>
 
-#define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' "
-
 using namespace dnnl;
 using namespace InferenceEngine;
 
@@ -23,13 +18,13 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
-bool Split::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool Split::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!one_of(op->get_type_info(), ngraph::op::v1::Split::get_type_info_static(), ngraph::op::v1::VariadicSplit::get_type_info_static())) {
+        if (!one_of(op->get_type_info(), op::v1::Split::get_type_info_static(), op::v1::VariadicSplit::get_type_info_static())) {
             errorMessage = "Only opset1 Split and VariadicSplit operations are supported";
             return false;
         }
-        auto axisOp = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+        auto axisOp = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(1));
         if (!axisOp) {
             errorMessage = "Constant expected as the axis input.";
             return false;
@@ -44,31 +39,36 @@ bool Split::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op,
     return true;
 }
 
-Split::Split(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+Split::Split(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context) :
         Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    if (ngraph::as_type_ptr<const ngraph::op::v1::Split>(op)) {
+    if (ov::as_type_ptr<const op::v1::Split>(op)) {
         INPUTS_NUM = 2;
-    } else if (ngraph::as_type_ptr<const ngraph::op::v1::VariadicSplit>(op)) {
+    } else if (ov::as_type_ptr<const op::v1::VariadicSplit>(op)) {
         INPUTS_NUM = 3;
-        if (!ngraph::is_type<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2))) {
+        if (!ov::is_type<op::v0::Constant>(op->get_input_node_shared_ptr(2))) {
             this->splitLengths.resize(op->get_input_shape(2)[0]);
             this->constSplitLengths = false;
         }
     }
 
     const auto inRank = getInputShapeAtPort(0).getRank();
-    auto axisOp = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
-    auto axis = axisOp->cast_vector<int64_t>()[0];
+    auto axisOp = ov::as_type_ptr<op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    int64_t axis;
+    if (axisOp->get_element_type() == ov::element::i64) {
+        axis = axisOp->get_data_ptr<int64_t>()[0];
+    } else {
+        axis = axisOp->cast_vector<int64_t>()[0];
+    }
     if (axis < 0) {
         axis += inRank;
     }
     if (axis >= static_cast<int64_t>(inRank)) {
-        THROW_ERROR << "Split node with name '" << op->get_friendly_name() << "' has invalid value of axis parameter: " << axis;
+        THROW_CPU_NODE_ERR << "has invalid value of axis parameter: " << axis;
     }
     this->axis = axis;
 }
@@ -82,24 +82,24 @@ void Split::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    const auto &srcShape = getInputShapeAtPort(0);
-    const auto &dstFirstDims = getOutputShapeAtPort(0).getDims();
+    const auto& srcShape = getInputShapeAtPort(0);
+    const auto& dstFirstDims = getOutputShapeAtPort(0).getDims();
     for (size_t i = 0; i < outputShapes.size(); i++) {
         const auto &o_Dims = outputShapes[i].getDims();
         if (dstFirstDims.size() != o_Dims.size()) {
-            THROW_ERROR << "only supports output blobs with equal number of dimensions";
+            THROW_CPU_NODE_ERR << "only supports output blobs with equal number of dimensions";
         }
 
         for (size_t j = 0; j < dstFirstDims.size(); j++) {
             if (j == axis)
                 continue;
             if (!dimsEqualWeak(o_Dims[j], dstFirstDims[j]))
-                THROW_ERROR << "has incorrect output dimensions";
+                THROW_CPU_NODE_ERR << "has incorrect output dimensions";
         }
     }
 
-    InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0);
-    const auto axisPrecision = Precision::I32;
+    const auto& inpPrecision = getOriginalInputPrecisionAtPort(0);
+    const auto& axisPrecision = getOriginalInputPrecisionAtPort(1);
 
     // Set plain and tailC formats
     std::vector<LayoutType> tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc };
@@ -233,7 +233,7 @@ bool Split::needPrepareParams() const {
 void Split::prepareParams() {
     const auto &srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
     if (!srcMemPtr || !srcMemPtr->isAllocated()) {
-        THROW_ERROR << "has not allocated input memory";
+        THROW_CPU_NODE_ERR << "has not allocated input memory";
     }
 
     if (!constSplitLengths) {
@@ -248,7 +248,7 @@ void Split::prepareParams() {
     for (size_t port = 0; port < outputShapes.size(); ++port) {
         const auto &outMemPtr = this->getChildEdgesAtPort(port)[0]->getMemoryPtr();
         if (!outMemPtr || !outMemPtr->isAllocated()) {
-            THROW_ERROR << "has not allocated destination memory";
+            THROW_CPU_NODE_ERR << "has not allocated destination memory";
         }
 
         if (outMemPtr->getShape().hasZeroDims()) {
@@ -278,7 +278,7 @@ void Split::execute(dnnl::stream strm) {
     }
 
     if (dstMemPtrs.empty())
-        THROW_ERROR << "Output data pointers have not been initialized.";
+        THROW_CPU_NODE_ERR << "Output data pointers have not been initialized.";
 
     const auto &srcMem = getParentEdgesAtPort(0)[0]->getMemory();
 
@@ -300,7 +300,7 @@ void Split::initOptimalPrimitiveDescriptor() {
     Node::initOptimalPrimitiveDescriptor();
     auto selected_pd = getSelectedPrimitiveDescriptor();
     if (selected_pd == nullptr)
-        THROW_ERROR << "Preferable primitive descriptor is not set.";
+        THROW_CPU_NODE_ERR << "Preferable primitive descriptor is not set.";
 
     auto config = selected_pd->getConfig();
     canUseOptimizedNspc2Ncsp = false;
@@ -462,7 +462,7 @@ std::vector<uint8_t*> Split::getRawDstMemPtrs() const {
     for (size_t i = 0; i < dstMemPtrs.size(); ++i) {
         result[i] = reinterpret_cast<uint8_t*>(dstMemPtrs[i].second->getData());
         if (!result[i]) {
-            THROW_ERROR << "can't get child edge indx " << dstMemPtrs[i].first << " data.";
+            THROW_CPU_NODE_ERR << "can't get child edge indx " << dstMemPtrs[i].first << " data.";
         }
     }
     return result;
diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h
index 5402d748832d7d..df3dbcc2f16ac9 100644
--- a/src/plugins/intel_cpu/src/nodes/split.h
+++ b/src/plugins/intel_cpu/src/nodes/split.h
@@ -4,9 +4,7 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <node.h>
-#include <string>
 
 namespace ov {
 namespace intel_cpu {
@@ -14,7 +12,7 @@ namespace node {
 
 class Split : public Node {
 public:
-    Split(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Split(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr& context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp
index aa4dae10df7d86..5e996dc88a2905 100644
--- a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp
+++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp
@@ -6,8 +6,8 @@
 
 #include "ie_parallel.hpp"
 #include "common/cpu_memcpy.h"
-#include "input.h"
-#include <ngraph/opsets/opset1.hpp>
+#include <openvino/op/slice.hpp>
+#include <openvino/op/strided_slice.hpp>
 #include <utils/shape_inference/shape_inference_ngraph.hpp>
 #include "slice_shape_inference_utils.hpp"
 
@@ -139,7 +139,7 @@ class StridedSliceShapeInferFactory : public ShapeInferFactory {
 
 }  // namespace
 
-StridedSlice::StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) :
+StridedSlice::StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context) :
         Node(op, context, StridedSliceShapeInferFactory(op)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
@@ -151,10 +151,10 @@ StridedSlice::StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphConte
 
     if ((attrs.isStridedSliceOp && (inputShapes.size() < 3 || inputShapes.size() > 4)) ||
             (!attrs.isStridedSliceOp && (inputShapes.size() < 4 || inputShapes.size() > 5))) {
-        IE_THROW() << errorPrefix << "has incorrect number of input edges";
+        THROW_CPU_NODE_ERR << "has incorrect number of input edges";
     }
     if (outputShapes.size() != 1) {
-        IE_THROW() << errorPrefix << "has incorrect number of output edges";
+        THROW_CPU_NODE_ERR << "has incorrect number of output edges";
     }
 
     if (inputShapes.size() > STRIDE_ID) {
@@ -229,7 +229,7 @@ StridedSlice::StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphConte
             attrs.ellipsisPos1 = attrs.ellipsisMask[i] == 1 && attrs.ellipsisPos1 == -1 ? i : attrs.ellipsisPos1;
         }
         if (attrs.ellipsisMaskCounter > 1)
-            IE_THROW() << errorPrefix << "has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed";
+            THROW_CPU_NODE_ERR << "has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed";
 
         int newAxis = std::accumulate(attrs.newAxisMask.begin(), attrs.newAxisMask.end(), 0);
         int shrinkAxis = std::accumulate(attrs.shrinkAxisMask.begin(), attrs.shrinkAxisMask.end(), 0);
@@ -242,7 +242,7 @@ StridedSlice::StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphConte
         if (!isConstantInput[type])
             return;
 
-        const auto constNode = ov::as_type_ptr<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(type));
+        const auto constNode = ov::as_type_ptr<const ov::opset1::Constant>(op->get_input_node_shared_ptr(type));
         parameter = constNode->cast_vector<int>();
 
         auto size = constNode->get_shape()[0];
@@ -314,7 +314,7 @@ void StridedSlice::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    const InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
+    const auto &dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
     const InferenceEngine::Precision iPrecision = Precision::I32;
     attrs.dataSize = dataPrecision.size();
 
@@ -420,7 +420,7 @@ bool StridedSlice::needShapeInfer() const {
 
 void StridedSlice::execute(dnnl::stream strm) {
     if (!execPtr)
-        IE_THROW() << errorPrefix << "doesn't have compiled executor!";
+        THROW_CPU_NODE_ERR << "doesn't have compiled executor!";
 
     execPtr->exec(srcMemory, dstMemory);
 }
diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.h b/src/plugins/intel_cpu/src/nodes/strided_slice.h
index 3a8338c97e23a0..732f04b229d110 100644
--- a/src/plugins/intel_cpu/src/nodes/strided_slice.h
+++ b/src/plugins/intel_cpu/src/nodes/strided_slice.h
@@ -14,7 +14,7 @@ namespace node {
 
 class StridedSlice : public Node {
 public:
-    StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
+    StridedSlice(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
index c6e8f4c03161d2..ea4041b25618ec 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -6,19 +6,8 @@
 
 #include <ie_parallel.hpp>
 
-#include <vector>
-#include <algorithm>
-#include <array>
-#include <tuple>
-
-#include <dnnl_debug.h>
-#include <onednn/dnnl.h>
-#include <dnnl_extension_utils.h>
-
-#include <ngraph/opsets/opset1.hpp>
-#include <ngraph/pass/visualize_tree.hpp>
-#include <ngraph/rt_info.hpp>
-#include <ie_ngraph_utils.hpp>
+#include <openvino/opsets/opset1.hpp>
+#include <openvino/pass/visualize_tree.hpp>
 
 #include <snippets/op/subgraph.hpp>
 #include "snippets/pass/matmul_to_brgemm.hpp"
@@ -34,11 +23,13 @@
 #include "transformations/cpu_opset/common/pass/convert_to_swish_cpu.hpp"
 #include "transformations/defs.hpp"
 
+#include <algorithm>
+#include <array>
+#include <tuple>
+#include <vector>
+
 using namespace InferenceEngine;
-using namespace dnnl::impl::utils;
 using namespace dnnl::impl::cpu;
-using namespace dnnl::impl::cpu::x64;
-using namespace Xbyak;
 
 namespace ov {
 namespace intel_cpu {
@@ -78,8 +69,7 @@ class SnippetShapeInferFactory : public ShapeInferFactory {
 
 Snippet::Snippet(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         : Node(op, context, SnippetShapeInferFactory(this)) {
-    host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ?
-        dnnl::impl::cpu::x64::avx512_core : dnnl::impl::cpu::x64::avx2;
+    host_isa = x64::mayiuse(x64::avx512_core) ? x64::avx512_core : x64::avx2;
     original_snippet = ov::as_type_ptr<snippets::op::Subgraph>(op);
     if (!original_snippet) {
         IE_THROW(NotImplemented) << "Node is not an instance of snippets::op::Subgraph";
@@ -109,7 +99,8 @@ void Snippet::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    const std::set<Precision> supportedPrecisions = { Precision::FP32, Precision::I32, Precision::BF16, Precision::FP16, Precision::I8, Precision::U8 };
+    const std::set<Precision> supportedPrecisions =
+            { Precision::I64, Precision::FP32, Precision::I32, Precision::BF16, Precision::FP16, Precision::I8, Precision::U8 };
 
     bool dimRanksAreEqual = true;
     for (size_t i = 0; dimRanksAreEqual && i < inputShapes.size(); i++) {
@@ -157,7 +148,7 @@ void Snippet::initSupportedPrimitiveDescriptors() {
 
                 return std::make_shared<CpuBlockedMemoryDesc>(prc, shape, blocks, order, offset);
             } else if (lt == Blocked && shape.getRank() != 1 && (shape.getMinDims()[1] != Shape::UNDEFINED_DIM && shape.getMinDims()[1] > 1)) {
-                size_t blockSize = mayiuse(dnnl::impl::cpu::x64::avx512_core) ? 16 : 8;
+                size_t blockSize = x64::mayiuse(x64::avx512_core) ? 16 : 8;
 
                 VectorDims blocks = dims;
                 VectorDims order(blocks.size());
@@ -188,7 +179,7 @@ void Snippet::initSupportedPrimitiveDescriptors() {
                 static_cast<InferenceEngine::Precision>(InferenceEngine::Precision::BF16) :
                 originalInputPrecision;
             if (supportedPrecisions.count(precision) == 0)
-                IE_THROW() << "Subgraph node with name `" << getName() << "` doesn't support " << precision << " precision.";
+                THROW_CPU_NODE_ERR << "doesn't support " << precision << " precision.";
 
             const auto equalPrecisions = getOriginalOutputPrecisions().size() == 1 &&
                     precision == getOriginalOutputPrecisionAtPort(0);
@@ -207,7 +198,7 @@ void Snippet::initSupportedPrimitiveDescriptors() {
         for (size_t i = 0; i < outputShapes.size(); i++) {
             auto precision = getOriginalOutputPrecisionAtPort(i);
             if (supportedPrecisions.count(precision) == 0)
-                IE_THROW() << "Subgraph node with name `" << getName() << "` doesn't support " << precision << " precision.";
+                THROW_CPU_NODE_ERR << "doesn't support " << precision << " precision.";
 
             BlockedMemoryDesc::CmpMask outputMask = BlockedMemoryDesc::SKIP_OFFSET_MASK;
             PortConfig portConfig;
@@ -221,9 +212,9 @@ void Snippet::initSupportedPrimitiveDescriptors() {
         }
 
         impl_desc_type impl_type = impl_desc_type::unknown;
-        if (mayiuse(x64::avx512_core)) {
+        if (x64::mayiuse(x64::avx512_core)) {
             impl_type = impl_desc_type::jit_avx512;
-        } else if (mayiuse(x64::avx2)) {
+        } else if (x64::mayiuse(x64::avx2)) {
             impl_type = impl_desc_type::jit_avx2;
         }
         return {config, impl_type};
@@ -239,8 +230,9 @@ void Snippet::initSupportedPrimitiveDescriptors() {
 void Snippet::selectOptimalPrimitiveDescriptor() {
     selectPreferPrimitiveDescriptor(getImplPriority(), true);
 }
-InferenceEngine::Precision Snippet::getRuntimePrecision() const {
-    std::vector<InferenceEngine::Precision> inputPrecisions;
+
+Precision Snippet::getRuntimePrecision() const {
+    std::vector<Precision> inputPrecisions;
     for (size_t i = 0; i < getParentEdges().size(); i++) {
         auto parentEdge = getParentEdgeAt(i);
         if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated && !parentEdge->getParent()->isConstant()) {
@@ -327,7 +319,7 @@ ov::PartialShape Snippet::canonicalizeBody() {
             dims.emplace_back(d == Shape::UNDEFINED_DIM ? -1 : d);
         ov::PartialShape shape(dims);
         ov::AxisVector blocking(blockedDesc->getOrder());
-        ov::element::Type precision = InferenceEngine::details::convertPrecision(blockedDesc->getPrecision());
+        ov::element::Type precision = details::convertPrecision(blockedDesc->getPrecision());
         return snippets::op::Subgraph::BlockedShape{shape, blocking, precision};
     };
     inputShapeIsBlocked.resize(inputShapes.size(), false);
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h
index 435b709b492f74..7486c1bf63d082 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.h
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.h
@@ -1,17 +1,12 @@
-// Copyright (C) 2020-2022 Intel Corporation
+// Copyright (C) 2020-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <ie_common.h>
-
-#include <onednn/dnnl.h>
-#include <cpu/x64/jit_generator.hpp>
-#include "emitters/x64/jit_snippets_emitters.hpp"
-
 #include <node.h>
 #include "snippets/op/subgraph.hpp"
+#include "emitters/x64/jit_snippets_emitters.hpp"
 
 #include <array>
 
@@ -24,7 +19,7 @@ namespace node {
 /// precision: fp32
 class Snippet : public Node {
 public:
-    Snippet(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Snippet(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
     ~Snippet() override = default;
 
     void getSupportedDescriptors() override {};
diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
index be139aaf8d0c75..9f21ce7a343083 100644
--- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
+++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
@@ -164,7 +164,7 @@ class IterCountPortHelper : public PortMapHelper {
 public:
     IterCountPortHelper(const MemoryPtr &to, const dnnl::engine& eng) {
         // Only scalar I32 tensor is supported
-        IE_ASSERT(to->getDataType() == memory::data_type::s32);
+        // TODO: the s32-only assertion was disabled (presumably for I64 support); replace it with a check of the supported data types.
         IE_ASSERT(to->getShape() == Shape(VectorDims{1}));
         mem_holder_dst = to->getPrimitive();
     }
diff --git a/src/plugins/intel_cpu/src/nodes/tile.cpp b/src/plugins/intel_cpu/src/nodes/tile.cpp
index 05392e7f1506fd..bc6f57d8c15e2e 100644
--- a/src/plugins/intel_cpu/src/nodes/tile.cpp
+++ b/src/plugins/intel_cpu/src/nodes/tile.cpp
@@ -4,6 +4,8 @@
 
 #include "tile.h"
 #include "common/cpu_memcpy.h"
+#include <openvino/op/constant.hpp>
+#include <openvino/op/tile.hpp>
 
 using namespace InferenceEngine;
 
@@ -13,7 +15,7 @@ namespace node {
 
 bool Tile::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!ov::is_type<ov::op::v0::Tile>(op)) {
+        if (!ov::is_type<op::v0::Tile>(op)) {
             errorMessage = "Only opset1 Tile operation is supported.";
             return false;
         }
@@ -22,7 +24,7 @@ bool Tile::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::
             return false;
         }
         if (!isDynamicNgraphNode(op) &&
-                !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TILE_REPEATS))) {
+                !ov::is_type<op::v0::Constant>(op->get_input_node_ptr(TILE_REPEATS))) {
             errorMessage = "Only constant 'Repeats' input is supported with static shapes.";
             return false;
         }
@@ -32,18 +34,16 @@ bool Tile::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::
     return true;
 }
 
-Tile::Tile(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) :
+Tile::Tile(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context) :
         Node(op, context, NgraphShapeInferFactory(op, PortMask(TILE_REPEATS))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    errorPrefix = "Tile node with name '" + getName() + "'";
-
-    if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TILE_REPEATS))) {
+    if (auto repeatsOp = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(TILE_REPEATS))) {
         constMap[TILE_REPEATS] = true;
-        repeats = originRepeats = ov::as_type<const ov::op::v0::Constant>(op->get_input_node_ptr(TILE_REPEATS))->cast_vector<size_t>();
+        repeats = originRepeats = repeatsOp->cast_vector<Dim>();
         while (repeats.size() < getInputShapeAtPort(TILE_INPUT).getRank()) {
             repeats.insert(repeats.begin(), 1lu);
         }
@@ -61,24 +61,24 @@ void Tile::getSupportedDescriptors() {
         return result;
     };
     if (getParentEdges().size() != 2)
-        IE_THROW() << errorPrefix << " has incorrect number of input edges. "
+        THROW_CPU_NODE_ERR << " has incorrect number of input edges. "
                 "Expected: 2, Actual: " << getParentEdges().size();
     if (getChildEdges().empty())
-        IE_THROW() << errorPrefix << " has no output edges.";
+        THROW_CPU_NODE_ERR << " has no output edges.";
     const auto& dstDims0 = getOutputShapeAtPort(0).getDims();
     for (size_t i = 1lu; i < outputShapes.size(); i++) {
         const auto& dstDims = getOutputShapeAtPort(i).getDims();
         if (dstDims.size() != dstDims0.size())
-            IE_THROW() << errorPrefix << " has output edges 0 and " << i << " with different ranks: " << dstDims0.size() << " and " << dstDims.size();
+            THROW_CPU_NODE_ERR << " has output edges 0 and " << i << " with different ranks: " << dstDims0.size() << " and " << dstDims.size();
         for (size_t j = 0; j < dstDims0.size(); j++) {
             if (dstDims0[j] != dstDims[j]) {
-                IE_THROW() << errorPrefix << " has output edges 0 and " << i << " with different dims: " << vec_to_string(dstDims0) << " and "
+                THROW_CPU_NODE_ERR << " has output edges 0 and " << i << " with different dims: " << vec_to_string(dstDims0) << " and "
                            << vec_to_string(dstDims);
             }
         }
     }
     if (constMap[TILE_REPEATS] && getInputShapeAtPort(TILE_INPUT).getRank() > getOutputShapeAtPort(0).getRank())
-        IE_THROW() << errorPrefix << " has incorrect input/output data shape rank. Input shape rank cannot be more than output shape rank. "
+        THROW_CPU_NODE_ERR << " has incorrect input/output data shape rank. Input shape rank cannot be more than output shape rank. "
                 "Actual input shape size: " << getInputShapeAtPort(TILE_INPUT).getRank() << ", output shape size: " << getOutputShapeAtPort(0).getRank();
 
     if (!isDynamicNode())
@@ -100,8 +100,13 @@ void Tile::prepareParams() {
     if (!constMap[TILE_REPEATS]) {
         const auto& repeatsMem = getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory();
 
-        const int32_t* repeatsData = reinterpret_cast<const int32_t *>(repeatsMem.getData());
-        originRepeats.assign(repeatsData, repeatsData + repeatsMem.getStaticDims()[0]);
+        if (repeatsMem.getDesc().getPrecision() == Precision::I64) {
+            auto repeatsData = reinterpret_cast<const int64_t *>(repeatsMem.getData());
+            originRepeats.assign(repeatsData, repeatsData + repeatsMem.getStaticDims()[0]);
+        } else {
+            auto repeatsData = reinterpret_cast<const int32_t *>(repeatsMem.getData());
+            originRepeats.assign(repeatsData, repeatsData + repeatsMem.getStaticDims()[0]);
+        }
 
         repeats.assign(std::max(originRepeats.size(), getInputShapeAtPort(TILE_INPUT).getRank()), 1lu);
         const size_t offset = repeats.size() - originRepeats.size();
diff --git a/src/plugins/intel_cpu/src/nodes/tile.h b/src/plugins/intel_cpu/src/nodes/tile.h
index 2edda6e0f887d5..eb8ba348168ccc 100644
--- a/src/plugins/intel_cpu/src/nodes/tile.h
+++ b/src/plugins/intel_cpu/src/nodes/tile.h
@@ -14,7 +14,7 @@ namespace node {
 
 class Tile : public Node, public TileBroadcastCommon {
 public:
-    Tile(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
+    Tile(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
@@ -39,8 +39,6 @@ class Tile : public Node, public TileBroadcastCommon {
     int tiles = 0;
     bool noTiling = false;
     VectorDims originRepeats;
-
-    std::string errorPrefix;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp
index 80d7b42d3a1369..e7f4b9f1c53f81 100644
--- a/src/plugins/intel_cpu/src/nodes/topk.cpp
+++ b/src/plugins/intel_cpu/src/nodes/topk.cpp
@@ -4,14 +4,10 @@
 
 #include "topk.h"
 
-#include <string>
-#include <vector>
-#include <set>
 #include <onednn/dnnl.h>
 #include <dnnl_extension_utils.h>
 #include "emitters/x64/jit_load_store_emitters.hpp"
 #include "ie_parallel.hpp"
-#include <ngraph/op/topk.hpp>
 #include <ie_ngraph_utils.hpp>
 #include <algorithm>
 
@@ -19,8 +15,6 @@
 #include <cpu/x64/jit_uni_eltwise.hpp>
 #include "common/cpu_memcpy.h"
 
-#include <ngraph/opsets/opset1.hpp>
-
 using namespace dnnl;
 using namespace InferenceEngine;
 using namespace dnnl::impl;
@@ -1792,30 +1786,29 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato
 
 bool TopK::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (!one_of(op->get_type_info(), ov::op::v1::TopK::get_type_info_static(),
-                                         ov::op::v3::TopK::get_type_info_static(),
-                                         ov::op::v11::TopK::get_type_info_static())) {
+        if (!one_of(op->get_type_info(), op::v1::TopK::get_type_info_static(),
+                                         op::v3::TopK::get_type_info_static(),
+                                         op::v11::TopK::get_type_info_static())) {
             errorMessage = "Node is not an instance of the TopK from the operation sets v1, v3 or v11";
             return false;
         }
 
-        auto topKOp = ov::as_type_ptr<const ov::op::util::TopKBase>(op);
+        auto topKOp = ov::as_type_ptr<const op::util::TopKBase>(op);
         if (!isDynamicNgraphNode(op)) {
-            auto topKConst = std::dynamic_pointer_cast<const ov::op::v0::Constant>(topKOp->get_input_node_shared_ptr(TOPK_K));
-            if (!topKConst) {
+            if (topKOp->get_input_node_shared_ptr(TOPK_K)->get_type_info() != op::v0::Constant::get_type_info_static()) {
                 errorMessage = "Second tensor is not constant in static shape mode";
                 return false;
             }
         }
 
-        if (topKOp->get_mode() != ov::op::TopKMode::MAX &&
-            topKOp->get_mode() != ov::op::TopKMode::MIN) {
+        if (topKOp->get_mode() != op::TopKMode::MAX &&
+            topKOp->get_mode() != op::TopKMode::MIN) {
             errorMessage = "Unsupported mode.";
             return false;
         }
-        if (!one_of(topKOp->get_sort_type(), ov::op::TopKSortType::NONE,
-                                             ov::op::TopKSortType::SORT_VALUES,
-                                             ov::op::TopKSortType::SORT_INDICES)) {
+        if (!one_of(topKOp->get_sort_type(), op::TopKSortType::NONE,
+                                             op::TopKSortType::SORT_VALUES,
+                                             op::TopKSortType::SORT_INDICES)) {
             errorMessage = "Unsupported sort type.";
             return false;
         }
@@ -1828,59 +1821,61 @@ bool TopK::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::
 TopK::TopK(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
         : Node(op, context, NgraphShapeInferFactory(op, PortMask(TOPK_K))) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        errorPrefix = "TopK layer with name '" + getName() + "'";
+    if (!isSupportedOperation(op, errorMessage)) {
+        IE_THROW(NotImplemented) << errorMessage;
+    }
 
-        auto topKOp = ov::as_type_ptr<const ov::op::util::TopKBase>(op);
+    auto topKOp = ov::as_type_ptr<const op::util::TopKBase>(op);
 
-        auto in_dims = topKOp->get_input_partial_shape(TOPK_DATA);
-        auto out_dims = topKOp->get_output_partial_shape(TOPK_DATA);
-        auto out_idx_dims = topKOp->get_output_partial_shape(TOPK_INDEX);
-        auto in_dims_size = in_dims.size();
+    const auto& in_dims = topKOp->get_input_partial_shape(TOPK_DATA);
+    const auto& out_dims = topKOp->get_output_partial_shape(TOPK_DATA);
+    const auto& out_idx_dims = topKOp->get_output_partial_shape(TOPK_INDEX);
+    const auto in_dims_size = in_dims.size();
 
-        if (!isDynamicNgraphNode(op)) {
-            auto topKConst = std::dynamic_pointer_cast<const ov::op::v0::Constant>(topKOp->get_input_node_shared_ptr(TOPK_K));
-            if (!topKConst) {
-                IE_THROW() << errorPrefix <<  "gets non-constant second tensor in static shape mode!";
+    top_k = 0;
+    if (!isDynamicNgraphNode(op)) {
+        if (auto topKL = ov::as_type<op::v0::Constant>(topKOp->get_input_node_ptr(TOPK_K))) {
+            if (topKL->get_element_type() == ov::element::i64) {
+                top_k = topKL->get_data_ptr<int64_t>()[0];
+            } else {
+                top_k = topKL->cast_vector<int64_t>()[0];
             }
+        } else {
+            THROW_CPU_NODE_ERR << " gets non-constant second tensor in static shape mode!";
         }
+    }
 
-        axis = topKOp->get_axis();
-        mode_max = topKOp->get_mode() == ov::op::TopKMode::MAX;
-        sort_index = topKOp->get_sort_type() == ov::op::TopKSortType::SORT_INDICES;
+    axis = topKOp->get_axis();
+    mode_max = topKOp->get_mode() == op::TopKMode::MAX;
+    sort_index = topKOp->get_sort_type() == op::TopKSortType::SORT_INDICES;
 
-        stable = false;
-        if (!sort_index) {
-            const auto topKOpV11 = ngraph::as_type_ptr<const ov::op::v11::TopK>(op);
-            if (topKOpV11) {
-                stable = topKOpV11->get_stable();
-            }
+    stable = false;
+    if (!sort_index) {
+        if (auto topKOpV11 = ov::as_type_ptr<const op::v11::TopK>(op)) {
+            stable = topKOpV11->get_stable();
         }
+    }
 
-        top_k = 0;
-        preset_params_done = false;
-        vec_idx_seq.clear();
-        vec_idx_block.clear();
+    preset_params_done = false;
+    vec_idx_seq.clear();
+    vec_idx_block.clear();
 
-        if (inputShapes.size() != 2 || outputShapes.size() < 2)
-            IE_THROW() << errorPrefix << " gets incorrect number of input/output edges!";
+    if (inputShapes.size() != 2 || outputShapes.size() < 2)
+        THROW_CPU_NODE_ERR << " gets incorrect number of input/output edges!";
 
-        if (getInputShapeAtPort(TOPK_DATA).getRank() != getOutputShapeAtPort(TOPK_DATA).getRank())
-            IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!";
+    if (getInputShapeAtPort(TOPK_DATA).getRank() != getOutputShapeAtPort(TOPK_DATA).getRank())
+        THROW_CPU_NODE_ERR << " gets incorrect number of input/output dimensions!";
 
-        if (getInputShapeAtPort(TOPK_K).getRank() != 1)
-            IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension.";
+    if (getInputShapeAtPort(TOPK_K).getRank() != 1)
+        THROW_CPU_NODE_ERR << " gets incorrect index vector dimension! Index vector should be 1 dimension.";
 
-        if (out_dims != out_idx_dims)
-            IE_THROW() << errorPrefix << " gets incorrect output tensor dimension sizes!";
+    if (out_dims != out_idx_dims)
+        THROW_CPU_NODE_ERR << " gets incorrect output tensor dimension sizes!";
 
-        if (axis < 0)
-            axis += in_dims_size;
-        if (axis < 0 || axis >= static_cast<int>(in_dims_size))
-            IE_THROW() << errorPrefix << " gets incorrect input parameters dimensions and axis number!";
-    } else {
-        IE_THROW(NotImplemented) << errorMessage;
-    }
+    if (axis < 0)
+        axis += in_dims_size;
+    if (axis < 0 || axis >= static_cast<int>(in_dims_size))
+        THROW_CPU_NODE_ERR << " gets incorrect input parameters dimensions and axis number!";
 }
 
 void TopK::getSupportedDescriptors() {}
@@ -1914,9 +1909,13 @@ void TopK::initSupportedPrimitiveDescriptors() {
         Precision::U8
     };
 
+    Precision inLenPrc = getOriginalInputPrecisionAtPort(TOPK_K);
+    if (!one_of(inLenPrc, Precision::I32, Precision::I64)) {
+        inLenPrc = Precision::I32;
+    }
     Precision dataPrecision = getOriginalOutputPrecisionAtPort(TOPK_DATA);
     if (dataPrecision == Precision::BF16 && !mayiuse(avx512_core))
-        IE_THROW() << errorPrefix << " gets incorrect isa for BF16! AVX512 must be supported!";
+        THROW_CPU_NODE_ERR << " gets incorrect isa for BF16! AVX512 must be supported!";
     bool precisionSupported = std::find(std::begin(supportedPrecision), std::end(supportedPrecision), dataPrecision)
                                      != std::end(supportedPrecision);
     if (!precisionSupported) {
@@ -1937,7 +1936,7 @@ void TopK::initSupportedPrimitiveDescriptors() {
     };
 
     for (const auto &df : dataFomats) {
-        addSupportedPrimDesc({{df.first, dataPrecision}, {LayoutType::ncsp, Precision::I32}},
+        addSupportedPrimDesc({{df.first, dataPrecision}, {LayoutType::ncsp, inLenPrc}},
                              {{df.second, dataPrecision}, {df.second, Precision::I32}},
                              impl_type);
     }
@@ -1984,11 +1983,11 @@ void TopK::prepareParams() {
     auto dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr();
     auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr();
     if (!dstMemPtr || !dstMemPtr->isAllocated())
-        IE_THROW() << errorPrefix << " has not allocated destination memory.";
+        THROW_CPU_NODE_ERR << " has not allocated destination memory.";
     if (!srcMemPtr || !srcMemPtr->isAllocated())
-        IE_THROW() << errorPrefix << " has not allocate input memory.";
+        THROW_CPU_NODE_ERR << " has not allocate input memory.";
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor";
+        THROW_CPU_NODE_ERR << " has nullable preferable primitive descriptor";
 
     src_dims = srcMemPtr->getDesc().getShape().getDims();
     dst_dims = dstMemPtr->getDesc().getShape().getDims();
@@ -2000,10 +1999,9 @@ void TopK::prepareParams() {
         if (top_k != src_k) {
             top_k = src_k;
         }
-    } else {
-        top_k = reinterpret_cast<int *>(getParentEdgeAt(TOPK_K)->getMemoryPtr()->getData())[0];
     }
 
+
     if (jit_mode) {
         if (!preset_params_done) {
             preset_params();
@@ -2154,7 +2152,7 @@ void TopK::execute(dnnl::stream strm) {
             auto out_idx_ptr = reinterpret_cast<int32_t *>(dst_idx);
             topk_ref(in_ptr, out_ptr, out_idx_ptr);
         } else {
-            IE_THROW() << errorPrefix <<  "only support plain layout on machine w/o sse42.";
+            THROW_CPU_NODE_ERR <<  "only support plain layout on machine w/o sse42.";
         }
     }
 }
diff --git a/src/plugins/intel_cpu/src/nodes/topk.h b/src/plugins/intel_cpu/src/nodes/topk.h
index f737857073c8fd..0e30d55ee30d4d 100644
--- a/src/plugins/intel_cpu/src/nodes/topk.h
+++ b/src/plugins/intel_cpu/src/nodes/topk.h
@@ -6,11 +6,6 @@
 
 #include <node.h>
 
-#include <ie_precision.hpp>
-#include <string>
-#include <memory>
-#include <vector>
-
 namespace ov {
 namespace intel_cpu {
 namespace node {
@@ -80,7 +75,7 @@ struct jit_uni_topk_kernel {
 
 class TopK : public Node {
 public:
-    TopK(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    TopK(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
     ~TopK() override = default;
 
     void getSupportedDescriptors() override;
@@ -119,14 +114,14 @@ class TopK : public Node {
     bool stable = false;
     bool mode_max = false;
     int axis = 0;
-    static const size_t TOPK_DATA = 0;
-    static const size_t TOPK_K = 1;
-    static const size_t TOPK_INDEX = 1;
+    static constexpr size_t TOPK_DATA = 0;
+    static constexpr size_t TOPK_K = 1;
+    static constexpr size_t TOPK_INDEX = 1;
     size_t O = 0, A = 0, I = 0;
     size_t blk_size = 0;
     size_t data_size = 0;
     size_t axis_dim = 0;
-    int top_k = 0;
+    int64_t top_k = 0;
     int dim = 0, before_num = 0;
     bool bubble_inplace = false;
     bool preset_params_done = false;
diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp
index 750ae6bf711ca0..7921772b18228d 100644
--- a/src/plugins/intel_cpu/src/nodes/transpose.cpp
+++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp
@@ -10,6 +10,8 @@
 #include <string>
 #include <dnnl_extension_utils.h>
 #include <common/primitive_hashing_utils.hpp>
+#include <openvino/op/constant.hpp>
+#include <openvino/op/transpose.hpp>
 
 using namespace dnnl;
 using namespace InferenceEngine;
@@ -21,12 +23,12 @@ namespace node {
 bool Transpose::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (!one_of(op->get_type_info(),
-                ov::op::v1::Transpose::get_type_info_static())) {
+                op::v1::Transpose::get_type_info_static())) {
             errorMessage = "Node is not an instance of the Transpose operation from opset1.";
             return false;
         }
 
-        if (op->get_input_node_ptr(INPUT_ORDER_IDX)->get_type_info() != ov::op::v0::Constant::get_type_info_static()) {
+        if (op->get_input_node_ptr(INPUT_ORDER_IDX)->get_type_info() != op::v0::Constant::get_type_info_static()) {
             // TODO: Support parameterized Order input for dynamic shapes.
             errorMessage = "Constant expected as the second input for static shapes.";
             return false;
@@ -88,7 +90,7 @@ class TransposeShapeInferFactory : public ShapeInferFactory {
 public:
     TransposeShapeInferFactory(const std::shared_ptr<ov::Node>& op) : m_op(op) {}
     ShapeInferPtr makeShapeInfer() const override {
-        if (const auto order = ov::as_type_ptr<const ov::op::v0::Constant>(m_op->get_input_node_shared_ptr(ov::op::v1::Transpose::ORDER))) {
+        if (const auto order = ov::as_type_ptr<const op::v0::Constant>(m_op->get_input_node_shared_ptr(op::v1::Transpose::ORDER))) {
             const auto axes_vec = order->cast_vector<size_t>();
             return std::make_shared<TransposeShapeInfer>(m_op->get_output_partial_shape(0).rank().get_length(), axes_vec);
         } else {
@@ -101,16 +103,20 @@ class TransposeShapeInferFactory : public ShapeInferFactory {
 };
 } // namespace
 
-Transpose::Transpose(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
+Transpose::Transpose(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context)
         : Node(op, context, TransposeShapeInferFactory(op)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    if (op->get_input_node_ptr(INPUT_ORDER_IDX)->get_type_info() == ov::op::v0::Constant::get_type_info_static()) {
+    if (auto inputOrder = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(INPUT_ORDER_IDX))) {
         isInputOrderConst = true;
-        order = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(INPUT_ORDER_IDX))->cast_vector<size_t>();
+        if (one_of(inputOrder->get_element_type(), ov::element::i64, ov::element::u64)) {
+            order = inputOrder->get_vector<size_t>();
+        } else {
+            order = inputOrder->cast_vector<size_t>();
+        }
 
         if (order.empty()) {
             size_t rank = getInputShapeAtPort(INPUT_DATA_IDX).getRank();
@@ -128,7 +134,11 @@ void Transpose::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    prec = getOriginalInputPrecisionAtPort(0);
+    const auto &dataPrc = getOriginalInputPrecisionAtPort(0);
+    auto orderPrc = getOriginalInputPrecisionAtPort(1);
+    if (!one_of(orderPrc, Precision::I32, Precision::I64)) {
+        orderPrc = Precision::I32;
+    }
 
     auto& creatorsMap = BlockedDescCreator::getCommonCreators();
 
@@ -139,7 +149,7 @@ void Transpose::initSupportedPrimitiveDescriptors() {
     config.inConfs[INPUT_DATA_IDX].constant(false);
     config.inConfs[INPUT_ORDER_IDX].constant(isInputOrderConst);
     config.inConfs[INPUT_ORDER_IDX].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(
-            Precision::I32, getInputShapeAtPort(INPUT_ORDER_IDX)));
+            orderPrc, getInputShapeAtPort(INPUT_ORDER_IDX)));
     config.outConfs[0].inPlace(-1);
     config.outConfs[0].constant(false);
     transpose_context = std::make_shared<ExecutorContext>(context, getImplPriority());
@@ -160,30 +170,30 @@ void Transpose::initSupportedPrimitiveDescriptors() {
     const auto& inputDataShape = getInputShapeAtPort(INPUT_DATA_IDX);
     const auto& outputDataShape = getOutputShapeAtPort(0);
     if (inputDataShape.getRank() == 4 || inputDataShape.getRank() == 5) {
-        config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(prec, inputDataShape));
-        config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(prec, outputDataShape));
+        config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(dataPrc, inputDataShape));
+        config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(dataPrc, outputDataShape));
         supportedPrimitiveDescriptorsBuilder(config, transposeParams);
 #if defined(OPENVINO_ARCH_X86_64)
         const auto& srcDims = inputDataShape.getDims();
         if (srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % 8 == 0) {
-            config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::nCsp8c)->createSharedDesc(prec, inputDataShape));
+            config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::nCsp8c)->createSharedDesc(dataPrc, inputDataShape));
             supportedPrimitiveDescriptorsBuilder(config, transposeParams);
         }
 
         if (srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % 16 == 0) {
-            config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::nCsp16c)->createSharedDesc(prec, inputDataShape));
+            config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::nCsp16c)->createSharedDesc(dataPrc, inputDataShape));
             supportedPrimitiveDescriptorsBuilder(config, transposeParams);
         }
 #endif // OPENVINO_ARCH_X86_64
-        if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) {
-            config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::nspc)->createSharedDesc(prec, inputDataShape));
-            config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::nspc)->createSharedDesc(prec, outputDataShape));
+        if (one_of(dataPrc, Precision::FP32, Precision::I8, Precision::U8)) {
+            config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::nspc)->createSharedDesc(dataPrc, inputDataShape));
+            config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::nspc)->createSharedDesc(dataPrc, outputDataShape));
             supportedPrimitiveDescriptorsBuilder(config, transposeParams);
         }
     } else {
         // general plain case
-        config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(prec, inputDataShape));
-        config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(prec, outputDataShape));
+        config.inConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(dataPrc, inputDataShape));
+        config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc(dataPrc, outputDataShape));
         supportedPrimitiveDescriptorsBuilder(config, transposeParams);
     }
 }
@@ -228,9 +238,15 @@ void Transpose::prepareParams() {
     transposeParams.permuteParams.dst_block_dims = dstDesc->getBlockDims();
 
     if (!isInputOrderConst) {
-        auto orderPtr = reinterpret_cast<const int32_t*>(getParentEdgeAt(0)->getMemoryPtr()->getData());
-        auto orderLen = getParentEdgeAt(0)->getMemoryPtr()->getSize();
-        transposeParams.permuteParams.order.assign(orderPtr, orderPtr + orderLen);
+        auto mem = getParentEdgeAt(INPUT_ORDER_IDX)->getMemoryPtr();  // the order tensor, not the data tensor
+        const auto orderLen = mem->getSize() / mem->getDesc().getPrecision().size();  // bytes -> element count
+        if (mem->getDesc().getPrecision() == Precision::I64) {
+            auto orderPtr = reinterpret_cast<const int64_t*>(mem->getData());
+            transposeParams.permuteParams.order.assign(orderPtr, orderPtr + orderLen);
+        } else {
+            auto orderPtr = reinterpret_cast<const int32_t*>(mem->getData());
+            transposeParams.permuteParams.order.assign(orderPtr, orderPtr + orderLen);
+        }
     }
 
     auto engine = getEngine();
diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h
index 5fb7e9f76570bf..4f187270a2904a 100644
--- a/src/plugins/intel_cpu/src/nodes/transpose.h
+++ b/src/plugins/intel_cpu/src/nodes/transpose.h
@@ -18,9 +18,9 @@ namespace node {
 
 class Transpose : public Node {
 public:
-    Transpose(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Transpose(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
 
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
     void createPrimitive() override;
@@ -46,7 +46,6 @@ class Transpose : public Node {
     TransposeExecutorPtr execPtr = nullptr;
     dnnl::primitive prim;
     InferenceEngine::SizeVector order;
-    InferenceEngine::Precision prec;
 
     TransposeParams transposeParams;
 
diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp
index 5fbb3b4cebe2f0..f525a67d34b76f 100644
--- a/src/plugins/intel_cpu/src/nodes/unique.cpp
+++ b/src/plugins/intel_cpu/src/nodes/unique.cpp
@@ -5,16 +5,15 @@
 #include "unique.hpp"
 
 #include "ie_parallel.hpp"
-#include <openvino/op/unique.hpp>
 #include "common/cpu_memcpy.h"
 #include <utils/shape_inference/shape_inference_internal_dyn.hpp>
+#include <openvino/op/constant.hpp>
+#include <openvino/op/unique.hpp>
 
 using namespace InferenceEngine;
 using namespace ov::intel_cpu;
 using namespace ov::intel_cpu::node;
 
-#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
-
 bool Unique::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (!ov::is_type<op::v10::Unique>(op)) {
@@ -40,21 +39,22 @@ Unique::Unique(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr con
     }
 
     if (!one_of(op->get_input_size(), 1u, 2u) || op->get_output_size() != 4)
-        THROW_ERROR << "has incorrect number of input/output edges.";
+        THROW_CPU_NODE_ERR << "has incorrect number of input/output edges.";
 
     for (int i = 0; i < 4; i++) {
         definedOutputs[i] = !op->get_output_target_inputs(i).empty();
     }
 
     sorted = ov::as_type_ptr<op::v10::Unique>(op)->get_sorted();
-    if (op->get_input_size() > AXIS) {
+    auto dataShapeRank = op->get_input_partial_shape(IN_DATA).rank().get_length();
+    if (op->get_input_size() > AXIS && dataShapeRank > 1) {
         flattened = false;
         axis = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
         if (axis < 0) {
-            axis += op->get_input_partial_shape(IN_DATA).rank().get_length();
+            axis += dataShapeRank;
         }
-        if (axis < 0 || axis >= op->get_input_partial_shape(IN_DATA).rank().get_length()) {
-            THROW_ERROR << "has invalid axis value: " << ov::as_type<op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
+        if (axis < 0 || axis >= dataShapeRank) {
+            THROW_CPU_NODE_ERR << "has invalid axis value: " << ov::as_type<op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
         }
     } else {
         flattened = true;
@@ -63,21 +63,23 @@ Unique::Unique(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr con
 
 void Unique::initSupportedPrimitiveDescriptors() {
     dataPrecision = getOriginalInputPrecisionAtPort(IN_DATA);
-    if (dataPrecision != Precision::I32 && dataPrecision != Precision::I8 && dataPrecision != Precision::U8) {
+    if (dataPrecision != Precision::I64 && dataPrecision != Precision::I32 && dataPrecision != Precision::I8 && dataPrecision != Precision::U8) {
         dataPrecision = Precision::FP32;
     }
     dataTypeSize = dataPrecision.size();
-    const InferenceEngine::Precision axisPrecision = Precision::I32;
+    Precision axisPrecision = Precision::I64;
 
     impl_desc_type implType = ref;
 
     std::vector<PortConfigurator> inPortConfigs = { {LayoutType::ncsp, dataPrecision} };
-    if (!flattened) {
+    if (getOriginalInputsNumber() > AXIS) {
+        axisPrecision = getOriginalInputPrecisionAtPort(AXIS);
         inPortConfigs.push_back({LayoutType::ncsp, axisPrecision});
     }
     std::vector<PortConfigurator> outPortConfigs;
     for (int i = 0; i < 4; i++) {
-        outPortConfigs.push_back({LayoutType::ncsp, i == 0 ? dataPrecision : axisPrecision});
+        outputsPrc[i] = getOriginalOutputPrecisionAtPort(i);
+        outPortConfigs.push_back({LayoutType::ncsp, i == 0 ? dataPrecision : getOriginalOutputPrecisionAtPort(i)});
     }
 
     addSupportedPrimDesc(inPortConfigs, outPortConfigs, implType);
@@ -90,18 +92,18 @@ void Unique::createPrimitive() {
 void Unique::prepareParams() {
     auto dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr();
     if (!dataMemPtr || !dataMemPtr->isAllocated()) {
-        THROW_ERROR << " has not allocated input data memory.";
+        THROW_CPU_NODE_ERR << " has not allocated input data memory.";
     }
     for (int i = 0; i < 4; i++) {
         if (definedOutputs[i]) {
             auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
             if (!dstMemPtr || !dstMemPtr->isAllocated()) {
-                THROW_ERROR << " has not allocated output memory at port " << i;
+                THROW_CPU_NODE_ERR << " has not allocated output memory at port " << i;
             }
         }
     }
     if (getSelectedPrimitiveDescriptor() == nullptr) {
-        THROW_ERROR << " has unidentified preferable primitive descriptor.";
+        THROW_CPU_NODE_ERR << " has unidentified preferable primitive descriptor.";
     }
 
     size_t srcLen = 1;
@@ -111,9 +113,15 @@ void Unique::prepareParams() {
         auto dstDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
         srcLen = dstDataShape[axis];
     }
-    firstUniTmp.resize(srcLen, 0);
-    inToOutTmp.resize(srcLen);
-    occurTmp.resize(srcLen);
+    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+        firstUniTmp.resize(srcLen, 0);
+    }
+    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+        inToOutTmp.resize(srcLen);
+    }
+    if (definedOutputs[OCCURRENCES_NUM]) {
+        occurTmp.resize(srcLen);
+    }
 }
 
 template<typename T>
@@ -135,12 +143,14 @@ void Unique::execute(dnnl::stream strm) {
         OV_SWITCH(intel_cpu, flattenExec, this, dataPrecision,
               OV_CASE(Precision::FP32, float),
               OV_CASE(Precision::I32, int32_t),
+              OV_CASE(Precision::I64, int64_t),
               OV_CASE(Precision::I8, int8_t),
               OV_CASE(Precision::U8, uint8_t))
     } else {
         OV_SWITCH(intel_cpu, slicedExec, this, dataPrecision,
               OV_CASE(Precision::FP32, float),
               OV_CASE(Precision::I32, int32_t),
+              OV_CASE(Precision::I64, int64_t),
               OV_CASE(Precision::I8, int8_t),
               OV_CASE(Precision::U8, uint8_t))
     }
@@ -168,7 +178,7 @@ void Unique::flattenTensorExec() {
     const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getSize() / sizeof(T);
     std::vector<T> uniDataTmp(inputLen);
     auto uniDataTmpPtr = uniDataTmp.data();
-    int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
+    int64_t *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
     if (definedOutputs[FIRST_UNIQUE_IDX]) {
         firstTmpPtr = firstUniTmp.data();
     }
@@ -266,16 +276,13 @@ void Unique::flattenTensorExec() {
     T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->getData());
     cpu_parallel_memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
     if (definedOutputs[FIRST_UNIQUE_IDX]) {
-        int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->getData());
-        cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
+        copyOutput(FIRST_UNIQUE_IDX, firstUniTmp.data(), uniqueLen);
     }
     if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-        auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->getData());
-        cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
+        copyOutput(INPUT_TO_UNIQ_IDX, inToOutTmpPtr, inputLen);
     }
     if (definedOutputs[OCCURRENCES_NUM]) {
-        auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->getData());
-        cpu_parallel_memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
+        copyOutput(OCCURRENCES_NUM, occurTmpPtr, uniqueLen);
     }
 }
 
@@ -283,16 +290,17 @@ template <typename T>
 void Unique::slicedTensorExec() {
     auto inDataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr();
     auto srcDataPtr = reinterpret_cast<const T*>(inDataMemPtr->getData());
-    int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
-    if (definedOutputs[FIRST_UNIQUE_IDX]) {
-        firstTmpPtr = firstUniTmp.data();
-    }
-    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-        inToOutTmpPtr = inToOutTmp.data();
-    }
-    if (definedOutputs[OCCURRENCES_NUM]) {
-        occurTmpPtr = occurTmp.data();
-    }
+
+    uint8_t *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
+     if (definedOutputs[FIRST_UNIQUE_IDX]) {
+         firstTmpPtr = reinterpret_cast<uint8_t*>(firstUniTmp.data());
+     }
+     if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+         inToOutTmpPtr = reinterpret_cast<uint8_t*>(inToOutTmp.data());
+     }
+     if (definedOutputs[OCCURRENCES_NUM]) {
+         occurTmpPtr = reinterpret_cast<uint8_t*>(occurTmp.data());
+     }
 
     const auto& srcDataShape = inDataMemPtr->getStaticDims();
 
@@ -309,14 +317,27 @@ void Unique::slicedTensorExec() {
     const auto srcOuterStep = innerLen * axisDim;
 
     if (definedOutputs[FIRST_UNIQUE_IDX]) {
-        firstTmpPtr[0] = 0;
+        if (outputsPrc[FIRST_UNIQUE_IDX] == Precision::I32) {
+            reinterpret_cast<int32_t*>(firstTmpPtr)[0] = 0;
+        } else {
+            reinterpret_cast<int64_t*>(firstTmpPtr)[0] = 0;
+        }
     }
     if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-        inToOutTmpPtr[0] = 0;
+        if (outputsPrc[INPUT_TO_UNIQ_IDX] == Precision::I32) {
+            reinterpret_cast<int32_t*>(inToOutTmpPtr)[0] = 0;
+        } else {
+            reinterpret_cast<int64_t*>(inToOutTmpPtr)[0] = 0;
+        }
     }
     if (definedOutputs[OCCURRENCES_NUM]) {
-        occurTmpPtr[0] = 1;
-        std::fill(occurTmpPtr, occurTmpPtr + axisDim, 1);
+        if (outputsPrc[OCCURRENCES_NUM] == Precision::I32) {
+            auto dstMem = reinterpret_cast<int32_t*>(occurTmpPtr);
+            std::fill(dstMem, dstMem + axisDim, 1);
+        } else {
+            auto dstMem = reinterpret_cast<int64_t*>(occurTmpPtr);
+            std::fill(dstMem, dstMem + axisDim, 1);
+        }
     }
 
     uniqueLen = 1lu;
@@ -346,17 +367,29 @@ void Unique::slicedTensorExec() {
         }
         if (!equal) {
             if (definedOutputs[FIRST_UNIQUE_IDX]) {
-                firstTmpPtr[uniqueLen] = a;
+                if (outputsPrc[FIRST_UNIQUE_IDX] == Precision::I32) {
+                    reinterpret_cast<int32_t*>(firstTmpPtr)[uniqueLen] = static_cast<int32_t>(a);
+                } else {
+                    reinterpret_cast<int64_t*>(firstTmpPtr)[uniqueLen] = static_cast<int64_t>(a);
+                }
             }
 
             uniqIdx[uniqueLen++] = a;
         } else {
             if (definedOutputs[OCCURRENCES_NUM]) {
-                occurTmpPtr[uIdx]++;
+                if (outputsPrc[OCCURRENCES_NUM] == Precision::I32) {
+                    reinterpret_cast<int32_t*>(occurTmpPtr)[uIdx]++;
+                } else {
+                    reinterpret_cast<int64_t*>(occurTmpPtr)[uIdx]++;
+                }
             }
         }
         if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-            inToOutTmpPtr[a] = uIdx;
+            if (outputsPrc[INPUT_TO_UNIQ_IDX] == Precision::I32) {
+                reinterpret_cast<int32_t*>(inToOutTmpPtr)[a] = uIdx;
+            } else {
+                reinterpret_cast<int64_t*>(inToOutTmpPtr)[a] = uIdx;
+            }
         }
     }
 
@@ -365,15 +398,15 @@ void Unique::slicedTensorExec() {
     dstDataShape[axis] = uniqueLen;
     redefineOutputMemory({ dstDataShape, {uniqueLen}, {axisDim}, {uniqueLen}});
 
-    int *firstPtr = nullptr, *inToOutPtr = nullptr, *occurNPtr = nullptr;
+    uint8_t *firstPtr = nullptr, *inToOutPtr = nullptr, *occurNPtr = nullptr;
     if (definedOutputs[FIRST_UNIQUE_IDX]) {
-        firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->getData());
+        firstPtr = reinterpret_cast<uint8_t*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->getData());
     }
     if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-        inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->getData());
+        inToOutPtr = reinterpret_cast<uint8_t*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->getData());
     }
     if (definedOutputs[OCCURRENCES_NUM]) {
-        occurNPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->getData());
+        occurNPtr = reinterpret_cast<uint8_t*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->getData());
     }
 
     T* dstDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->getData());
@@ -391,8 +424,6 @@ void Unique::slicedTensorExec() {
         });
     }
 
-    const auto uniqueLenIB = uniqueLen * sizeof(T);
-
     if (sorted) {
         const auto dstUniDataLen = dstOuterStep * outerLen;
         std::vector<T> vDstBuff(dstUniDataLen);
@@ -405,9 +436,9 @@ void Unique::slicedTensorExec() {
 
         std::vector<OrdEl> colToSort(uniqueLen);
         T *dst1 = dstDataPtr, *dst2 = dstBuff;
-        int *first1 = firstPtr, *first2 = firstTmpPtr;
-        int *occurN1 = occurNPtr, *occurN2 = occurTmpPtr;
-        int *inToOut1 = inToOutPtr, *inToOut2 = inToOutTmpPtr;
+        uint8_t *first1 = firstPtr, *first2 = firstTmpPtr;
+        uint8_t *occurN1 = occurNPtr, *occurN2 = occurTmpPtr;
+        uint8_t *inToOut1 = inToOutPtr, *inToOut2 = inToOutTmpPtr;
 
         const bool defined3outputs = definedOutputs[FIRST_UNIQUE_IDX] || definedOutputs[OCCURRENCES_NUM] || definedOutputs[INPUT_TO_UNIQ_IDX];
 
@@ -432,15 +463,35 @@ void Unique::slicedTensorExec() {
                 if (defined3outputs) {
                     parallel_for(uniqueLen, [&](size_t u) {
                         if (definedOutputs[FIRST_UNIQUE_IDX]) {
-                            first1[u] = first2[colToSort[u].idx];
+                            if (outputsPrc[FIRST_UNIQUE_IDX] == Precision::I32) {
+                                reinterpret_cast<int32_t*>(first1)[u] = reinterpret_cast<int32_t*>(first2)[colToSort[u].idx];
+                            } else {
+                                reinterpret_cast<int64_t*>(first1)[u] = reinterpret_cast<int64_t*>(first2)[colToSort[u].idx];
+                            }
                         }
                         if (definedOutputs[OCCURRENCES_NUM]) {
-                            occurN1[u] = occurN2[colToSort[u].idx];
+                            if (outputsPrc[OCCURRENCES_NUM] == Precision::I32) {
+                                reinterpret_cast<int32_t*>(occurN1)[u] = reinterpret_cast<int32_t*>(occurN2)[colToSort[u].idx];
+                            } else {
+                                reinterpret_cast<int64_t*>(occurN1)[u] = reinterpret_cast<int64_t*>(occurN2)[colToSort[u].idx];
+                            }
                         }
                         if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-                            for (size_t ax = 0; ax < axisDim; ax++) {
-                                if (inToOut2[ax] == colToSort[u].idx) {
-                                    inToOut1[ax] = u;
+                            if (outputsPrc[INPUT_TO_UNIQ_IDX] == Precision::I32) {
+                                auto inToOut1_i32 = reinterpret_cast<int32_t*>(inToOut1);
+                                auto inToOut2_i32 = reinterpret_cast<int32_t*>(inToOut2);
+                                for (size_t ax = 0; ax < axisDim; ax++) {
+                                    if (inToOut2_i32[ax] == colToSort[u].idx) {
+                                        inToOut1_i32[ax] = static_cast<int32_t>(u);
+                                    }
+                                }
+                            } else {
+                                auto inToOut1_i64 = reinterpret_cast<int64_t*>(inToOut1);
+                                auto inToOut2_i64 = reinterpret_cast<int64_t*>(inToOut2);
+                                for (size_t ax = 0; ax < axisDim; ax++) {
+                                    if (inToOut2_i64[ax] == colToSort[u].idx) {
+                                        inToOut1_i64[ax] = static_cast<int64_t>(u);
+                                    }
                                 }
                             }
                         }
@@ -464,23 +515,41 @@ void Unique::slicedTensorExec() {
             cpu_parallel_memcpy(dstDataPtr, dst1, dstUniDataLen * sizeof(T));
         }
         if (definedOutputs[FIRST_UNIQUE_IDX] && first2 != firstPtr) {
-            cpu_parallel_memcpy(firstPtr, first2, uniqueLenIB);
+            const auto cpyLen = uniqueLen * (outputsPrc[FIRST_UNIQUE_IDX] == Precision::I32 ? sizeof(int32_t) : sizeof(int64_t));
+            cpu_parallel_memcpy(firstPtr, first2, cpyLen);
         }
         if (definedOutputs[INPUT_TO_UNIQ_IDX] && inToOut2 != inToOutPtr) {
-            cpu_parallel_memcpy(inToOutPtr, inToOut2, axisDim * sizeof(int));
+            const auto cpyLen = axisDim * (outputsPrc[INPUT_TO_UNIQ_IDX] == Precision::I32 ? sizeof(int32_t) : sizeof(int64_t));
+            cpu_parallel_memcpy(inToOutPtr, inToOut2, cpyLen);
         }
         if (definedOutputs[OCCURRENCES_NUM] && occurN2 != occurNPtr) {
-            cpu_parallel_memcpy(occurNPtr, occurN2, uniqueLenIB);
+            const auto cpyLen = uniqueLen * (outputsPrc[OCCURRENCES_NUM] == Precision::I32 ? sizeof(int32_t) : sizeof(int64_t));
+            cpu_parallel_memcpy(occurNPtr, occurN2, cpyLen);
         }
     } else {
         if (definedOutputs[FIRST_UNIQUE_IDX]) {
-            cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLenIB);
+            const auto cpyLen = uniqueLen * (outputsPrc[FIRST_UNIQUE_IDX] == Precision::I32 ? sizeof(int32_t) : sizeof(int64_t));
+            cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), cpyLen);
         }
         if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
-            cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), axisDim * sizeof(int));
+            const auto cpyLen = axisDim * (outputsPrc[INPUT_TO_UNIQ_IDX] == Precision::I32 ? sizeof(int32_t) : sizeof(int64_t));
+            cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), cpyLen);
         }
         if (definedOutputs[OCCURRENCES_NUM]) {
-            cpu_parallel_memcpy(occurNPtr, occurTmp.data(), uniqueLenIB);
+            const auto cpyLen = uniqueLen * (outputsPrc[OCCURRENCES_NUM] == Precision::I32 ? sizeof(int32_t) : sizeof(int64_t));
+            cpu_parallel_memcpy(occurNPtr, occurTmp.data(), cpyLen);
         }
     }
 }
+
+void Unique::copyOutput(size_t outIdx, const int64_t* srcPtr, size_t len) {  // Stores `len` int64 values into output port `outIdx`.
+    const auto outMem = getChildEdgesAtPort(outIdx)[0]->getMemoryPtr();
+    if (outMem->getDataType() == dnnl::memory::data_type::s64) {
+        cpu_parallel_memcpy(outMem->getData(), srcPtr, len * sizeof(int64_t));  // Same element width: raw copy.
+    } else if (outMem->getDataType() == dnnl::memory::data_type::s32) {
+        auto outPtr = reinterpret_cast<int32_t *>(outMem->getData());
+        parallel_for(len, [&](size_t i) { outPtr[i] = static_cast<int32_t>(srcPtr[i]); });  // Narrow element-wise.
+    } else {
+        THROW_CPU_NODE_ERR << " has unsupported precision of the output at port " << outIdx;  // Was a silent no-op.
+    }
+}
diff --git a/src/plugins/intel_cpu/src/nodes/unique.hpp b/src/plugins/intel_cpu/src/nodes/unique.hpp
index 65b8636abe3d01..57d174b4078fee 100644
--- a/src/plugins/intel_cpu/src/nodes/unique.hpp
+++ b/src/plugins/intel_cpu/src/nodes/unique.hpp
@@ -27,6 +27,8 @@ class Unique : public Node {
     bool needShapeInfer() const override { return false; }
 
 private:
+    void copyOutput(size_t outIdx, const int64_t* srcPtr, size_t len);
+
     template <typename T>
     void flattenTensorExec();
     template <typename T>
@@ -37,14 +39,15 @@ class Unique : public Node {
     template<typename T>
     struct slicedExec;
 
-    std::vector<int32_t> firstUniTmp;
-    std::vector<int32_t> inToOutTmp;
-    std::vector<int32_t> occurTmp;
+    std::vector<int64_t> firstUniTmp;
+    std::vector<int64_t> inToOutTmp;
+    std::vector<int64_t> occurTmp;
 
     bool sorted    = false;
     bool flattened = true;
     int  axis = 0;
     bool definedOutputs[4] = { false, false, false, false };
+    InferenceEngine::Precision outputsPrc[4] = { InferenceEngine::Precision::I32 };  // NOTE(review): sets only element 0; the other three are default-constructed - confirm.
     InferenceEngine::Precision dataPrecision;
     int64_t dataTypeSize = 1l;
     size_t uniqueLen = 1lu;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 9038d660fb525b..d75122e527b988 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -9,7 +9,6 @@
 
 #include "transformations/transformation_pipeline.h"
 #include "itt.h"
-#include "extension_mngr.h"
 #include "extension.h"
 #include "serialize.h"
 #include "threading/ie_executor_manager.hpp"
@@ -17,15 +16,12 @@
 #include "ie_icore.hpp"
 #include "ie_plugin_config.hpp"
 #include "ie_system_conf.h"
-#include "threading/ie_cpu_streams_info.hpp"
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
 
-#include <transformations/utils/utils.hpp>
 #include <ie_ngraph_utils.hpp>
 
 #include "performance_heuristics.hpp"
-#include "openvino/runtime/properties.hpp"
 #include "weights_cache.hpp"
 #include "utils/denormals.hpp"
 
@@ -36,7 +32,6 @@
 #endif
 
 #include <cpu/x64/cpu_isa_traits.hpp>
-#include <itt.h>
 
 using namespace InferenceEngine;
 
@@ -155,7 +150,7 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
            config.count(ov::num_streams.name());
 }
 
-void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
+void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ov::Model>& ngraphFunc) const {
     auto getNumStreamsLatency = [&]() {
         return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
     };
@@ -272,7 +267,7 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
     }
 }
 
-void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ngraph::Function>& ngraphFunc) {
+void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ov::Model>& ngraphFunc) {
     const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
     // save hints parameters to model rt_info
     ov::AnyMap hints_props;
@@ -421,6 +416,19 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
         IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
 }
 
+static void setI64Mode(const std::map<std::string, std::string>& modelConfig, Config& engineConfig) {
+    engineConfig.enableNativeI64 = false;  // Native I64 is opt-in: it stays off unless the model config says YES.
+    const auto i64Entry = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+    if (i64Entry == modelConfig.end() || i64Entry->second == PluginConfigParams::NO) {
+        return;
+    }
+    if (i64Entry->second != PluginConfigParams::YES) {
+        IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << i64Entry->second
+                            << ". Expected only YES or NO values.";
+    }
+    engineConfig.enableNativeI64 = true;
+}
+
 InferenceEngine::IExecutableNetworkInternal::Ptr
 Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");
@@ -454,6 +462,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     const bool enableLPT = shouldEnableLPT(config, engConfig);
     ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
+    setI64Mode(config, engConfig);
 
     auto nGraphFunc = clonedNetwork.getFunction();
 
@@ -729,6 +738,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
                         || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
+    setI64Mode(config, conf);
 
     auto model = network.getFunction();
     if (model == nullptr) {
@@ -744,7 +754,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
                                            transformation.UpToCpuSpecificOpSet();
                                            transformation.CpuSpecificOpSet();
                                        },
-                                       [&](const std::shared_ptr<ngraph::Node>& op) {
+                                       [&](const std::shared_ptr<ov::Node>& op) {
                                            std::unique_ptr<Node> ptr;
                                            try {
                                                ptr.reset(Node::factory().create(op, context));
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index b2cd223db3e7b2..7ea7256b8e87fa 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -2,24 +2,19 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <ngraph/pass/constant_folding.hpp>
-#include "ngraph/op/fake_quantize.hpp"
-#include "ngraph/pass/manager.hpp"
 #include "common/pass/reshape_fc_fusion.hpp"
 #include "common/pass/align_matmul_input_ranks.hpp"
-#include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "common/pass/convert_broadcast_to_tiles.hpp"
 #include "common/pass/convert_tile_to_seq_tiles.hpp"
 #include "common/pass/convert_matmul_to_fc.hpp"
 #include "common/pass/convert_to_power_static.hpp"
 #include "common/pass/convert_to_leaky_relu.hpp"
 #include "common/pass/convert_to_swish_cpu.hpp"
-#include "transformations/convert_precision.hpp"
-#include "transformations/utils/utils.hpp"
 #include "common/pass/rnn_sequences_optimization.hpp"
-#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
 #include "common/pass/ngram_fusion.hpp"
-#include "transformations/defs.hpp"
+#include <openvino/pass/constant_folding.hpp>
+#include "openvino/pass/manager.hpp"
+#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
 
 #include "itt.hpp"
 
@@ -44,7 +39,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
     // after transformation "MoveEltwiseUpThroughDataMov" there can be reshaped sequences that should be eliminated or fused
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ReshapeSequenceFusion);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions_map {{ ngraph::element::i64, ngraph::element::i32 }});
     CPU_REGISTER_PASS_COMMON(manager, NgramFusion);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate);
 
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.cpp
new file mode 100644
index 00000000000000..5a76f59f2412c7
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.cpp
@@ -0,0 +1,155 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+
+#include "convert_precision_i64_i32.hpp"
+#include <openvino/opsets/opset12.hpp>
+#include "transformations/utils/utils.hpp"
+#include "cpu_types.h"
+
+#include <unordered_set>
+
+// Returns true when the op type is in the list of ops this pass leaves in i64 (treated as natively supported).
+static bool isNativelySupported(const ov::Node::type_info_t &type) {
+    static const std::unordered_set<ov::Node::type_info_t> i64Ops = {
+        ov::opset12::Add::get_type_info_static(),
+        ov::op::v1::Broadcast::get_type_info_static(),
+        ov::op::v3::Broadcast::get_type_info_static(),
+        ov::opset12::Concat::get_type_info_static(),
+        ov::opset12::Constant::get_type_info_static(),
+        ov::opset12::Convert::get_type_info_static(),
+        ov::opset12::CumSum::get_type_info_static(),
+        ov::opset12::Divide::get_type_info_static(),
+        ov::opset12::Equal::get_type_info_static(),
+        ov::opset12::FloorMod::get_type_info_static(),
+        ov::op::v1::Gather::get_type_info_static(),
+        ov::op::v7::Gather::get_type_info_static(),
+        ov::op::v8::Gather::get_type_info_static(),
+        ov::op::v5::GatherND::get_type_info_static(),
+        ov::op::v8::GatherND::get_type_info_static(),
+        ov::opset12::Greater::get_type_info_static(),
+        ov::opset12::Less::get_type_info_static(),
+        ov::opset12::Maximum::get_type_info_static(),
+        ov::opset12::Minimum::get_type_info_static(),
+        ov::opset12::Multiply::get_type_info_static(),
+        ov::opset12::NonMaxSuppression::get_type_info_static(),
+        ov::opset12::NonZero::get_type_info_static(),
+        ov::opset12::OneHot::get_type_info_static(),
+        ov::opset12::Parameter::get_type_info_static(),
+        ov::opset12::ReduceL1::get_type_info_static(),
+        ov::opset12::ReduceL2::get_type_info_static(),
+        ov::opset12::ReduceLogicalAnd::get_type_info_static(),
+        ov::opset12::ReduceMax::get_type_info_static(),
+        ov::opset12::ReduceMean::get_type_info_static(),
+        ov::opset12::ReduceMin::get_type_info_static(),
+        ov::opset12::ReduceProd::get_type_info_static(),
+        ov::opset12::ReduceSum::get_type_info_static(),
+        ov::opset12::Reshape::get_type_info_static(),
+        ov::opset12::Result::get_type_info_static(),
+        ov::opset12::ScatterElementsUpdate::get_type_info_static(),
+        ov::opset12::ScatterNDUpdate::get_type_info_static(),
+        ov::opset12::ScatterUpdate::get_type_info_static(),
+        ov::opset12::Select::get_type_info_static(),
+        ov::opset12::ShapeOf::get_type_info_static(),
+        ov::opset12::Slice::get_type_info_static(),
+        ov::opset12::Split::get_type_info_static(),
+        ov::opset12::Sqrt::get_type_info_static(),
+        ov::opset12::SquaredDifference::get_type_info_static(),
+        ov::opset12::Squeeze::get_type_info_static(),
+        ov::opset12::StridedSlice::get_type_info_static(),
+        ov::opset12::Subtract::get_type_info_static(),
+        ov::opset12::Tile::get_type_info_static(),
+        ov::opset12::Transpose::get_type_info_static(),
+        ov::opset12::Unique::get_type_info_static(),
+        ov::opset12::Unsqueeze::get_type_info_static(),
+        ov::opset12::VariadicSplit::get_type_info_static()
+    };
+    return i64Ops.count(type) != 0;
+}
+
+// Returns an i32 copy of the given i64 constant; values outside the int32 range saturate to its bounds.
+static std::shared_ptr<ov::Node> changeConstantPrecision(const std::shared_ptr<ov::op::v0::Constant>& constant) {
+    constexpr int64_t i32Max = std::numeric_limits<int32_t>::max();
+    constexpr int64_t i32Min = std::numeric_limits<int32_t>::lowest();
+    const auto* srcData = constant->get_data_ptr<int64_t>();
+    const auto size = shape_size(constant->get_shape());
+
+    auto newConstant = std::make_shared<ov::op::v0::Constant>(ov::element::i32, constant->get_shape());
+    newConstant->output(0).set_names(constant->output(0).get_names());
+    // get_data_ptr() yields a const pointer, hence the const_cast to fill the freshly created buffer.
+    auto* dstData = const_cast<int32_t*>(reinterpret_cast<const int32_t*>(newConstant->get_data_ptr()));
+    if (dstData == nullptr) {
+        throw ngraph::ngraph_error("Can't get destination data pointer");
+    }
+
+    for (size_t i = 0; i < size; ++i) {
+        const int64_t value = srcData[i];
+        const int64_t saturated = value > i32Max ? i32Max : (value < i32Min ? i32Min : value);
+        dstData[i] = static_cast<int32_t>(saturated);
+    }
+    return newConstant;
+}
+
+bool ov::intel_cpu::ConvertPrecisionI64ToI32::run_on_model(const std::shared_ptr<ov::Model> &model) {
+    // For every known op without native i64 support: narrow its i64 inputs to i32 and widen the affected outputs back.
+    const auto orderedOps = model->get_ordered_ops();
+    for (const auto& op : orderedOps) {
+        if (isNativelySupported(op->get_type_info()) || TypeFromName(op->get_type_name()) == Type::Unknown) {
+            continue;
+        }
+
+        std::unordered_set<size_t> i64Outputs;  // Outputs that are i64 now; only these get an i32->i64 Convert below.
+        for (const auto& output : op->outputs()) {
+            if (output.get_element_type() == ov::element::i64) {
+                i64Outputs.insert(output.get_index());
+            }
+        }
+        bool convertForOutputsRequired = false;
+        for (const auto& input : op->inputs()) {
+            if (input.get_element_type() == ov::element::i64) {
+                auto parentOutput = input.get_source_output();
+                auto parentNode = parentOutput.get_node_shared_ptr();
+                if (ov::is_type<ov::opset12::Convert>(parentNode) &&
+                        parentNode->get_rt_info().find("convert_i32_i64") != parentNode->get_rt_info().end()) {
+                    input.replace_source_output(parentNode->input_value(0));  // Bypass a Convert inserted by this pass.
+                } else if (auto constOp = ov::as_type_ptr<ov::op::v0::Constant>(parentNode)) {
+                    auto newConst = changeConstantPrecision(constOp);
+                    input.replace_source_output(newConst);
+                    newConst->set_friendly_name(constOp->get_friendly_name());
+                } else {
+                    auto convert = std::make_shared<ov::opset12::Convert>(input.get_source_output(), ov::element::i32);
+                    convert->output(0).add_names(parentOutput.get_names());
+                    input.replace_source_output(convert);
+                }
+                convertForOutputsRequired = true;
+            }
+        }
+
+        if (convertForOutputsRequired) {
+            op->validate_and_infer_types();  // Propagate i32 precision into outputs.
+            for (auto& output : op->outputs()) {
+                if (output.get_element_type() == ov::element::i32 && i64Outputs.count(output.get_index()) != 0) {
+                    auto targetInputs = output.get_target_inputs();  // Captured before the Convert becomes a consumer too.
+                    auto convert = std::make_shared<ov::opset12::Convert>(output, ov::element::i64);
+                    convert->get_rt_info()["convert_i32_i64"] = "";  // Marker checked above to bypass this Convert.
+                    for (const auto& targetInput : targetInputs) {
+                        targetInput.replace_source_output(convert);
+                    }
+                    auto& convertTensor = convert->output(0).get_tensor();
+                    if (ov::descriptor::get_ov_tensor_legacy_name(convertTensor).empty()) {
+                        ov::descriptor::set_ov_tensor_legacy_name(convertTensor, ov::op::util::get_ie_output_name(output));
+                    }
+                    if (!output.get_names().empty()) {
+                        convertTensor.set_names(output.get_names());
+                    }
+                }
+            }
+        }
+        if (auto multisubgraph_op = ov::as_type_ptr<ov::op::util::MultiSubGraphOp>(op)) {
+            for (size_t idx = 0; idx < multisubgraph_op->get_internal_subgraphs_size(); ++idx) {
+                run_on_model(multisubgraph_op->get_function(static_cast<int>(idx)));  // Apply the pass to nested bodies.
+            }
+        }
+    }
+
+    return true;
+}
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp
new file mode 100644
index 00000000000000..a3aa5a6f35455e
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp
@@ -0,0 +1,21 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/pass.hpp"
+
+namespace ov {
+namespace intel_cpu {
+class ConvertPrecisionI64ToI32: public ov::pass::ModelPass {  // Narrows i64 to i32 around ops lacking native i64 support.
+public:
+    OPENVINO_RTTI("ConvertPrecisionI64ToI32", "0");
+
+    ConvertPrecisionI64ToI32() = default;
+
+    bool run_on_model(const std::shared_ptr<ov::Model>& model) override;  // Rewrites `model` in place; always returns true.
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index 1ec5d40071d73e..f833edda7658a2 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2022 Intel Corporation
+// Copyright (C) 2022-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -25,6 +25,7 @@
 #include "transformations/common_optimizations/fq_mul_fusion.hpp"
 #include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
 #include "transformations/common_optimizations/nop_elimination.hpp"
+#include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "transformations/common_optimizations/transpose_sinking.hpp"
 #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
 #include "transformations/common_optimizations/augru_cell_fusion.hpp"
@@ -53,8 +54,6 @@
 #include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
 #include "transformations/op_conversions/convert_space_to_batch.hpp"
 #include "transformations/op_conversions/convert_space_to_depth.hpp"
-#include "transformations/op_conversions/convert_subtract.hpp"
-#include "transformations/op_conversions/convert_ti_to_sequences.hpp"
 #include "transformations/op_conversions/detection_output_downgrade.hpp"
 #include "transformations/op_conversions/detection_output_upgrade.hpp"
 #include "transformations/op_conversions/eye_decomposition.hpp"
@@ -98,11 +97,7 @@
 #include "transformations/snippets/x64/pass/snippets_mark_skipped.hpp"
 #include "transformations/cpu_opset/x64/pass/mha_fusion.hpp"
 #include "transformations/cpu_opset/x64/pass/convert_to_interaction.hpp"
-#include "transformations/cpu_opset/arm/pass/convert_group_conv.hpp"
-#include "transformations/cpu_opset/arm/pass/convert_group_conv1d.hpp"
-#include "transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp"
-#include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp"
-#include "transformations/cpu_opset/common/pass/decompose_integer_divide.hpp"
+#include "transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp"
 #include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp"
 #include "transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp"
 #include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp"
@@ -127,7 +122,7 @@ namespace intel_cpu {
 
 using const_node_ptr = const std::shared_ptr<const ov::Node>;
 
-bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, const precisions_map& precisions) {
+bool Transformations::fuse_type_to_convert(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions) {
     const auto& from = node->get_output_element_type(0);
     auto it = precisions.find(from);
     if (it == precisions.end())
@@ -139,7 +134,7 @@ bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>&
         // is converted to be 1 for boolean, but 0 for u8. Thus an Abs and Ceil node should be added before the
         // Convert node for this scenario.
         if (convert->input(0).get_element_type().is_real() &&
-            convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) {
+            convert->get_convert_element_type() == ov::element::boolean && to.is_integral_number()) {
             auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
             auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
             auto new_convert = std::make_shared<ov::opset10::Convert>(ceil, to);
@@ -208,11 +203,10 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     if (useLpt) {
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
     }
+    bool supportI64 = config.enableNativeI64;
 
-    auto get_convert_precisions = []() {
+    auto get_convert_precisions = [&]() {
         precisions_map map = {
-            {ov::element::i64,     ov::element::i32},
-            {ov::element::u64,     ov::element::i32},
             {ov::element::i16,     ov::element::i32},
             {ov::element::u16,     ov::element::i32},
             {ov::element::u32,     ov::element::i32},
@@ -223,12 +217,21 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
             {ov::element::u4,      ov::element::u8}
         };
 
-        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
+        if (supportI64) {
+            map.insert({ov::element::u64, ov::element::i64});
+        } else {
+            map.insert({ov::element::u64, ov::element::i32});
+            map.insert({ov::element::i64, ov::element::i32});
+        }
+
+        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
             map.insert({ov::element::bf16, ov::element::f32});
+        }
 
         return map;
     };
-    static const auto precisions = get_convert_precisions();
+
+    const auto precisions = get_convert_precisions();
     type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
 
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion);
@@ -263,8 +266,13 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
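-    // Common ConvertPrecision pass handles only a limited set of opevino operations to match the list of precisions supported by the plugin.
+    // Common ConvertPrecision pass handles only a limited set of openvino operations to match the list of precisions supported by the plugin.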
     // However, if the extension operation produces an output precision that is not natively supported, this may lead to inconsistency during
     // element type propagation. This transformation is called before the ConvertPrecision pass to align the actual precisions with the list of supported ones.
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::InsertConvertAfterExtension);
+    if (!supportI64) {
+        CPU_REGISTER_PASS_COMMON(manager, ov::pass::InsertConvertAfterExtension);
+    }
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions, type_to_fuse);
+    if (supportI64) {
+        CPU_REGISTER_PASS_X64(manager, ConvertPrecisionI64ToI32);
+    }
 
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert);
     CPU_REGISTER_PASS_COMMON(manager, SwapConvertTranspose);
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h
index 57ad2e95e122af..290011951aa264 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h
@@ -62,7 +62,7 @@ class Transformations {
 
     void Snippets(void);
 
-    static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, const precisions_map& precisions);
+    static bool fuse_type_to_convert(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
 };
 
 }   // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.cpp b/src/plugins/intel_cpu/src/utils/blob_dump.cpp
index dce76d115d0908..af4b32babce63e 100644
--- a/src/plugins/intel_cpu/src/utils/blob_dump.cpp
+++ b/src/plugins/intel_cpu/src/utils/blob_dump.cpp
@@ -166,6 +166,12 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const {
     const void *ptr = memory->getData();
 
     switch (desc.getPrecision()) {
+        case Precision::FP64 : {
+            auto *blob_ptr = reinterpret_cast<const double*>(ptr);
+            for (size_t i = 0; i < data_size; i++)
+                stream << blob_ptr[desc.getElementOffset(i)] << std::endl;
+            break;
+        }
         case Precision::FP32 : {
             auto *blob_ptr = reinterpret_cast<const float*>(ptr);
             for (size_t i = 0; i < data_size; i++)
@@ -180,6 +186,12 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const {
             }
             break;
         }
+        case Precision::I64: {
+            auto *blob_ptr = reinterpret_cast<const int64_t*>(ptr);
+            for (size_t i = 0; i < data_size; i++)
+                stream << blob_ptr[desc.getElementOffset(i)] << std::endl;
+            break;
+        }
         case Precision::I32: {
             auto *blob_ptr = reinterpret_cast<const int32_t*>(ptr);
             for (size_t i = 0; i < data_size; i++)
diff --git a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp
index 870b081ba277cb..d28b1aeda33931 100644
--- a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp
+++ b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp
@@ -102,7 +102,8 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
         case InferenceEngine::Precision::I8:
         case InferenceEngine::Precision::I32:
         case InferenceEngine::Precision::BF16:
-        case InferenceEngine::Precision::FP32: {
+        case InferenceEngine::Precision::FP32:
+        case InferenceEngine::Precision::I64: {
             break;
         }
         case InferenceEngine::Precision::FP64: {
@@ -113,11 +114,13 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
             precision = InferenceEngine::Precision::U8;
             break;
         }
+        case InferenceEngine::Precision::U64: {
+            precision = InferenceEngine::Precision::I64;
+            break;
+        }
         case InferenceEngine::Precision::U16:
         case InferenceEngine::Precision::I16:
-        case InferenceEngine::Precision::U32:
-        case InferenceEngine::Precision::I64:
-        case InferenceEngine::Precision::U64: {
+        case InferenceEngine::Precision::U32: {
             precision = InferenceEngine::Precision::I32;
             break;
         }
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp
index 769aea85731bdb..2eff2b4792495e 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp
@@ -8,6 +8,7 @@
 
 using namespace LayerTestsDefinitions;
 using namespace LayerTestsDefinitions::ComparisonParams;
+using namespace InferenceEngine;
 
 namespace {
 
@@ -20,13 +21,6 @@ std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> inputShapes = {
         {{2, 1, 1, 3, 1}, {{1}, {1, 3, 4}, {2, 1, 3, 4}, {1, 1, 1, 1, 1}}},
 };
 
-std::vector<InferenceEngine::Precision> inputsPrecisions = {
-        InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16,
-        InferenceEngine::Precision::I32,
-        InferenceEngine::Precision::BOOL,
-};
-
 std::vector<ngraph::helpers::ComparisonTypes> comparisonOpTypes = {
         ngraph::helpers::ComparisonTypes::EQUAL,
         ngraph::helpers::ComparisonTypes::NOT_EQUAL,
@@ -43,17 +37,29 @@ std::vector<ngraph::helpers::InputLayerType> secondInputTypes = {
 
 std::map<std::string, std::string> additional_config = {};
 
-const auto ComparisonTestParams = ::testing::Combine(
-        ::testing::ValuesIn(CommonTestUtils::combineParams(inputShapes)),
-        ::testing::ValuesIn(inputsPrecisions),
-        ::testing::ValuesIn(comparisonOpTypes),
-        ::testing::ValuesIn(secondInputTypes),
-        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
-        ::testing::Values(additional_config));
-
-INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ComparisonLayerTest, ComparisonTestParams, ComparisonLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ComparisonLayerTest,  // smoke_ instantiation of the comparison tests
+        ::testing::Combine(
+                ::testing::ValuesIn(CommonTestUtils::combineParams(inputShapes)),
+                ::testing::ValuesIn(std::vector<Precision>{Precision::FP32, Precision::I32, Precision::I64}),  // input precisions covered under smoke_
+                ::testing::ValuesIn(comparisonOpTypes),
+                ::testing::ValuesIn(secondInputTypes),
+                ::testing::Values(Precision::UNSPECIFIED),
+                ::testing::Values(Precision::UNSPECIFIED),
+                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                ::testing::Values(additional_config)),
+        ComparisonLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(nightly_CompareWithRefs, ComparisonLayerTest,  // same parameter axes as the smoke_ instantiation, different precisions
+        ::testing::Combine(
+                ::testing::ValuesIn(CommonTestUtils::combineParams(inputShapes)),
+                ::testing::ValuesIn(std::vector<Precision>{Precision::FP16, Precision::BOOL}),  // FP16 and BOOL are instantiated under nightly_ rather than smoke_
+                ::testing::ValuesIn(comparisonOpTypes),
+                ::testing::ValuesIn(secondInputTypes),
+                ::testing::Values(Precision::UNSPECIFIED),
+                ::testing::Values(Precision::UNSPECIFIED),
+                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                ::testing::Values(additional_config)),
+        ComparisonLayerTest::getTestCaseName);
 
 
 std::vector<InputShapesTuple> inputShapesIsOps = {
@@ -80,11 +86,11 @@ std::vector<ngraph::helpers::ComparisonTypes> comparisonOpTypesIs = {
 
 const auto ComparisonTestParamsIs = ::testing::Combine(
         ::testing::ValuesIn(inputShapesIsOps),
-        ::testing::Values(InferenceEngine::Precision::FP32),
+        ::testing::Values(Precision::FP32),
         ::testing::ValuesIn(comparisonOpTypesIs),
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
-        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        ::testing::Values(Precision::UNSPECIFIED),
+        ::testing::Values(Precision::UNSPECIFIED),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config));
 
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp
index 9111bf532ce88e..017e217e29ded9 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp
@@ -5,7 +5,6 @@
 #include <vector>
 
 #include "single_layer_tests/concat.hpp"
-#include "common_test_utils/test_constants.hpp"
 
 using namespace LayerTestsDefinitions;
 
@@ -20,15 +19,11 @@ std::vector<std::vector<std::vector<size_t>>> inShapes = {
         {{10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}}
 };
 
-
-std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
-                                                         InferenceEngine::Precision::FP16};
-
 INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, ConcatLayerTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(axes),
                                 ::testing::ValuesIn(inShapes),
-                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::FP32),
                                 ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                                 ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                                 ::testing::Values(InferenceEngine::Layout::ANY),
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp
index 8192c4089c4c97..afbb218fc2bd21 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp
@@ -48,7 +48,7 @@ std::vector<std::vector<ov::test::InputShape>> inShapesDynamicLargeUpperBound =
 std::vector<ov::test::ElementType> netPrecisions = {
         ov::element::f32,
         ov::element::f16,
-        ov::element::i32,
+        ov::element::i32
 };
 
 std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp
deleted file mode 100644
index 6f3c729fd18688..00000000000000
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <vector>
-#include "single_layer_tests/minimum_maximum.hpp"
-#include "common_test_utils/test_constants.hpp"
-
-using namespace LayerTestsDefinitions;
-
-namespace {
-
-const std::vector<std::vector<std::vector<size_t>>> inShapes = {
-        {{2}, {1}},
-        {{1, 1, 1, 3}, {1}},
-        {{1, 2, 4}, {1}},
-        {{1, 4, 4}, {1}},
-        {{1, 4, 4, 1}, {1}},
-        {{256, 56}, {256, 56}},
-        {{8, 1, 6, 1}, {7, 1, 5}},
-};
-
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-        InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16,
-};
-
-const std::vector<ngraph::helpers::MinMaxOpType> opType = {
-        ngraph::helpers::MinMaxOpType::MINIMUM,
-        ngraph::helpers::MinMaxOpType::MAXIMUM,
-};
-
-const std::vector<ngraph::helpers::InputLayerType> inputType = {
-        ngraph::helpers::InputLayerType::CONSTANT,
-        ngraph::helpers::InputLayerType::PARAMETER,
-};
-
-INSTANTIATE_TEST_SUITE_P(smoke_maximum, MaxMinLayerTest,
-                        ::testing::Combine(
-                                ::testing::ValuesIn(inShapes),
-                                ::testing::ValuesIn(opType),
-                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                                ::testing::Values(InferenceEngine::Layout::ANY),
-                                ::testing::Values(InferenceEngine::Layout::ANY),
-                                ::testing::ValuesIn(inputType),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
-                        MaxMinLayerTest::getTestCaseName);
-
-}  // namespace
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/non_max_suppression.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/non_max_suppression.cpp
index 18a8aa36044458..402ccbd94cddfe 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/non_max_suppression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/non_max_suppression.cpp
@@ -39,4 +39,19 @@ const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
                                           ::testing::Values(CommonTestUtils::DEVICE_CPU)
 );
 
+const auto nmsParams_i64 = ::testing::Combine(::testing::ValuesIn(inShapeParams),  // NMS parameter set with an I64 entry in the precision triple
+                                          ::testing::Combine(::testing::Values(Precision::FP32),
+                                                             ::testing::Values(Precision::I64),  // the I64 member of the (FP32, I64, FP32) triple
+                                                             ::testing::Values(Precision::FP32)),
+                                          ::testing::ValuesIn(maxOutBoxPerClass),
+                                          ::testing::ValuesIn(threshold),  // `threshold` feeds two consecutive axes (presumably IOU and score thresholds; confirm against NmsLayerTest)
+                                          ::testing::ValuesIn(threshold),
+                                          ::testing::ValuesIn(sigmaThreshold),
+                                          ::testing::ValuesIn(encodType),
+                                          ::testing::ValuesIn(sortResDesc),
+                                          ::testing::ValuesIn(outType),
+                                          ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
 INSTANTIATE_TEST_SUITE_P(smoke_NmsLayerTest, NmsLayerTest, nmsParams, NmsLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_NmsLayerTest_i64, NmsLayerTest, nmsParams_i64, NmsLayerTest::getTestCaseName);
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp
index 70403b61629d28..6d128ec3b37164 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp
@@ -17,7 +17,8 @@ const std::vector<float> step = { 1.0f, 0.1f };
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::I64
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_Basic, RangeLayerTest,
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp
index e5ae486545f926..a8c1a713f55fdd 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp
@@ -13,9 +13,7 @@ namespace {
 const std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::FP32,
         InferenceEngine::Precision::FP16,
-        InferenceEngine::Precision::I64,
-        InferenceEngine::Precision::I32,
-        InferenceEngine::Precision::U64
+        InferenceEngine::Precision::I32
 };
 
 const std::vector<bool> keepDims = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp
index 7d3923f8be55dc..719e834a3f32cb 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp
@@ -12,7 +12,8 @@ using namespace LayerTestsDefinitions;
 namespace {
 const std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::I64
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheck, ReshapeLayerTest,
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_ND_update.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_ND_update.cpp
index 28698967cbd17a..61f893d896f856 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_ND_update.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_ND_update.cpp
@@ -16,6 +16,7 @@ const std::vector<InferenceEngine::Precision> inputPrecisions = {
         InferenceEngine::Precision::FP32,
         InferenceEngine::Precision::FP16,
         InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::I64
 };
 
 const std::vector<InferenceEngine::Precision> idxPrecisions = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp
index 3a2033805d57b5..b633a3fc516522 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp
@@ -27,6 +27,7 @@ const std::vector<InferenceEngine::Precision> inputPrecisions = {
         InferenceEngine::Precision::FP32,
         InferenceEngine::Precision::FP16,
         InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::I64
 };
 
 const std::vector<InferenceEngine::Precision> idxPrecisions = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp
index 60a3f488c0040f..6afbf6fe39587f 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp
@@ -15,7 +15,7 @@ namespace {
 const std::vector<InferenceEngine::Precision> inputPrecisions = {
         InferenceEngine::Precision::FP32,
         InferenceEngine::Precision::FP16,
-        InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::I32
 };
 
 const std::vector<InferenceEngine::Precision> idxPrecisions = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp
index 5f76a8462c51fb..22ab4942f69eb1 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp
@@ -13,9 +13,9 @@ const std::vector<InferenceEngine::Precision> inputPrecision = {
     InferenceEngine::Precision::I8,
     InferenceEngine::Precision::I16,
     InferenceEngine::Precision::I32,
-    InferenceEngine::Precision::FP32
+    InferenceEngine::Precision::FP32,
-    // CPU plug-in doesn't support I64 and U64 precisions at the moment
+    // CPU plug-in doesn't natively support U64 precision at the moment
-    // InferenceEngine::Precision::I64
+    InferenceEngine::Precision::I64
 };
 
 const std::vector<std::vector<std::vector<size_t>>> noneShapes = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp
index 3e00f12b22284f..c98a60447ea06e 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp
@@ -30,7 +30,8 @@ std::map<std::vector<size_t>, std::vector<std::vector<int>>> emptyAxesVectors =
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::FP16
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::I64
 };
 
 const std::vector<ngraph::helpers::SqueezeOpType> opTypes = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp
index 52eff3f90d0a39..a21f16f6e1fcba 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp
@@ -15,7 +15,8 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::U8,
         InferenceEngine::Precision::I32,
         InferenceEngine::Precision::BF16,
-        InferenceEngine::Precision::FP32
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::I64
 };
 
 const std::vector<InferenceEngine::Precision> netTPrecisions = {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 4b79a3bf83e417..9aa442dd236cdf 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -74,10 +74,11 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*OVCompiledModelBaseTest.*(CanGetInputsInfoAndCheck|canSetConfigToCompiledModel).*)",
         R"(.*Behavior.*CorrectConfigCheck.*(canSetConfigAndCheckGetConfig|canSetConfigTwiceAndCheckGetConfig).*CPU_BIND_THREAD=YES.*)",
         // Issue: 72021 Unreasonable abs_threshold for comparing bf16 results
-        R"(.*smoke_Reduce.*type=(Prod|Min).*netPRC=(BF|bf)16.*)",
+        R"(.*smoke_Reduce.*type=Prod.*netPRC=(BF|bf)16.*)",
         // TODO: 56520 Accuracy mismatch
-        R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=(I64|I32).*)",
-        R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=U64.*)",
+        R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=I32.*)",
+        R"(.*fusing.*ReduceCPULayerTest.*netPRC=(i|u)64.*CPU_NATIVE_I64=YES.*)",
+        R"(.*smoke.*Split.*(4D|5D).*netPRC=i8.*)",
         // Not implemented yet:
         R"(.*Behavior.*ExecutableNetworkBaseTest.*canSetConfigToExecNet.*)",
         R"(.*Behavior.*OVCompiledModelBaseTest.*canSetConfigToCompiledModel.*)",
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp
index 84812e1d048bc6..ff796fc21701d5 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp
@@ -6,39 +6,41 @@
 #include "ngraph_functions/builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace CPUTestUtils;
+using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 
 using BroadcastLayerTestParamsSet = typename std::tuple<
-        std::vector<ov::test::InputShape>,     // Shapes
+        std::vector<InputShape>,               // Shapes
         std::vector<int64_t>,                  // Target shapes
         std::vector<int64_t>,                  // Axes mapping
         ov::op::BroadcastType,                 // Broadcast mode
-        ov::element::Type_t,                   // Network precision
+        ElementType,                           // Network precision
         std::vector<bool>,                     // Const inputs
-        std::string>;                          // Device name
+        ov::AnyMap>;                           // Additional network configuration
 
 using BroadcastLayerCPUTestParamsSet = typename std::tuple<
         BroadcastLayerTestParamsSet,
         CPUSpecificParams>;
 
 class BroadcastLayerCPUTest : public testing::WithParamInterface<BroadcastLayerCPUTestParamsSet>,
-                              virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
+                              virtual public SubgraphBaseTest, public CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<BroadcastLayerCPUTestParamsSet> obj) {
         BroadcastLayerTestParamsSet basicParamsSet;
         CPUSpecificParams cpuParams;
         std::tie(basicParamsSet, cpuParams) = obj.param;
 
-        std::vector<ov::test::InputShape> inputShapes;
+        std::vector<InputShape> inputShapes;
         std::vector<int64_t> targetShapes, axesMapping;
         ov::op::BroadcastType mode;
-        ov::element::Type_t netPrecision;
+        ElementType netPrecision;
         std::vector<bool> isConstInputs;
-        std::string deviceName;
-        std::tie(inputShapes, targetShapes, axesMapping, mode, netPrecision, isConstInputs, deviceName) = basicParamsSet;
+        ov::AnyMap additionalConfig;
+        std::tie(inputShapes, targetShapes, axesMapping, mode, netPrecision, isConstInputs, additionalConfig) = basicParamsSet;
 
         std::ostringstream result;
         result << "IS=(";
@@ -56,7 +58,13 @@ class BroadcastLayerCPUTest : public testing::WithParamInterface<BroadcastLayerC
         result << "mode=" << mode << "_";
         result << "netPrec=" << netPrecision << "_";
         result << "constIn=(" << (isConstInputs[0] ? "True" : "False") << "." << (isConstInputs[1] ? "True" : "False") << ")_";
-        result << "trgDev=" << deviceName;
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                result << "_" << item.first << "=";
+                item.second.print(result);
+            }
+        }
 
         result << CPUTestsBase::getTestCaseName(cpuParams);
 
@@ -65,15 +73,16 @@ class BroadcastLayerCPUTest : public testing::WithParamInterface<BroadcastLayerC
 
 protected:
     void SetUp() override {
+        targetDevice = CommonTestUtils::DEVICE_CPU;
         BroadcastLayerTestParamsSet basicParamsSet;
         CPUSpecificParams cpuParams;
         std::tie(basicParamsSet, cpuParams) = this->GetParam();
 
-        std::vector<ov::test::InputShape> inputShapes;
+        std::vector<InputShape> inputShapes;
         ov::op::BroadcastType mode;
         ov::element::Type_t netPrecision;
         std::vector<bool> isConstInput;
-        std::tie(inputShapes, targetShape, axesMapping, mode, netPrecision, isConstInput, targetDevice) = basicParamsSet;
+        std::tie(inputShapes, targetShape, axesMapping, mode, netPrecision, isConstInput, configuration) = basicParamsSet;
         bool isTargetShapeConst = isConstInput[0], isAxesMapConst = isConstInput[1];
         const auto targetShapeRank = targetShape.size();
         const auto axesMappingRank = axesMapping.size();
@@ -173,10 +182,10 @@ class BroadcastLayerCPUTest : public testing::WithParamInterface<BroadcastLayerC
                 }
             } else {
                 if (funcInput.get_element_type().is_real()) {
-                    tensor = ov::test::utils::create_and_fill_tensor(
+                    tensor = utils::create_and_fill_tensor(
                         funcInput.get_element_type(), targetInputStaticShapes[i], 10, 0, 1000);
                 } else {
-                    tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                    tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
                 }
             }
             inputs.insert({funcInput.get_node_shared_ptr(), tensor});
@@ -205,12 +214,18 @@ const auto cpuParams_ndhwc = CPUSpecificParams {{ndhwc}, {ndhwc}, {}, "ref"};
 /* ========== */
 
 /* COMMON PARAMS */
-const std::vector<ov::element::Type_t> inputPrecisions = {
-    ov::element::f32,
-    ov::element::bf16,
-    ov::element::i32,
-    ov::element::i8
+const std::vector<ElementType> inputPrecisions = {  // element types for the emptyConfig instances; i64 is instantiated separately
+    ElementType::f32,
+    ElementType::bf16,
+    ElementType::i32,
+    ElementType::i8
+};
+
+const ov::AnyMap emptyConfig = {};  // no additional plugin options
+const ov::AnyMap i64Config = {  // sets the internal KEY_CPU_NATIVE_I64 plugin option to YES
+        {InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}
 };
+
 /* ============= */
 
 /* INSTANCES */
@@ -221,7 +236,7 @@ const std::vector<CPUSpecificParams> CPUParams4D = {
         cpuParams_nhwc
 };
 
-const std::vector<std::vector<ov::test::InputShape>> staticInputShapes4D = {
+const std::vector<std::vector<InputShape>> staticInputShapes4D = {
     {
         {{},
             { // Static shapes
@@ -247,7 +262,20 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape4D, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::Values(std::vector<bool>{true, true}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
+                        ::testing::ValuesIn(CPUParams4D)),
+                    BroadcastLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_StaticShape4D_I64, BroadcastLayerCPUTest,  // 4D static-shape NUMPY broadcast on i64 with i64Config
+                    ::testing::Combine(
+                            ::testing::Combine(
+                            ::testing::Values(staticInputShapes4D[0]),  // input shapes
+                            ::testing::ValuesIn(std::vector<std::vector<int64_t>>{{1, 16, 3, 3}, {1, 16, 1, 3}}),  // target shapes
+                            ::testing::Values(std::vector<int64_t>{}),  // axes mapping (empty)
+                            ::testing::Values(ov::op::BroadcastType::NUMPY),  // broadcast mode
+                            ::testing::Values(ElementType::i64),  // net precision
+                            ::testing::Values(std::vector<bool>{true, true}),  // {target shape is const, axes mapping is const}
+                            ::testing::Values(i64Config)),  // additional plugin config
                         ::testing::ValuesIn(CPUParams4D)),
                     BroadcastLayerCPUTest::getTestCaseName);
 
@@ -260,11 +288,11 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape4DE, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::EXPLICIT),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::Values(std::vector<bool>{true, true}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
-const std::vector<std::vector<ov::test::InputShape>> staticInputShapesScalar = {
+const std::vector<std::vector<InputShape>> staticInputShapesScalar = {
     {
         {{},
             { // Static shapes
@@ -283,11 +311,11 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape4DScalar, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::Values(std::vector<bool>{true, true}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
-const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes4D = {
+const std::vector<std::vector<InputShape>> dynamicInputShapes4D = {
     {
         { // Origin dynamic shapes
             {ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20)},
@@ -317,11 +345,23 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShape4D, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::ValuesIn(std::vector<std::vector<bool>>{{true, true}, {false, true}}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
-const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapesScalar = {
+INSTANTIATE_TEST_CASE_P(smoke_DynamicShape4D_I64, BroadcastLayerCPUTest,  // 4D dynamic-shape NUMPY broadcast on i64 with i64Config
+                    ::testing::Combine(::testing::Combine(
+                            ::testing::ValuesIn(dynamicInputShapes4D),  // input shapes
+                            ::testing::ValuesIn(std::vector<std::vector<int64_t>>{{8, 16,  1, 7}, {8, 16, 10, 7}}),  // target shapes
+                            ::testing::Values(std::vector<int64_t>{}),  // axes mapping (empty)
+                            ::testing::Values(ov::op::BroadcastType::NUMPY),  // broadcast mode
+                            ::testing::Values(ElementType::i64),  // net precision
+                            ::testing::ValuesIn(std::vector<std::vector<bool>>{{true, true}, {false, true}}),  // {target shape is const, axes mapping is const}
+                            ::testing::Values(i64Config)),  // additional plugin config
+                        ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),  // CPU-specific params: no formats/priority, "ref" selected type
+                    BroadcastLayerCPUTest::getTestCaseName);
+
+const std::vector<std::vector<InputShape>> dynamicInputShapesScalar = {
     {
         { // Origin dynamic shapes
             {-1},
@@ -341,12 +381,12 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShape4DScalar, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::ValuesIn(std::vector<std::vector<bool>>{{true, true}, {false, true}}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
 // 5D
-const std::vector<std::vector<ov::test::InputShape>> staticInputShapes5D = {
+const std::vector<std::vector<InputShape>> staticInputShapes5D = {
     {
         {{},
             { // Static shapes
@@ -355,7 +395,7 @@ const std::vector<std::vector<ov::test::InputShape>> staticInputShapes5D = {
         }
     }
 };
-const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes5D = {
+const std::vector<std::vector<InputShape>> dynamicInputShapes5D = {
     {
         { // Origin dynamic shapes
             {ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20)},
@@ -396,7 +436,20 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape5D, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::Values(std::vector<bool>{true, true}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
+                        ::testing::ValuesIn(CPUParams5D)),
+                    BroadcastLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_StaticShape5D_I64, BroadcastLayerCPUTest,  // 5D static-shape NUMPY broadcast on i64 with i64Config
+                    ::testing::Combine(
+                        ::testing::Combine(
+                            ::testing::ValuesIn(staticInputShapes5D),  // input shapes
+                            ::testing::ValuesIn(std::vector<std::vector<int64_t>>{{1, 16, 1, 1, 3}, {1, 16, 3, 1, 3}}),  // target shapes
+                            ::testing::Values(std::vector<int64_t>{}),  // axes mapping (empty)
+                            ::testing::Values(ov::op::BroadcastType::NUMPY),  // broadcast mode
+                            ::testing::Values(ElementType::i64),  // net precision
+                            ::testing::Values(std::vector<bool>{true, true}),  // {target shape is const, axes mapping is const}
+                            ::testing::Values(i64Config)),  // additional plugin config
                         ::testing::ValuesIn(CPUParams5D)),
                     BroadcastLayerCPUTest::getTestCaseName);
 
@@ -409,7 +462,7 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape5DScalar, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::Values(std::vector<bool>{true, true}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
@@ -422,7 +475,7 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShape5D, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::ValuesIn(std::vector<std::vector<bool>>{{true, true}, {false, true}}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
@@ -435,12 +488,12 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShape5DScalar, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::ValuesIn(std::vector<std::vector<bool>>{{true, true}, {false, true}}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 
 // 1D
-const std::vector<std::vector<ov::test::InputShape>> dynamicShapes1D = {
+const std::vector<std::vector<InputShape>> dynamicShapes1D = {
     {
         { // Origin dynamic shapes
             {-1},
@@ -460,7 +513,7 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShapes1D, BroadcastLayerCPUTest,
                             ::testing::Values(ov::op::BroadcastType::NUMPY),
                             ::testing::ValuesIn(inputPrecisions),
                             ::testing::ValuesIn(std::vector<std::vector<bool>>{{false, true}}),
-                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            ::testing::Values(emptyConfig)),
                         ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                     BroadcastLayerCPUTest::getTestCaseName);
 /* ========= */
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/activation.cpp
index b9764711d99204..976f9549b32fc9 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/activation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/activation.cpp
@@ -5,8 +5,8 @@
 #include "activation.hpp"
 #include "gtest/gtest.h"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
-using namespace InferenceEngine;
 using namespace CPUTestUtils;
 using namespace ngraph::helpers;
 using namespace ov::test;
@@ -17,9 +17,10 @@ std::string ActivationLayerCPUTest::getTestCaseName(const testing::TestParamInfo
     std::vector<ov::test::InputShape> inputShapes;
     std::vector<size_t> activationShapes;
     std::pair<ngraph::helpers::ActivationTypes, std::vector<float>> activationTypeAndConstValue;
-    InferenceEngine::Precision netPrecision, inPrecision, outPrecision;
+    ElementType netPrecision, inPrecision, outPrecision;
     CPUTestUtils::CPUSpecificParams cpuParams;
-    std::tie(inputShapes, activationShapes, activationTypeAndConstValue, netPrecision, inPrecision, outPrecision, cpuParams) = obj.param;
+    ov::AnyMap config;
+    std::tie(inputShapes, activationShapes, activationTypeAndConstValue, netPrecision, inPrecision, outPrecision, config, cpuParams) = obj.param;
 
     std::ostringstream result;
     result << LayerTestsDefinitions::activationNames[activationTypeAndConstValue.first] << "_";
@@ -39,20 +40,28 @@ std::string ActivationLayerCPUTest::getTestCaseName(const testing::TestParamInfo
     }
     result << "AS=" << CommonTestUtils::vec2str(activationShapes) << "_";
     result << "ConstantsValue=" << CommonTestUtils::vec2str(activationTypeAndConstValue.second) << "_";
-    result << "netPRC=" << netPrecision.name() << "_";
-    result << "inPRC=" << inPrecision.name() << "_";
-    result << "outPRC=" << outPrecision.name() << "_";
+    result << "netPRC=" << netPrecision << "_";
+    result << "inPRC=" << inPrecision << "_";
+    result << "outPRC=" << outPrecision << "_";
     result << CPUTestUtils::CPUTestsBase::getTestCaseName(cpuParams);
 
+    if (!config.empty()) {
+        result << "_PluginConf";
+        for (const auto& configItem : config) {
+            result << "_" << configItem.first << "=";
+            configItem.second.print(result);
+        }
+    }
+
     return result.str();
 }
 
-void ActivationLayerCPUTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
+void ActivationLayerCPUTest::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
     int32_t startFrom = 0;
     uint32_t range = 0;
     int32_t resolution = 0;
 
-    if (activationType == ActivationTypes::Exp && netPrecision == Precision::BF16) {
+    if (activationType == ActivationTypes::Exp && netPrecision == ElementType::bf16) {
         startFrom = 0;
         range = 2;
         resolution = 32768;
@@ -93,18 +102,16 @@ void ActivationLayerCPUTest::SetUp() {
     std::vector<ov::test::InputShape> inputShapes;
     std::vector<size_t> activationShapes;
     std::pair<ngraph::helpers::ActivationTypes, std::vector<float>> activationTypeAndConstValue;
-    InferenceEngine::Precision inPrecision, outPrecision;
     CPUTestUtils::CPUSpecificParams cpuParams;
-    std::tie(inputShapes, activationShapes, activationTypeAndConstValue, netPrecision, inPrecision, outPrecision, cpuParams) = this->GetParam();
+
+    std::tie(inputShapes, activationShapes, activationTypeAndConstValue, netPrecision, inType, outType, configuration, cpuParams) = this->GetParam();
     std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+
     activationType = activationTypeAndConstValue.first;
     auto constantsValue = activationTypeAndConstValue.second;
 
-    inType  = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrecision);
-    outType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrecision);
-    selectedType = getPrimitiveType() + "_" + netPrecision.name();
-
 #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
+    selectedType = makeSelectedTypeStr(getPrimitiveType(), netPrecision);  // netPrecision is an ElementType enum now, it has no .name()
 #    if defined(OPENVINO_ARCH_ARM)
     if (activationType == ngraph::helpers::ActivationTypes::GeluErf) // @todo tmp fallback to ref, gelu erf is disabled for 32bit ARM
-        selectedType = std::string("ref_") + netPrecision.name();
+        selectedType = makeSelectedTypeStr("ref", netPrecision);
@@ -114,17 +121,30 @@ void ActivationLayerCPUTest::SetUp() {
         inputShapes.front().first.rank().get_length() > 5)               // @todo tmp fallback to ref, remove after 6D+ ranks are properly supported
-        selectedType = std::string("ref_") + netPrecision.name();
+        selectedType = makeSelectedTypeStr("ref", netPrecision);
 #else
+    selectedType = getPrimitiveType();
     if (activationType == ngraph::helpers::ActivationTypes::Log)  // @todo tmp fallback to ref, remove after Log is supported in emitters
-        selectedType = std::string("ref_") + netPrecision.name();
+        selectedType = std::string("ref");
+
+    if (netPrecision == ElementType::i64 || netPrecision == ElementType::u64) {
+        auto i64It = configuration.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+        if (i64It == configuration.end() || i64It->second == InferenceEngine::PluginConfigParams::NO) {
+            selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, ElementType::i64);
+        }
+    } else if (netPrecision == ElementType::boolean) {
+        selectedType = makeSelectedTypeStr(selectedType, ElementType::i8);
+    } else {
+        selectedType = makeSelectedTypeStr(selectedType, netPrecision);
+    }
 #endif
 
     init_input_shapes(inputShapes);
 
-    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-    auto params = ngraph::builder::makeDynamicParams(ngPrc, {inputDynamicShapes.front()});
-    auto activation = ngraph::builder::makeActivation(params[0], ngPrc, activationType, activationShapes, constantsValue);
+    auto params = ngraph::builder::makeDynamicParams(netPrecision, {inputDynamicShapes.front()});
+    auto activation = ngraph::builder::makeActivation(params[0], netPrecision, activationType, activationShapes, constantsValue);
     activation->get_rt_info() = getCPUInfo();
-    function = std::make_shared<ngraph::Function>(ngraph::NodeVector{activation}, params, "Activation");
+    function = std::make_shared<ov::Model>(ov::NodeVector{activation}, params, "Activation");
 }
 
 TEST_P(ActivationLayerCPUTest, CompareWithRefs) {
@@ -160,8 +180,8 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>>& activationType
     return activationTypes;
 }
 
-const std::vector<Precision>& netPrc() {
-    static const std::vector<Precision> netPrc{Precision::FP32};
+const std::vector<ElementType>& netPrc() {  // returns the list of net precisions (f32 only)
+    static const std::vector<ElementType> netPrc{ElementType::f32};
 
     return netPrc;
 }
@@ -245,9 +265,9 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>>& activationType
     return activationTypesDynamicMath;
 }
 
-const std::vector<Precision>& netPrecisions() {
-    static const std::vector<Precision> netPrecisions {
-        InferenceEngine::Precision::FP32
+const std::vector<ElementType>& netPrecisions() {  // returns the list of net precisions (f32 only)
+    static const std::vector<ElementType> netPrecisions {
+        ElementType::f32
     };
 
     return netPrecisions;
 }
index b7881fae053691..b443011d019894 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/activation.hpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/activation.hpp
@@ -9,7 +9,6 @@
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
 #include "test_utils/cpu_test_utils.hpp"
-#include "gtest/gtest.h"
 
 namespace CPULayerTestsDefinitions  {
 
@@ -17,9 +16,10 @@ using ActivationLayerCPUTestParamSet =
     std::tuple<std::vector<ov::test::InputShape>,                                // Input shapes
                std::vector<size_t>,                                              // Activation shapes
                std::pair<ngraph::helpers::ActivationTypes, std::vector<float>>,  // Activation type and constant value
-               InferenceEngine::Precision,                                       // Net precision
-               InferenceEngine::Precision,                                       // Input precision
-               InferenceEngine::Precision,                                       // Output precision
+               ov::test::ElementType,                                            // Net precision
+               ov::test::ElementType,                                            // Input precision
+               ov::test::ElementType,                                            // Output precision
+               ov::AnyMap,                                                       // Additional network configuration
                CPUTestUtils::CPUSpecificParams>;
 
 class ActivationLayerCPUTest : public testing::WithParamInterface<ActivationLayerCPUTestParamSet>,
@@ -27,13 +27,13 @@ class ActivationLayerCPUTest : public testing::WithParamInterface<ActivationLaye
                                public CPUTestUtils::CPUTestsBase {
 public:
     static std::string getTestCaseName(const testing::TestParamInfo<ActivationLayerCPUTestParamSet> &obj);
-    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override;
 
 protected:
     void SetUp() override;
 
 private:
-    InferenceEngine::Precision netPrecision = InferenceEngine::Precision::UNSPECIFIED;
+    ov::test::ElementType netPrecision = ov::test::ElementType::undefined;
     ngraph::helpers::ActivationTypes activationType = ngraph::helpers::None;
 };
 
@@ -43,7 +43,7 @@ const std::vector<size_t> activationShapes();
 
 const std::map<ngraph::helpers::ActivationTypes, std::vector<std::vector<float>>>& activationTypes();
 
-const std::vector<InferenceEngine::Precision>& netPrc();
+const std::vector<ov::test::ElementType>& netPrc();
 
 /* ============= Activation (1D) ============= */
 const std::vector<CPUTestUtils::CPUSpecificParams>& cpuParams3D();
@@ -62,7 +62,7 @@ const std::vector<std::vector<ov::Shape>>& basic5D();
 
 const std::map<ngraph::helpers::ActivationTypes, std::vector<std::vector<float>>>& activationTypesDynamicMath();
 
-const std::vector<InferenceEngine::Precision>& netPrecisions();
+const std::vector<ov::test::ElementType>& netPrecisions();
 
 const std::vector<CPUTestUtils::CPUSpecificParams>& cpuParamsDynamicMath();
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp
index bac15ee7f0152a..c5e259aa9b6f17 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.cpp
@@ -3,22 +3,21 @@
 //
 
 #include "conversion.hpp"
-
-#include "gtest/gtest.h"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::helpers;
 using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 
 std::string ConvertCPULayerTest::getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
     InputShape inputShape;
-    InferenceEngine::Precision inPrc, outPrc;
+    ElementType inPrc, outPrc;
     CPUSpecificParams cpuParams;
-    std::tie(inputShape, inPrc, outPrc, cpuParams) = obj.param;
+    ov::AnyMap config;
+    std::tie(inputShape, inPrc, outPrc, config, cpuParams) = obj.param;
 
     std::ostringstream result;
 
@@ -27,26 +26,34 @@ std::string ConvertCPULayerTest::getTestCaseName(testing::TestParamInfo<convertL
     for (const auto& shape : inputShape.second) {
         result << CommonTestUtils::vec2str(shape) << "_";
     }
-    result << "inputPRC=" << inPrc.name() << "_";
-    result << "targetPRC=" << outPrc.name() << "_";
+    result << "inputPRC=" << inPrc << "_";
+    result << "targetPRC=" << outPrc << "_";
     result << CPUTestsBase::getTestCaseName(cpuParams);
 
+    if (!config.empty()) {
+        result << "_PluginConf";
+        for (const auto& configItem : config) {
+            result << "_" << configItem.first << "=";
+            configItem.second.print(result);
+        }
+    }
+
     return result.str();
 }
 
-bool ConvertCPULayerTest::isInOutPrecisionSupported(InferenceEngine::Precision inPrc, InferenceEngine::Precision outPrc) {
+bool ConvertCPULayerTest::isInOutPrecisionSupported(ElementType inPrc, ElementType outPrc) {
     // WA: I32 precision support disabled in snippets => primitive has to be changed
     // TODO: remove the WA after I32 is supported in snippets (ticket: 99803)
 #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
-    if (inPrc == InferenceEngine::Precision::I32 || outPrc == InferenceEngine::Precision::I32)
+    if (inPrc == ElementType::i32 || outPrc == ElementType::i32)
         return false;
 #endif
     // ACL does not support specific in-out precision pairs
 #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
-    if ((inPrc == InferenceEngine::Precision::I8 && outPrc == InferenceEngine::Precision::U8) ||
-        (inPrc == InferenceEngine::Precision::U8 && outPrc == InferenceEngine::Precision::I8) ||
-        (inPrc == InferenceEngine::Precision::FP32 && (outPrc == InferenceEngine::Precision::U8 ||
-                                                       outPrc == InferenceEngine::Precision::I8)))
+    if ((inPrc == ElementType::i8 && outPrc == ElementType::u8) ||
+        (inPrc == ElementType::u8 && outPrc == ElementType::i8) ||
+        (inPrc == ElementType::f32 && (outPrc == ElementType::u8 ||
+                                       outPrc == ElementType::i8)))
             return false;
 #endif
     return true;
@@ -57,36 +64,44 @@ void ConvertCPULayerTest::SetUp() {
 
     InputShape shapes;
     CPUSpecificParams cpuParams;
-    std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
+    std::tie(shapes, inPrc, outPrc, configuration, cpuParams) = GetParam();
 
     std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
     auto primitive = selectedType;
     if (primitive.empty())
         primitive = getPrimitiveType();
-    if (!isInOutPrecisionSupported(inPrc, outPrc))
-        primitive = "ref";
-
-    auto exec_type_precision = inPrc != InferenceEngine::Precision::U8
-                                    ? inPrc
-                                    : InferenceEngine::Precision(InferenceEngine::Precision::I8);
-    selectedType = makeSelectedTypeStr(primitive, InferenceEngine::details::convertPrecision(exec_type_precision));
+    // WA: I32 precision support disabled in snippets => primitive has to be changed (the same WA is applied to I64 below)
+    // TODO: remove the WA after I32 is supported in snippets (ticket: 99803)
+    if (inPrc == ElementType::i32 || inPrc == ElementType::i64 || outPrc == ElementType::i32 || outPrc == ElementType::i64)
+        primitive = "unknown";
+
+    if (inPrc == ElementType::i64 || inPrc == ElementType::u64) {
+        auto i64Flag = configuration.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+        if (i64Flag == configuration.end() || i64Flag->second == PluginConfigParams::NO) {
+            selectedType = makeSelectedTypeStr(primitive, ElementType::i32);
+        } else {
+            selectedType = makeSelectedTypeStr(primitive, ElementType::i64);
+        }
+    } else if (inPrc == ElementType::u8) {
+        selectedType = makeSelectedTypeStr(primitive, ElementType::i8);
+    } else {
+        selectedType = makeSelectedTypeStr(primitive, inPrc);
+    }
 
     for (size_t i = 0; i < shapes.second.size(); i++) {
-        targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
+        targetStaticShapes.push_back(std::vector<ov::Shape>{shapes.second[i]});
     }
 
     inputDynamicShapes.push_back(shapes.first);
 
-    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
-    auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
-    ParameterVector params = builder::makeDynamicParams(ngPrc, inputDynamicShapes);
-    auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);
+    ov::ParameterVector params = ngraph::builder::makeDynamicParams(inPrc, inputDynamicShapes);
+    auto conversion = ngraph::builder::makeConversion(params.front(), outPrc, ngraph::helpers::ConversionTypes::CONVERT);
 
-    function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
+    function = makeNgraphFunction(inPrc, params, conversion, "ConversionCPU");
 }
 
-void ConvertCPULayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
-    if (outPrc != Precision::BOOL) {
+void ConvertCPULayerTest::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
+    if (outPrc != ElementType::boolean) {
         SubgraphBaseTest::generate_inputs(targetInputStaticShapes);
         return;
     }
@@ -104,13 +119,13 @@ void ConvertCPULayerTest::generate_inputs(const std::vector<ngraph::Shape>& targ
     size_t size = shape_size(shape);
     ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInputs[0].get_element_type(), shape, 2 * size);
 
-    if (inPrc == Precision::FP32) {
+    if (inPrc == ElementType::f32) {
         auto* rawBlobDataPtr = static_cast<float*>(tensor.data());
         for (size_t i = 0; i < size; ++i) {
             rawBlobDataPtr[i] = rawBlobDataPtr[i] / size - 1;
         }
-    } else if (inPrc == Precision::BF16) {
-        auto* rawBlobDataPtr = static_cast<ngraph::bfloat16*>(tensor.data());
+    } else if (inPrc == ElementType::bf16) {
+        auto* rawBlobDataPtr = static_cast<ov::bfloat16*>(tensor.data());
         for (size_t i = 0; i < size; ++i) {
             rawBlobDataPtr[i] = rawBlobDataPtr[i] / size - 1;
         }
@@ -162,13 +177,13 @@ const std::vector<InputShape>& inShapes_4D_dynamic() {
     return inShapes_4D_dynamic;
 }
 
-const std::vector<Precision>& precisions() {
-    static const std::vector<Precision> precisions = {
-            Precision::U8,
-            Precision::I8,
-            Precision::I32,
-            Precision::FP32,
-            Precision::BF16
+const std::vector<ElementType>& precisions() {  // returns the element-type list: u8, i8, i32, f32, bf16 (no i64 here)
+    static const std::vector<ElementType> precisions = {
+        ElementType::u8,
+        ElementType::i8,
+        ElementType::i32,
+        ElementType::f32,
+        ElementType::bf16
     };
     return precisions;
 }
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp
index 10c331a0ff255d..b08a8dbf8eed31 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/conversion.hpp
@@ -9,35 +9,33 @@
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
 #include "test_utils/cpu_test_utils.hpp"
-#include "gtest/gtest.h"
-
-using namespace InferenceEngine;
-using namespace ngraph;
-using namespace CPUTestUtils;
-using namespace ov::test;
 
 namespace CPULayerTestsDefinitions  {
-using convertLayerTestParamsSet = std::tuple<InputShape,                   // input shapes
-                                        InferenceEngine::Precision,        // input precision
-                                        InferenceEngine::Precision,        // output precision
-                                        CPUSpecificParams>;
+using convertLayerTestParamsSet = std::tuple<
+        ov::test::InputShape,        // input shapes
+        ov::test::ElementType,       // input precision
+        ov::test::ElementType,       // output precision
+        ov::AnyMap,                  // Additional plugin configuration
+        CPUTestUtils::CPUSpecificParams
+>;
 
 class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
-                            virtual public SubgraphBaseTest, public CPUTestsBase {
+                            virtual public ov::test::SubgraphBaseTest, public CPUTestUtils::CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj);
-    static bool isInOutPrecisionSupported(InferenceEngine::Precision inPrc, InferenceEngine::Precision outPrc);
+    static bool isInOutPrecisionSupported(ov::test::ElementType inPrc, ov::test::ElementType outPrc);
 protected:
     void SetUp() override;
-    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override;
 
 private:
-    InferenceEngine::Precision inPrc, outPrc;
+    ov::test::ElementType inPrc, outPrc;
 };
 
 namespace Conversion {
-    const std::vector<InputShape>& inShapes_4D_static();
-    const std::vector<InputShape>& inShapes_4D_dynamic();
-    const std::vector<Precision>& precisions();
+    const std::vector<ov::test::InputShape>& inShapes_4D_static();
+    const std::vector<ov::test::InputShape>& inShapes_4D_dynamic();
+    const std::vector<ov::test::ElementType>& precisions();
+
 } // namespace Conversion
-} // namespace CPULayerTestsDefinitions
\ No newline at end of file
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp
index d81de3af743ee1..72b0fc163adbe5 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp
@@ -5,6 +5,7 @@
 #include "eltwise.hpp"
 #include "gtest/gtest.h"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
@@ -28,7 +29,7 @@ std::string EltwiseLayerCPUTest::getTestCaseName(testing::TestParamInfo<EltwiseL
         return result.str();
 }
 
-ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type& type, const ngraph::Shape& shape) {
+ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type& type, const ov::Shape& shape) {
         struct gen_params {
             uint32_t range;
             int32_t start_from;
@@ -63,7 +64,7 @@ ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type&
         return ov::test::utils::create_and_fill_tensor(type, shape, params.range, params.start_from, params.resolution);
     }
 
-void EltwiseLayerCPUTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
+void EltwiseLayerCPUTest::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
         inputs.clear();
         const auto& funcInputs = function->inputs();
         for (size_t i = 0; i < funcInputs.size(); ++i) {
@@ -93,7 +94,16 @@ void EltwiseLayerCPUTest::SetUp() {
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
         std::tie(postOpMgrPtr, fusedOps) = fusingParams;
 
-        selectedType = makeSelectedTypeStr(getPrimitiveType(), netType);
+        // Expected primitive precision: 64-bit integer inputs are reported as i64 only when KEY_CPU_NATIVE_I64 is
+        // enabled, otherwise as i32. The flag is read from `additional_config` because `configuration` is filled
+        // from it only further down in SetUp().
+        auto execType = netType;
+        if (inType == ElementType::i64 || inType == ElementType::u64) {
+            const auto i64Flag = additional_config.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            const bool nativeI64 = i64Flag != additional_config.end() && !(i64Flag->second == PluginConfigParams::NO);
+            execType = nativeI64 ? ElementType::i64 : ElementType::i32;
+        }
+        selectedType = makeSelectedTypeStr(getPrimitiveType(), execType);
         #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
             if (eltwiseType == POWER) {
                 selectedType = std::regex_replace(selectedType, std::regex("acl"), "ref");
@@ -103,7 +113,7 @@ void EltwiseLayerCPUTest::SetUp() {
         shapes.resize(2);
         switch (opType) {
             case CommonTestUtils::OpType::SCALAR: {
-                std::vector<ngraph::Shape> identityShapes(shapes[0].second.size(), {1});
+                std::vector<ov::Shape> identityShapes(shapes[0].second.size(), {1});
                 shapes[1] = {{}, identityShapes};
                 break;
             }
@@ -120,13 +130,13 @@ void EltwiseLayerCPUTest::SetUp() {
 
         configuration.insert(additional_config.begin(), additional_config.end());
         auto parameters = ngraph::builder::makeDynamicParams(netType, {inputDynamicShapes.front()});
-        std::shared_ptr<ngraph::Node> secondaryInput;
+        std::shared_ptr<ov::Node> secondaryInput;
         if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
             secondaryInput = ngraph::builder::makeDynamicParams(netType, {inputDynamicShapes.back()}).front();
-            parameters.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
+            parameters.push_back(std::dynamic_pointer_cast<ov::op::v0::Parameter>(secondaryInput));
         } else {
             auto pShape = inputDynamicShapes.back();
-            ngraph::Shape shape;
+            ov::Shape shape;
             if (pShape.is_static()) {
                 shape = pShape.get_shape();
             } else {
@@ -138,16 +148,24 @@ void EltwiseLayerCPUTest::SetUp() {
                     }
                 }
             }
+
             if (netType == ElementType::i32) {
-                auto data_tensor = generate_eltwise_input(ElementType::i32, shape);
+                auto data_tensor = generate_eltwise_input(netType, shape);
                 auto data_ptr = reinterpret_cast<int32_t*>(data_tensor.data());
-                std::vector<int32_t> data(data_ptr, data_ptr + ngraph::shape_size(shape));
+                std::vector<int32_t> data(data_ptr, data_ptr + ov::shape_size(shape));
                 secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
-            } else {
+            } else if (netType == ElementType::i64) {
+                auto data_tensor = generate_eltwise_input(netType, shape);
+                auto data_ptr = reinterpret_cast<int64_t*>(data_tensor.data());
+                std::vector<int64_t> data(data_ptr, data_ptr + ov::shape_size(shape));
+                secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
+            } else if (netType == ElementType::f32 || netType == ElementType::bf16) {
                 auto data_tensor = generate_eltwise_input(ElementType::f32, shape);
                 auto data_ptr = reinterpret_cast<float*>(data_tensor.data());
-                std::vector<float> data(data_ptr, data_ptr + ngraph::shape_size(shape));
+                std::vector<float> data(data_ptr, data_ptr + ov::shape_size(shape));
                 secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
+            } else {
+                IE_THROW() << "Unsupported data type.";
             }
         }
         auto eltwise = ngraph::builder::makeEltwise(parameters[0], secondaryInput, eltwiseType);
@@ -270,8 +288,8 @@ const std::vector<ngraph::helpers::InputLayerType>& secondaryInputTypes() {
         return secondaryInputTypes;
 }
 
-const std::vector<std::vector<ngraph::Shape>>& inShapes_4D_1D() {
-        static const std::vector<std::vector<ngraph::Shape>> inShapes_4D_1D = {
+const std::vector<std::vector<ov::Shape>>& inShapes_4D_1D() {
+        static const std::vector<std::vector<ov::Shape>> inShapes_4D_1D = {
                 {{2, 17, 5, 4}, {4}},
                 {{1, 3, 3, 3}, {3}},
         };
@@ -293,8 +311,8 @@ const std::vector<CPUSpecificParams>& cpuParams_4D_1D_Parameter_mode() {
         return cpuParams_4D_1D_Parameter_mode;
 }
 
-const std::vector<std::vector<ngraph::Shape>>& inShapes_5D_1D() {
-        static const std::vector<std::vector<ngraph::Shape>> inShapes_5D_1D = {
+const std::vector<std::vector<ov::Shape>>& inShapes_5D_1D() {
+        static const std::vector<std::vector<ov::Shape>> inShapes_5D_1D = {
                 {{2, 17, 5, 4, 10}, {10}},
                 {{1, 3, 3, 3, 3}, {3}},
         };
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp
index fbd3f9bb3e485f..a48e3b5f39eaf2 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp
@@ -4,12 +4,11 @@
 
 #include "reduce.hpp"
 
-#include "gtest/gtest.h"
-#include "test_utils/cpu_test_utils.hpp"
+#include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::helpers;
 using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
@@ -26,8 +25,9 @@ std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo<ReduceLay
     ngraph::helpers::ReductionType reductionType;
     ElementType netPrecision, inPrc, outPrc;
     std::vector<InputShape> inputShapes;
+    ov::AnyMap config;
 
-    std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams;
+    std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes, config) = basicParams;
 
     std::ostringstream result;
     result << "IS=(";
@@ -49,7 +49,15 @@ std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo<ReduceLay
         result << "KeepDims=false_";
     result << "netPRC=" << netPrecision << "_";
     result << "inPRC=" << inPrc << "_";
-    result << "outPRC=" << outPrc << "_";
+    result << "outPRC=" << outPrc;
+
+    if (!config.empty()) {
+        result << "_PluginConf";
+        for (const auto& configItem : config) {
+            result << "_" << configItem.first << "=";
+            configItem.second.print(result);
+        }
+    }
 
     result << CPUTestsBase::getTestCaseName(cpuParams);
     result << CpuTestWithFusing::getTestCaseName(fusingParams);
@@ -74,54 +82,64 @@ void ReduceCPULayerTest::SetUp() {
     ElementType inPrc, outPrc;
     std::vector<InputShape> inputShapes;
 
-    std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams;
+    std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes, configuration) = basicParams;
     inPrc = outPrc = netPrecision;
 
     init_input_shapes(inputShapes);
 
     auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
-    auto paramOuts =
-        ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+    auto paramOuts = ngraph::helpers::convert2OutputVector(
+            ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
 
     std::vector<size_t> shapeAxes;
     switch (opType) {
-    case CommonTestUtils::OpType::SCALAR:
-        if (axes.size() > 1)
-            FAIL() << "In reduce op if op type is scalar, 'axis' input's must contain 1 element";
-        break;
-    case CommonTestUtils::OpType::VECTOR:
-        shapeAxes.push_back(axes.size());
-        break;
-    default:
-        FAIL() << "Reduce op doesn't support operation type: " << opType;
+        case CommonTestUtils::OpType::SCALAR:
+            if (axes.size() > 1)
+                FAIL() << "In reduce op if op type is scalar, 'axis' input's must contain 1 element";
+            break;
+        case CommonTestUtils::OpType::VECTOR:
+            shapeAxes.push_back(axes.size());
+            break;
+        default:
+            FAIL() << "Reduce op doesn't support operation type: " << opType;
     }
-    auto reductionAxesNode = std::dynamic_pointer_cast<ngraph::Node>(
-        std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes));
+    auto reductionAxesNode = std::dynamic_pointer_cast<ov::Node>(
+            std::make_shared<ov::op::v0::Constant>(ElementType::i64, ov::Shape(shapeAxes), axes));
 
     const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType);
 
-    selectedType = getPrimitiveType() + "_" +
-                   (inPrc == ElementType::boolean ? "I8" : InferenceEngine::details::convertPrecision(inPrc).name());
+    if (inPrc == ElementType::i64 || inPrc == ElementType::u64) {
+        auto i64It = configuration.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+        if (i64It == configuration.end() || i64It->second == PluginConfigParams::NO) {
+            selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::i32);
+        } else {
+            selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::i64);
+        }
+    } else if (inPrc == ElementType::boolean) {
+        selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::i8);
+    } else {
+        selectedType = makeSelectedTypeStr(getPrimitiveType(), inPrc);
+    }
 
     // hybrid layouts
     if (inFmts.size() != 0 && outFmts.size() == 0) {
         size_t outShapeSize = inputDynamicShapes[0].size() - axes.size();
         switch (outShapeSize) {
-        case 0:
-        case 1:
-            outFmts.push_back(x);
-            break;
-        case 2:
-            outFmts.push_back(nc);
-            break;
-        case 3:
-            outFmts.push_back(tnc);
-            break;
-        case 4:
-            outFmts.push_back(nchw);
-            break;
-        default:
-            FAIL() << "Invaid outShapeSize: " << outShapeSize;
+            case 0:
+            case 1:
+                outFmts.push_back(x);
+                break;
+            case 2:
+                outFmts.push_back(nc);
+                break;
+            case 3:
+                outFmts.push_back(tnc);
+                break;
+            case 4:
+                outFmts.push_back(nchw);
+                break;
+            default:
+                FAIL() << "Invalid outShapeSize: " << outShapeSize;
         }
     }
 
@@ -135,23 +153,21 @@ void ReduceCPULayerTest::generate_inputs(const std::vector<ngraph::Shape>& targe
         const auto& funcInput = funcInputs[i];
         ov::Tensor tensor;
         if (reductionType == ngraph::helpers::ReductionType::Prod) {
-            tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(),
-                                                             targetInputStaticShapes[i],
-                                                             10,
-                                                             5);
+            // Range 10 starting from 1. Only floating-point data is rescaled below; integer data is used as generated.
+            tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 10, 1);
             if (netPrecision == ElementType::f32) {
                 auto* rawBlobDataPtr = static_cast<float*>(tensor.data());
                 for (size_t i = 0; i < tensor.get_size(); ++i) {
                     rawBlobDataPtr[i] /= 10.f;
                 }
             } else if (netPrecision == ElementType::bf16) {
-                auto* rawBlobDataPtr = static_cast<ngraph::bfloat16*>(tensor.data());
+                auto* rawBlobDataPtr = static_cast<ov::bfloat16*>(tensor.data());
                 for (size_t i = 0; i < tensor.get_size(); ++i) {
                     rawBlobDataPtr[i] /= 10.f;
                 }
             }
         } else {
-            tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+            tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
         }
 
         inputs.insert({funcInput.get_node_shared_ptr(), tensor});
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp
index 5325093c222313..dbcfaf9666c5cf 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp
@@ -5,43 +5,38 @@
 #pragma once
 
 #include "shared_test_classes/base/ov_subgraph.hpp"
-#include "ngraph_functions/builders.hpp"
-#include "test_utils/cpu_test_utils.hpp"
-#include <common_test_utils/ov_tensor_utils.hpp>
 #include "test_utils/fusing_test_utils.hpp"
 
-using namespace CPUTestUtils;
-using namespace ov::test;
-
 namespace CPULayerTestsDefinitions {
 
 typedef std::tuple<
-        std::vector<int>,               // Axis to reduce order
-        CommonTestUtils::OpType,        // Scalar or vector type axis
-        bool,                           // Keep dims
-        ngraph::helpers::ReductionType, // Reduce operation type
-        ElementType,                    // Net precision
-        ElementType,                    // Input precision
-        ElementType,                    // Output precision
-        std::vector<InputShape>         // Input shapes
+        std::vector<int>,                   // Axis to reduce order
+        CommonTestUtils::OpType,            // Scalar or vector type axis
+        bool,                               // Keep dims
+        ngraph::helpers::ReductionType,     // Reduce operation type
+        ov::test::ElementType,              // Net precision
+        ov::test::ElementType,              // Input precision
+        ov::test::ElementType,              // Output precision
+        std::vector<ov::test::InputShape>,  // Input shapes
+        ov::AnyMap                          // Additional network configuration
 > basicReduceParams;
 
 typedef std::tuple<
         basicReduceParams,
-        CPUSpecificParams,
-        fusingSpecificParams> ReduceLayerCPUTestParamSet;
+        CPUTestUtils::CPUSpecificParams,
+        CPUTestUtils::fusingSpecificParams> ReduceLayerCPUTestParamSet;
 
 class ReduceCPULayerTest : public testing::WithParamInterface<ReduceLayerCPUTestParamSet>,
-                           virtual public SubgraphBaseTest, public CpuTestWithFusing {
+                           virtual public ov::test::SubgraphBaseTest, public CPUTestUtils::CpuTestWithFusing {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<ReduceLayerCPUTestParamSet> obj);
 protected:
     void SetUp() override;
-    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override;
 
 private:
     ngraph::helpers::ReductionType reductionType;
-    ElementType netPrecision;
+    ov::test::ElementType netPrecision;
 };
 
 namespace Reduce {
@@ -51,8 +46,8 @@ const std::vector<std::vector<int>>& axes();
 const std::vector<std::vector<int>>& axesND();
 const std::vector<CommonTestUtils::OpType>& opTypes();
 const std::vector<ngraph::helpers::ReductionType>& reductionTypes();
-const std::vector<ElementType>& inpOutPrc();
+const std::vector<ov::test::ElementType>& inpOutPrc();
 const std::vector<ngraph::helpers::ReductionType>& reductionTypesInt32();
 
 } // namespace Reduce
-} // namespace CPULayerTestsDefinitions
\ No newline at end of file
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.cpp
index f90af8c07d008e..fec1a010176af6 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.cpp
@@ -14,13 +14,12 @@ using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 std::string TransposeLayerCPUTest::getTestCaseName(testing::TestParamInfo<TransposeLayerCPUTestParamSet> obj) {
-    Precision netPrecision;
+    ElementType netPrecision;
     InputShape inputShapes;
     std::vector<size_t> inputOrder;
-    std::string targetDevice;
     CPUSpecificParams cpuParams;
-    std::map<std::string, std::string> additionalConfig;
-    std::tie(inputShapes, inputOrder, netPrecision, targetDevice, additionalConfig, cpuParams) = obj.param;
+    ov::AnyMap config;
+    std::tie(inputShapes, inputOrder, netPrecision, config, cpuParams) = obj.param;
 
     std::ostringstream result;
     result << "IS=" << CommonTestUtils::partialShape2str({inputShapes.first}) << "_";
@@ -30,34 +29,40 @@ std::string TransposeLayerCPUTest::getTestCaseName(testing::TestParamInfo<Transp
     }
     result << ")_";
     result << "inputOrder=" << CommonTestUtils::vec2str(inputOrder) << "_";
-    result << "netPRC=" << netPrecision.name() << "_";
-    result << "trgDev=" << targetDevice;
+    result << "netPRC=" << netPrecision << "_";
     result << CPUTestsBase::getTestCaseName(cpuParams);
+
+    if (!config.empty()) {
+        result << "_PluginConf";
+        for (const auto& configItem : config) {
+            result << "_" << configItem.first << "=";
+            configItem.second.print(result);
+        }
+    }
+
     return result.str();
 }
 
 void TransposeLayerCPUTest::SetUp() {
-    Precision netPrecision;
+    targetDevice = CommonTestUtils::DEVICE_CPU;
+
+    ElementType netPrecision;
     InputShape inputShapes;
     std::vector<size_t> inputOrder;
     CPUSpecificParams cpuParams;
-    std::map<std::string, std::string> additionalConfig;
-    std::tie(inputShapes, inputOrder, netPrecision, targetDevice, additionalConfig, cpuParams) = this->GetParam();
-    configuration.insert(additionalConfig.begin(), additionalConfig.end());
-
-    inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-    outType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    std::tie(inputShapes, inputOrder, netPrecision, configuration, cpuParams) = this->GetParam();
 
     std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
 
-    selectedType = makeSelectedTypeStr("unknown", inType);
+    selectedType = makeSelectedTypeStr("unknown", netPrecision);
 
     init_input_shapes({inputShapes});
 
-    auto params = ngraph::builder::makeDynamicParams(inType, {inputDynamicShapes[0]});
+    auto params = ngraph::builder::makeDynamicParams(netPrecision, { inputDynamicShapes[0] });
 
-    const auto inputOrderOp =
-        std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape({inputOrder.size()}), inputOrder);
+    const auto inputOrderOp = std::make_shared<ov::op::v0::Constant>(ov::element::i64,
+                                                                        ov::Shape({inputOrder.size()}),
+                                                                        inputOrder);
     const auto transpose = std::make_shared<ov::op::v1::Transpose>(params[0], inputOrderOp);
     transpose->get_rt_info() = getCPUInfo();
     const ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose)};
@@ -72,8 +77,8 @@ TEST_P(TransposeLayerCPUTest, CompareWithRefs) {
 }
 
 namespace Transpose {
-const std::vector<InferenceEngine::Precision>& netPrecisionsPerChannels() {
-    static const std::vector<InferenceEngine::Precision> netPrecisionsPerChannels = {Precision::I8, Precision::FP32};
+const std::vector<ElementType>& netPrecisionsPerChannels() {
+    static const std::vector<ElementType> netPrecisionsPerChannels = {ElementType::i8, ElementType::f32};
     return netPrecisionsPerChannels;
 }
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.hpp
index 6d07d4a0d22943..00089021a3b1b5 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.hpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/transpose.hpp
@@ -8,24 +8,18 @@
 #include "ngraph_functions/builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "test_utils/cpu_test_utils.hpp"
-#include "gtest/gtest.h"
-
-
-using namespace InferenceEngine;
-using namespace CPUTestUtils;
-using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 typedef std::tuple<
-        InputShape,                    // Input shapes
-        std::vector<size_t>,                // Input order
-        InferenceEngine::Precision,         // Net precision
-        std::string,                        // Target device name
-        std::map<std::string, std::string>, // Additional network configuration
-        CPUSpecificParams> TransposeLayerCPUTestParamSet;
+        ov::test::InputShape,              // Input shapes
+        std::vector<size_t>,               // Input order
+        ov::test::ElementType,             // Net precision
+        ov::AnyMap,                        // Additional plugin configuration
+        CPUTestUtils::CPUSpecificParams
+> TransposeLayerCPUTestParamSet;
 
 class TransposeLayerCPUTest : public testing::WithParamInterface<TransposeLayerCPUTestParamSet>,
-                              public ov::test::SubgraphBaseTest, public CPUTestsBase {
+                              public ov::test::SubgraphBaseTest, public CPUTestUtils::CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<TransposeLayerCPUTestParamSet> obj);
 protected:
@@ -33,10 +27,10 @@ class TransposeLayerCPUTest : public testing::WithParamInterface<TransposeLayerC
 };
 
 namespace Transpose {
-    const std::vector<InferenceEngine::Precision>& netPrecisionsPerChannels();
-    const std::vector<InputShape>& dynamicInputShapes4DC16();
-    const std::vector<InputShape>& dynamicInputShapes4DC32();
-    const std::vector<InputShape>& dynamicInputShapes4D();
+    const std::vector<ov::test::ElementType>& netPrecisionsPerChannels();
+    const std::vector<ov::test::InputShape>& dynamicInputShapes4DC16();
+    const std::vector<ov::test::InputShape>& dynamicInputShapes4DC32();
+    const std::vector<ov::test::InputShape>& dynamicInputShapes4D();
     const std::vector<std::vector<size_t>>& inputOrder4D();
 } // namespace Transpose
-} // namespace CPULayerTestsDefinitions
\ No newline at end of file
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp
index 1cb242daaf55f1..bbacc1c38c7c7a 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp
@@ -5,6 +5,7 @@
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "ngraph_functions/builders.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace ov::test;
 using namespace CPUTestUtils;
@@ -12,9 +13,10 @@ using namespace CPUTestUtils;
 namespace CPULayerTestsDefinitions {
 
 typedef std::tuple<
-        size_t,                   // Concat axis
+        int64_t,                  // Concat axis
         std::vector<InputShape>,  // Input shapes
         ElementType,              // Network precision
+        ov::AnyMap,               // Additional config
         CPUSpecificParams
 > concatCPUTestParams;
 
@@ -22,11 +24,12 @@ class ConcatLayerCPUTest : public testing::WithParamInterface<concatCPUTestParam
                            virtual public SubgraphBaseTest, public CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<concatCPUTestParams> obj) {
-        int axis;
+        int64_t axis;
         std::vector<InputShape> inputShapes;
         ElementType netPrecision;
+        ov::AnyMap additionalConfig;
         CPUSpecificParams cpuParams;
-        std::tie(axis, inputShapes, netPrecision, cpuParams) = obj.param;
+        std::tie(axis, inputShapes, netPrecision, additionalConfig, cpuParams) = obj.param;
 
         std::ostringstream result;
         result << "IS=";
@@ -46,6 +49,15 @@ class ConcatLayerCPUTest : public testing::WithParamInterface<concatCPUTestParam
         result << "axis=" << axis << "_";
         result << "netPRC=" << netPrecision << "_";
         result << CPUTestsBase::getTestCaseName(cpuParams);
+
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                result << "_" << item.first << "=";
+                item.second.print(result);
+            }
+        }
+
         return result.str();
     }
 
@@ -67,21 +79,30 @@ class ConcatLayerCPUTest : public testing::WithParamInterface<concatCPUTestParam
     void SetUp() override {
         targetDevice = CommonTestUtils::DEVICE_CPU;
 
-        int axis;
+        int64_t axis;
         std::vector<InputShape> inputShape;
         ElementType netPrecision;
         CPUSpecificParams cpuParams;
-        std::tie(axis, inputShape, netPrecision, cpuParams) = this->GetParam();
+        std::tie(axis, inputShape, netPrecision, configuration, cpuParams) = this->GetParam();
 
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
-        selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();
+        if (netPrecision == ElementType::i64 || netPrecision == ElementType::u64) {
+            auto i64Flag = configuration.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64Flag == configuration.end() || i64Flag->second == InferenceEngine::PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);
+            } else {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i64);
+            }
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, netPrecision);
+        }
 
         init_input_shapes(inputShape);
 
         auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
         auto paramOuts = ngraph::helpers::convert2OutputVector(
-                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
-        auto concat = std::make_shared<ngraph::opset1::Concat>(paramOuts, axis);
+                ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
+        auto concat = std::make_shared<ov::op::v0::Concat>(paramOuts, axis);
 
         function = makeNgraphFunction(netPrecision, params, concat, "ConcatCPU");
     }
@@ -118,23 +139,41 @@ const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref
 const std::vector<ElementType> netPrecisions = {
         ElementType::i8,
         ElementType::i32,
+        ElementType::i64,  // paired with emptyConfig in the suites below, so SetUp builds selectedType with i32 for it
         ElementType::f32,
         ElementType::bf16
 };
 
+const ov::AnyMap emptyConfig = {};  // no entries, i.e. KEY_CPU_NATIVE_I64 is not set
+const std::vector<ov::AnyMap> i64Config = {  // two configs, so each *_I64 suite runs once per flag value
+        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}},  // flag set to YES
+        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::NO}}  // flag set to NO
+};
+
 INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block8_static, ConcatLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::Values(1, -2, 3),
                                 ::testing::Values(static_shapes_to_test_representation({{2, 16, 3, 5}, {2, 16, 3, 5}})),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),  // configuration element: empty map, KEY_CPU_NATIVE_I64 not set
                                 ::testing::Values(planar_4D_ref, planarChannels_4D, blocked8_4D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block8_I64_static, ConcatLayerCPUTest,
+                        ::testing::Combine(
+                             ::testing::Values(1, -2, 3),  // concat axis
+                             ::testing::Values(static_shapes_to_test_representation({{2, 16, 3, 5}, {2, 16, 3, 5}})),  // input shapes
+                             ::testing::Values(ElementType::i64),  // net precision
+                             ::testing::ValuesIn(i64Config),  // configuration: KEY_CPU_NATIVE_I64 = YES, then NO
+                             ::testing::Values(planar_4D_ref, planarChannels_4D, blocked8_4D_ref)),  // CPU specific params
+                        ConcatLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block16_static, ConcatLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::Values(1, 2, -1),
                                 ::testing::Values(static_shapes_to_test_representation({{3, 32, 3, 5}, {3, 32, 3, 5}})),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),  // configuration element: empty map, KEY_CPU_NATIVE_I64 not set
                                 ::testing::Values(blocked16_4D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -162,9 +201,19 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block_dynamic_axis_1, ConcatLayerCPU
                                 ::testing::Values(1, -3),
                                 ::testing::ValuesIn(inputShapes4D_Block_axis1),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_4D_ref, blocked16_4D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block_dynamic_axis_1_I64, ConcatLayerCPUTest,
+                        ::testing::Combine(
+                             ::testing::Values(1, -3),  // concat axis
+                             ::testing::ValuesIn(inputShapes4D_Block_axis1),  // input shapes
+                             ::testing::Values(ElementType::i64),  // net precision
+                             ::testing::ValuesIn(i64Config),  // configuration: KEY_CPU_NATIVE_I64 = YES, then NO
+                             ::testing::Values(blocked8_4D_ref, blocked16_4D_ref)),  // CPU specific params
+                        ConcatLayerCPUTest::getTestCaseName);
+
 const std::vector<std::vector<InputShape>> inputShapes4D_axis1 = {
         {
             {{-1, -1, -1, -1}, {{2, 32, 0, 7}, {2, 32, 5, 7}, {2, 32, 5, 7}, {1, 18, 10, 2}, {2, 32, 5, 7}, {3, 8, 1, 8}, {2, 0, 5, 7}}},
@@ -193,6 +242,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_dynamic_axis_1, ConcatLayerCPUTest,
                                 ::testing::Values(1),
                                 ::testing::ValuesIn(inputShapes4D_axis1),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_4D_ref, planarChannels_4D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -219,6 +269,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block_dynamic_axis_2, ConcatLayerCPU
                                 ::testing::Values(2),
                                 ::testing::ValuesIn(inputShapes4D_Block_axis2),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_4D_ref, blocked16_4D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -240,6 +291,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_dynamic_axis_2, ConcatLayerCPUTest,
                                 ::testing::Values(2, -2),
                                 ::testing::ValuesIn(inputShapes4D_axis2),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_4D_ref, planarChannels_4D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -261,6 +313,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block_dynamic_axis_3, ConcatLayerCPU
                                 ::testing::Values(3),
                                 ::testing::ValuesIn(inputShapes4D_Block_axis3),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_4D_ref, blocked16_4D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -287,6 +340,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_dynamic_axis_3, ConcatLayerCPUTest,
                                 ::testing::Values(3, -1),
                                 ::testing::ValuesIn(inputShapes4D_axis3),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_4D_ref, planarChannels_4D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -295,14 +349,25 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block8_static, ConcatLayerCPUTest,
                                 ::testing::Values(2, 3, -2),
                                 ::testing::Values(static_shapes_to_test_representation({{2, 16, 3, 5, 7}, {2, 16, 3, 5, 7}})),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_5D_ref, planarChannels_5D, blocked8_5D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block8_I64_static, ConcatLayerCPUTest,
+                        ::testing::Combine(
+                             ::testing::Values(2, 3, -2),  // concat axis
+                             ::testing::Values(static_shapes_to_test_representation({{2, 16, 3, 5, 7}, {2, 16, 3, 5, 7}})),  // input shapes
+                             ::testing::Values(ElementType::i64),  // net precision
+                             ::testing::ValuesIn(i64Config),  // configuration: KEY_CPU_NATIVE_I64 = YES, then NO
+                             ::testing::Values(planar_5D_ref, planarChannels_5D, blocked8_5D_ref)),  // CPU specific params
+                        ConcatLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block16_static, ConcatLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::Values(2, 3, 4),
                                 ::testing::Values(static_shapes_to_test_representation({{2, 32, 3, 5, 7}, {2, 32, 3, 5, 7}})),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),  // configuration element: empty map, KEY_CPU_NATIVE_I64 not set
                                 ::testing::Values(blocked16_5D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -324,9 +389,19 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block_dynamic_axis_1, ConcatLayerCPU
                                 ::testing::Values(1),
                                 ::testing::ValuesIn(inputShapes5D_Block_axis1),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_5D_ref, blocked16_5D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block_dynamic_axis_1_I64, ConcatLayerCPUTest,
+                        ::testing::Combine(
+                             ::testing::Values(1),  // concat axis
+                             ::testing::ValuesIn(inputShapes5D_Block_axis1),  // input shapes
+                             ::testing::Values(ElementType::i64),  // net precision
+                             ::testing::ValuesIn(i64Config),  // configuration: KEY_CPU_NATIVE_I64 = YES, then NO
+                             ::testing::Values(blocked8_5D_ref, blocked16_5D_ref)),  // CPU specific params
+                        ConcatLayerCPUTest::getTestCaseName);
+
 const std::vector<std::vector<InputShape>> inputShapes5D_axis1 = {
         {
             {{-1, -1, -1, -1, -1}, {{2, 5, 5, 7, 6}, {1, 3, 10, 2, 8}, {3, 4, 1, 8, 10}}},
@@ -345,6 +420,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_dynamic_axis_1, ConcatLayerCPUTest,
                                 ::testing::Values(1),
                                 ::testing::ValuesIn(inputShapes5D_axis1),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_5D_ref, planarChannels_5D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -366,6 +442,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block_dynamic_axis_2, ConcatLayerCPU
                                 ::testing::Values(-3),
                                 ::testing::ValuesIn(inputShapes5D_Block_axis2),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_5D_ref, blocked16_5D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -387,6 +464,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_dynamic_axis_2, ConcatLayerCPUTest,
                                 ::testing::Values(2),
                                 ::testing::ValuesIn(inputShapes5D_axis2),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_5D_ref, planarChannels_5D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -408,6 +486,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block_dynamic_axis_3, ConcatLayerCPU
                                 ::testing::Values(3),
                                 ::testing::ValuesIn(inputShapes5D_Block_axis3),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_5D_ref, blocked16_5D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -429,6 +508,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_dynamic_axis_3, ConcatLayerCPUTest,
                                 ::testing::Values(3),
                                 ::testing::ValuesIn(inputShapes5D_axis3),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_5D_ref, planarChannels_5D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -450,6 +530,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block_dynamic_axis_4, ConcatLayerCPU
                                 ::testing::Values(4),
                                 ::testing::ValuesIn(inputShapes5D_Block_axis4),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked8_5D_ref, blocked16_5D_ref)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -471,6 +552,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_dynamic_axis_4, ConcatLayerCPUTest,
                                 ::testing::Values(4),
                                 ::testing::ValuesIn(inputShapes5D_axis4),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_5D_ref, planarChannels_5D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -515,16 +597,27 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_byBatch_static, ConcatLayerCPUTest,
                                  ::testing::Values(0),
                                  ::testing::ValuesIn(inputShapes_byBatch_static),
                                  ::testing::ValuesIn(netPrecisions),
+                                 ::testing::Values(emptyConfig),
+                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),
+                         ConcatLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Concat_byBatch_I64_static, ConcatLayerCPUTest,
+                        ::testing::Combine(
+                                 ::testing::Values(0),  // concat axis
+                                 ::testing::ValuesIn(inputShapes_byBatch_static),  // input shapes
+                                 ::testing::Values(ElementType::i64),  // net precision
+                                 ::testing::ValuesIn(i64Config),  // configuration: KEY_CPU_NATIVE_I64 = YES, then NO
                                  ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),
-                                 ConcatLayerCPUTest::getTestCaseName);
+                        ConcatLayerCPUTest::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_Concat_byBatch_dynamic, ConcatLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Values(0),
                                  ::testing::ValuesIn(inputShapes_byBatch_dynamic),
                                  ::testing::ValuesIn(netPrecisions),
+                                 ::testing::Values(emptyConfig),  // configuration element: empty map, KEY_CPU_NATIVE_I64 not set
                                  ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
-                                 ConcatLayerCPUTest::getTestCaseName);
+                         ConcatLayerCPUTest::getTestCaseName);
 
 const std::vector<std::vector<InputShape>> inputShapes3D_axis1 = {
         static_shapes_to_test_representation({{2, 4, 5}, {2, 4, 5}}),
@@ -545,6 +638,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_3D_axis1, ConcatLayerCPUTest,
                                 ::testing::Values(1),
                                 ::testing::ValuesIn(inputShapes3D_axis1),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -567,6 +661,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_3D_axis2, ConcatLayerCPUTest,
                                 ::testing::Values(2),
                                 ::testing::ValuesIn(inputShapes3D_axis2),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -589,6 +684,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_2D_axis1, ConcatLayerCPUTest,
                                 ::testing::Values(1),
                                 ::testing::ValuesIn(inputShapes2D_axis1),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -617,14 +713,25 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_1D_static, ConcatLayerCPUTest,
                                 ::testing::Values(0),
                                 ::testing::ValuesIn(inputShapes1D_static),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),
                         ConcatLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Concat_1D_I64_static, ConcatLayerCPUTest,
+                        ::testing::Combine(
+                                 ::testing::Values(0),  // concat axis
+                                 ::testing::ValuesIn(inputShapes1D_static),  // input shapes
+                                 ::testing::Values(ElementType::i64),  // net precision
+                                 ::testing::ValuesIn(i64Config),  // configuration: KEY_CPU_NATIVE_I64 = YES, then NO
+                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),  // CPU specific params
+                        ConcatLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_Concat_1D_dynamic, ConcatLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::Values(0),
                                 ::testing::ValuesIn(inputShapes1D_dynamic),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),  // configuration element: empty map, KEY_CPU_NATIVE_I64 not set
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -643,6 +750,7 @@ INSTANTIATE_TEST_SUITE_P(concat_Concat4D_CPU_Block8inPlace, ConcatLayerCPUTest,
                                                       {{1, 16, -1, -1}, {{1, 16, 5, 7}, {1, 16, 16, 2}, {1, 16, 2, 8}}},
                                                   }),
                                 ::testing::Values(ElementType::f32),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_4D, blocked8_4D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -660,6 +768,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block16inPlace_0, ConcatLayerCPUTest
                                                       {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 2}, {1, 32, 2, 8}}},
                                                   }),
                                 ::testing::Values(ElementType::f32),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked16_4D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -677,6 +786,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat4D_CPU_Block16inPlace_1, ConcatLayerCPUTest
                                                       {{1, 32, -1, -1}, {{1, 32, 5, 7}, {1, 32, 16, 2}, {1, 32, 2, 8}}},
                                                   }),
                                 ::testing::Values(ElementType::f32),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked16_4D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -694,6 +804,7 @@ INSTANTIATE_TEST_SUITE_P(concat_Concat5D_CPU_Block8inPlace, ConcatLayerCPUTest,
                                                       {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}},
                                                   }),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(planar_5D, blocked8_5D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -711,6 +822,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat5D_CPU_Block16inPlace, ConcatLayerCPUTest,
                                                       {{1, 32, -1, -1, -1}, {{1, 32, 5, 7, 3}, {1, 32, 16, 2, 3}, {1, 32, 2, 8, 3}}},
                                                   }),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(blocked16_5D)),
                         ConcatLayerCPUTest::getTestCaseName);
 
@@ -721,6 +833,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_inPlace, ConcatLayerCPUTest,
                                     static_shapes_to_test_representation({{1, 1, 1, 10}, {1, 1, 1, 10}}),
                                     static_shapes_to_test_representation({{1, 1, 5}, {1, 1, 5}})}),
                                 ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(emptyConfig),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),
                         ConcatLayerCPUTest::getTestCaseName);
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/cum_sum.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/cum_sum.cpp
index 84ce22c180db14..237818624e5d2b 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/cum_sum.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/cum_sum.cpp
@@ -5,42 +5,49 @@
 #include "test_utils/cpu_test_utils.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "ngraph_functions/builders.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
-using namespace ngraph;
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ov;
-using namespace test;
+using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 
 using cumSumParams = std::tuple<
-    ngraph::element::Type, // data precision
-    InputShape, // input shape
-    std::int64_t, // axis
-    bool, // exclusive
-    bool>; // reverse
+    ElementType,     // data precision
+    InputShape,      // input shape
+    std::int64_t,    // axis
+    bool,            // exclusive
+    bool,            // reverse
+    ov::AnyMap>;     // Additional network configuration
 
 class CumSumLayerCPUTest : public testing::WithParamInterface<cumSumParams>,
                            public SubgraphBaseTest, public CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<cumSumParams> obj) {
-        ngraph::element::Type inputPrecision;
+        ElementType inputPrecision;
         InputShape shapes;
         std::int64_t axis;
         bool exclusive;
         bool reverse;
-        std::tie(inputPrecision, shapes, axis, exclusive, reverse) = obj.param;
+        ov::AnyMap config;  // last element of cumSumParams
+        std::tie(inputPrecision, shapes, axis, exclusive, reverse, config) = obj.param;
 
-        std::ostringstream results;
-        results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
-        results << "TS=";
+        std::ostringstream result;  // accumulates the test case name
+        result << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";  // IS: built from shapes.first
+        result << "TS=";  // TS: one entry per element of shapes.second, appended by the loop below
         for (const auto& item : shapes.second) {
-            results << CommonTestUtils::vec2str(item) << "_";
+            result << CommonTestUtils::vec2str(item) << "_";
         }
-        results << "Prc=" << inputPrecision << "_";
-        results << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? "reverse" : "");
-        return results.str();
+        result << "Prc=" << inputPrecision << "_";
+        result << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? "reverse" : "");  // a false flag contributes an empty string
+
+        for (auto const& configItem : config) {  // one "_configItem=<key>_<value>" part per config entry; nothing is added for an empty config
+            result << "_configItem=" << configItem.first << "_";
+            configItem.second.print(result);  // the value is written to the stream through its own print()
+        }
+
+        return result.str();
     }
 
 protected:
@@ -50,7 +57,7 @@ class CumSumLayerCPUTest : public testing::WithParamInterface<cumSumParams>,
         std::int64_t axis;
         bool exclusive;
         bool reverse;
-        std::tie(inType, shapes, axis, exclusive, reverse) = this->GetParam();
+        std::tie(inType, shapes, axis, exclusive, reverse, configuration) = this->GetParam();
         if (inType == ElementType::bf16)
             rel_threshold = 0.05f;
 
@@ -58,11 +65,11 @@ class CumSumLayerCPUTest : public testing::WithParamInterface<cumSumParams>,
         init_input_shapes({shapes});
 
         auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);
-        auto axisNode = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{}, std::vector<int64_t>{axis})->output(0);
+        auto axisNode = ov::op::v0::Constant::create(ElementType::i32, ov::Shape{}, std::vector<int64_t>{axis})->output(0);
         auto cumSum = ngraph::builder::makeCumSum(params[0], axisNode, exclusive, reverse);
 
-        function = std::make_shared<ngraph::Function>(ngraph::NodeVector{ cumSum }, params, "CumSumLayerCPUTest");
-        functionRefs = ngraph::clone_function(*function);
+        function = std::make_shared<ov::Model>(ov::NodeVector{ cumSum }, params, "CumSumLayerCPUTest");
+        functionRefs = ov::clone_model(*function);
     }
 };
 
@@ -71,10 +78,10 @@ TEST_P(CumSumLayerCPUTest, CompareWithRefs) {
     CheckPluginRelatedResults(compiledModel, "CumSum");
 }
 
-const ngraph::element::TypeVector inputPrecision = {
-    ngraph::element::i8,
-    ngraph::element::bf16,
-    ngraph::element::f32
+const std::vector<ElementType> inputPrecision = {
+    ElementType::i8,
+    ElementType::bf16,
+    ElementType::f32
 };
 
 const std::vector<int64_t> axes = { 0, 1, 2, 3, 4, 5, 6 };
@@ -112,12 +119,16 @@ const std::vector<InputShape> inShapes = {
       {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}}
 };
 
+const ov::AnyMap emptyConfig = {};  // no entries; used by the testCasesAxis_* generators
+const ov::AnyMap i64Config = {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}};  // KEY_CPU_NATIVE_I64 = YES; used by the *_I64 suites
+
 const auto testCasesAxis_0 = ::testing::Combine(
     ::testing::ValuesIn(inputPrecision),
     ::testing::ValuesIn(inShapes),
     ::testing::Values(axes[0]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)  // configuration element of cumSumParams: empty map
 );
 
 const auto testCasesAxis_1 = ::testing::Combine(
@@ -125,7 +136,8 @@ const auto testCasesAxis_1 = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 1, inShapes.end())),
     ::testing::Values(axes[1]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 const auto testCasesAxis_2 = ::testing::Combine(
@@ -133,7 +145,8 @@ const auto testCasesAxis_2 = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 2, inShapes.end())),
     ::testing::Values(axes[2]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 const auto testCasesAxis_3 = ::testing::Combine(
@@ -141,7 +154,8 @@ const auto testCasesAxis_3 = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 3, inShapes.end())),
     ::testing::Values(axes[3]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 const auto testCasesAxis_4 = ::testing::Combine(
@@ -149,7 +163,8 @@ const auto testCasesAxis_4 = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 4, inShapes.end())),
     ::testing::Values(axes[4]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 const auto testCasesAxis_5 = ::testing::Combine(
@@ -157,7 +172,8 @@ const auto testCasesAxis_5 = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 5, inShapes.end())),
     ::testing::Values(axes[5]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 const auto testCasesAxis_6 = ::testing::Combine(
@@ -165,7 +181,8 @@ const auto testCasesAxis_6 = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 6, inShapes.end())),
     ::testing::Values(axes[6]),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 const auto testCasesAxis_negative = ::testing::Combine(
@@ -173,7 +190,8 @@ const auto testCasesAxis_negative = ::testing::Combine(
     ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 6, inShapes.end())),
     ::testing::ValuesIn(negativeAxes),
     ::testing::ValuesIn(exclusive),
-    ::testing::ValuesIn(reverse)
+    ::testing::ValuesIn(reverse),
+    ::testing::Values(emptyConfig)
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_axis_0, CumSumLayerCPUTest, testCasesAxis_0, CumSumLayerCPUTest::getTestCaseName);
@@ -185,4 +203,41 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_axis_5, CumSumLayerCPUTest,
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_axis_6, CumSumLayerCPUTest, testCasesAxis_6, CumSumLayerCPUTest::getTestCaseName);
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_negative_axes, CumSumLayerCPUTest, testCasesAxis_negative, CumSumLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_axis_0_I64, CumSumLayerCPUTest,
+         ::testing::Combine(
+                 ::testing::Values(ElementType::i64),  // data precision
+                 ::testing::ValuesIn(inShapes),  // input shape
+                 ::testing::Values(axes[0]),  // axis
+                 ::testing::ValuesIn(exclusive),  // exclusive
+                 ::testing::ValuesIn(reverse),  // reverse
+                 ::testing::Values(i64Config)),  // configuration: KEY_CPU_NATIVE_I64 = YES
+         CumSumLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_axis_3_I64, CumSumLayerCPUTest,
+         ::testing::Combine(
+                 ::testing::Values(ElementType::i64),  // data precision
+                 ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 3, inShapes.end())),  // input shape: inShapes without its first 3 entries
+                 ::testing::Values(axes[3]),  // axis
+                 ::testing::ValuesIn(exclusive),  // exclusive
+                 ::testing::ValuesIn(reverse),  // reverse
+                 ::testing::Values(i64Config)),  // configuration: KEY_CPU_NATIVE_I64 = YES
+         CumSumLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_axis_6_I64, CumSumLayerCPUTest,
+         ::testing::Combine(
+                 ::testing::Values(ElementType::i64),  // data precision
+                 ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 6, inShapes.end())),  // input shape: inShapes without its first 6 entries
+                 ::testing::Values(axes[6]),  // axis
+                 ::testing::ValuesIn(exclusive),  // exclusive
+                 ::testing::ValuesIn(reverse),  // reverse
+                 ::testing::Values(i64Config)),  // configuration: KEY_CPU_NATIVE_I64 = YES
+         CumSumLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNumpy_negative_axes_I64, CumSumLayerCPUTest,
+         ::testing::Combine(
+                 ::testing::Values(ElementType::i64),  // data precision
+                 ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 6, inShapes.end())),  // input shape: inShapes without its first 6 entries
+                 ::testing::ValuesIn(negativeAxes),  // axis
+                 ::testing::ValuesIn(exclusive),  // exclusive
+                 ::testing::ValuesIn(reverse),  // reverse
+                 ::testing::Values(i64Config)),  // configuration: KEY_CPU_NATIVE_I64 = YES
+         CumSumLayerCPUTest::getTestCaseName);
+
 } // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp
index 3e4326321e7732..8d6100673bdcd5 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp
@@ -6,7 +6,9 @@
 #include "ngraph_functions/builders.hpp"
 #include "test_utils/cpu_test_utils.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
+using namespace InferenceEngine;
 using namespace CPUTestUtils;
 using namespace ov::test;
 
@@ -15,10 +17,11 @@ namespace CPULayerTestsDefinitions {
 typedef std::tuple<
         std::vector<InputShape>,           // Input shapes
         std::tuple<int, int>,              // Axis and Batch dim
-        ElementType,                       // Network precision
+        ElementType,                       // Data precision (element type of the "data" parameter)
+        ElementType,                       // Indices precision (element type of the "indices" parameter)
         bool,                              // Is const Axis
         CPUSpecificParams,                 // CPU specific params
-        std::map<std::string, std::string> // Additional config
+        ov::AnyMap                         // Additional plugin config (becomes 'configuration' in SetUp)
 > GatherLayerTestCPUParams;
 
 class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPUParams>,
@@ -27,12 +30,12 @@ class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPU
     static std::string getTestCaseName(testing::TestParamInfo<GatherLayerTestCPUParams> obj) {
         std::vector<InputShape> inputShapes;
         std::tuple<int, int> axisAndBatchDims;
-        ElementType netPrecision;
+        ElementType dataPrc, idxPrc;
         bool isAxisConstant;
         CPUSpecificParams cpuParams;
-        std::map<std::string, std::string> additionalConfig;
+        ov::AnyMap additionalConfig;
 
-        std::tie(inputShapes, axisAndBatchDims, netPrecision, isAxisConstant, cpuParams, additionalConfig) = obj.param;
+        std::tie(inputShapes, axisAndBatchDims, dataPrc, idxPrc, isAxisConstant, cpuParams, additionalConfig) = obj.param;
 
         std::ostringstream result;
         result << "IS=(";
@@ -49,15 +52,16 @@ class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPU
         }
         result << "axis=" << std::get<0>(axisAndBatchDims) << "_";
         result << "batchDims=" << std::get<1>(axisAndBatchDims) << "_";
-        result << "netPrc=" << netPrecision << "_";
+        result << "dataPrc=" << dataPrc << "_";
+        result << "idxPrc=" << idxPrc << "_";
         result << "constAx=" << (isAxisConstant ? "True" : "False") << "_";
         result << CPUTestsBase::getTestCaseName(cpuParams);
 
         if (!additionalConfig.empty()) {
             result << "_PluginConf";
             for (auto &item : additionalConfig) {
-                if (item.second == InferenceEngine::PluginConfigParams::YES)
-                    result << "_" << item.first << "=" << item.second;
+                result << "_" << item.first << "=";
+                item.second.print(result);
             }
         }
 
@@ -68,24 +72,24 @@ class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPU
     void SetUp() override {
         std::vector<InputShape> inputShapes;
         std::tuple<int, int> axisAndBatchDims;
-        ElementType netPrecision;
+        ElementType dataPrc, idxPrc;
+        const ElementType axisPrc = ElementType::i64;
         bool isAxisConstant;
         CPUSpecificParams cpuParams;
-        std::map<std::string, std::string> additionalConfig;
-        const ElementType intInputsPrecision = ElementType::i64;
 
-        std::tie(inputShapes, axisAndBatchDims, netPrecision, isAxisConstant, cpuParams, additionalConfig) = this->GetParam();
+        std::tie(inputShapes, axisAndBatchDims, dataPrc, idxPrc, isAxisConstant, cpuParams, configuration) = this->GetParam();
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
         axis = std::get<0>(axisAndBatchDims);
         const int batchDims = std::get<1>(axisAndBatchDims);
         targetDevice = CommonTestUtils::DEVICE_CPU;
+
         init_input_shapes(inputShapes);
-        configuration.insert(additionalConfig.begin(), additionalConfig.end());
 
-        if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) {
+        auto bf16Flag = configuration.find(PluginConfigParams::KEY_ENFORCE_BF16);
+        if (bf16Flag != configuration.end() && bf16Flag->second == PluginConfigParams::YES) {
             selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16);
         } else {
-            selectedType = makeSelectedTypeStr(selectedType, netPrecision);
+            selectedType = makeSelectedTypeStr(selectedType, dataPrc);
         }
 
         if (!isAxisConstant) {
@@ -95,26 +99,26 @@ class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPU
             }
         }
 
-        ngraph::ParameterVector params {
-            std::make_shared<ov::op::v0::Parameter>(netPrecision, inputDynamicShapes[0]),
-            std::make_shared<ov::op::v0::Parameter>(intInputsPrecision, inputDynamicShapes[1])
+        ov::ParameterVector params {
+            std::make_shared<ov::op::v0::Parameter>(dataPrc, inputDynamicShapes[0]),
+            std::make_shared<ov::op::v0::Parameter>(idxPrc, inputDynamicShapes[1])
         };
         params[0]->set_friendly_name("data");
         params[1]->set_friendly_name("indices");
         if (!isAxisConstant) {
-            params.push_back(std::make_shared<ov::op::v0::Parameter>(intInputsPrecision, inputDynamicShapes[2]));
+            params.push_back(std::make_shared<ov::op::v0::Parameter>(axisPrc, inputDynamicShapes[2]));
             params[2]->set_friendly_name("axis");
         }
         auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
         std::shared_ptr<ov::Node> gatherNode;
         if (isAxisConstant) {
             gatherNode = std::make_shared<ov::op::v8::Gather>(paramOuts[0], paramOuts[1],
-                    ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({1}), { axis }), batchDims);
+                    ov::op::v0::Constant::create(axisPrc, ov::Shape({1}), { axis }), batchDims);
         } else {
             gatherNode = std::make_shared<ov::op::v8::Gather>(paramOuts[0], paramOuts[1], paramOuts[2], batchDims);
         }
 
-        function = makeNgraphFunction(netPrecision, params, gatherNode, "GatherCPU");
+        function = makeNgraphFunction(dataPrc, params, gatherNode, "GatherCPU");
     }
 
     void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
@@ -225,24 +229,31 @@ TEST_P(GatherInPlaceLayerTestCPU, CompareWithRefs) {
 }
 
 namespace {
-const std::vector<ElementType> netPrecisions = {
+const std::vector<ElementType> dataPrcs = {  // Data-input precisions swept by smoke_static_1D / smoke_dynamic_1D
         ElementType::f32,
         ElementType::bf16,
         ElementType::i8
 };
 
-std::vector<std::map<std::string, std::string>> additionalConfig
-    = {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}},
-       {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}}};
+const std::vector<ov::AnyMap> bf16Config = {  // Both ENFORCE_BF16 settings, for suites that sweep the flag
+        {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},  // BF16 not enforced
+        {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}  // SetUp then builds selectedType with bf16
+};
+
+const ov::AnyMap i64Config = {  // Passed by the suites in this file whose data precision is i64
+        {PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}  // Sets the internal CPU_NATIVE_I64 key to YES
+};
+
+const ov::AnyMap emptyConfig = {};  // No plugin overrides; getTestCaseName then omits the "_PluginConf" suffix
 
 std::vector<bool> isAxisConst{true, false};
 const CPUSpecificParams cpuParamsRef{{}, {}, {"ref_any"}, "ref_any"};
 
 std::vector<CPUSpecificParams> getCPUInfo() {
     std::vector<CPUSpecificParams> resCPUParams;
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
+    if (with_cpu_x86_avx512f()) {
         resCPUParams.push_back(CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"});
-    } else if (InferenceEngine::with_cpu_x86_avx2()) {
+    } else if (with_cpu_x86_avx2()) {
         resCPUParams.push_back(CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"});
     } else {
         resCPUParams.push_back(CPUSpecificParams{{}, {}, {"ref"}, "ref"});
@@ -284,10 +295,22 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_1D, GatherLayerTestCPU,
                 ::testing::Combine(
                     ::testing::ValuesIn(staticInputShapes1D),
                     ::testing::Values(std::tuple<int, int>{0, 0}),
-                    ::testing::ValuesIn(netPrecisions),
+                    ::testing::ValuesIn(dataPrcs),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
+                GatherLayerTestCPU::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_static_1D_i64, GatherLayerTestCPU,
+                ::testing::Combine(
+                    ::testing::ValuesIn(staticInputShapes1D),
+                    ::testing::Values(std::tuple<int, int>{0, 0}),
+                    ::testing::Values(ElementType::i64),  // Data precision
+                    ::testing::Values(ElementType::i32, ElementType::i64),  // Indices precision
+                    ::testing::Values(true),
+                    ::testing::Values(cpuParamsRef),  // Shared "ref_any" params, as in the 4D reference suites
+                    ::testing::Values(i64Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes1D = {
@@ -301,16 +324,28 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_1D, GatherLayerTestCPU,
                 ::testing::Combine(
                     ::testing::ValuesIn(dynamicInputShapes1D),
                     ::testing::Values(std::tuple<int, int>{0, 0}),
-                    ::testing::ValuesIn(netPrecisions),
+                    ::testing::ValuesIn(dataPrcs),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true, false),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
+                GatherLayerTestCPU::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamic_1D_i64, GatherLayerTestCPU,
+                ::testing::Combine(
+                    ::testing::ValuesIn(dynamicInputShapes1D),
+                    ::testing::Values(std::tuple<int, int>{0, 0}),
+                    ::testing::Values(ElementType::i64),  // Data precision
+                    ::testing::Values(ElementType::i32, ElementType::i64),  // Indices precision
+                    ::testing::Values(true, false),  // Axis as a constant and as a parameter
+                    ::testing::Values(cpuParamsRef),  // Shared "ref_any" params, as in the 4D reference suites
+                    ::testing::Values(i64Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 ///// 4D JIT /////
 std::vector<std::vector<ov::test::InputShape>> get4DShapesJitStat(int maxBatchDims) {
     std::vector<std::vector<ov::test::InputShape>> result = {};
-    if (InferenceEngine::with_cpu_x86_avx2()) {
+    if (with_cpu_x86_avx2()) {
         if (maxBatchDims == 2) {
             result = {
                 { { {}, { {18, 2, 2, 1} } },   // Static shapes
@@ -369,7 +404,7 @@ std::vector<std::vector<ov::test::InputShape>> get4DShapesJitStat(int maxBatchDi
             throw std::invalid_argument("Invalid test case. Not valid batch dims.");
         }
     } // AVX2
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
+    if (with_cpu_x86_avx512f()) {
         std::vector<std::vector<ov::test::InputShape>> tmp;
         if (maxBatchDims == 2) {
             tmp = {
@@ -436,7 +471,7 @@ std::vector<std::vector<ov::test::InputShape>> get4DShapesJitStat(int maxBatchDi
 
 std::vector<std::tuple<int, int>> get4DAxisBatchJitStat(ov::element::Type type, int maxBatchDims) {
     std::vector<std::tuple<int, int>> result = {};
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
+    if (with_cpu_x86_avx512f()) {
         if (type.size() == 4 || type.size() == 2 || type.size() == 1) {
             if (maxBatchDims == 2)
                 return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}, {2, 0}, {2, 1}, {2, 2}};
@@ -445,7 +480,7 @@ std::vector<std::tuple<int, int>> get4DAxisBatchJitStat(ov::element::Type type,
             else
                 throw std::invalid_argument("Invalid test case. Not valid batch dims.");
         }
-    } else if (InferenceEngine::with_cpu_x86_avx2()) {
+    } else if (with_cpu_x86_avx2()) {
         if (type.size() == 4) {
             if (maxBatchDims == 2)
                 return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}, {2, 0}, {2, 1}, {2, 2}};
@@ -470,9 +505,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit32, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitStat(2)),
                     ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::f32, 2)),
                     ::testing::Values(ElementType::f32),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::ValuesIn(additionalConfig)),
+                    ::testing::ValuesIn(bf16Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit16, GatherLayerTestCPU,
@@ -480,9 +516,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit16, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitStat(2)),
                     ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::bf16, 2)),
                     ::testing::Values(ElementType::bf16),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit8, GatherLayerTestCPU,
@@ -490,9 +527,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit8, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitStat(2)),
                     ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::i8, 2)),
                     ::testing::Values(ElementType::i8),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 // batchDims == indicesRank
@@ -501,9 +539,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit32_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitStat(3)),
                     ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::f32, 3)),
                     ::testing::Values(ElementType::f32),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::ValuesIn(additionalConfig)),
+                    ::testing::ValuesIn(bf16Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit16_Bmax, GatherLayerTestCPU,
@@ -511,9 +550,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit16_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitStat(3)),
                     ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::bf16, 3)),
                     ::testing::Values(ElementType::bf16),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit8_Bmax, GatherLayerTestCPU,
@@ -521,15 +561,16 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit8_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitStat(3)),
                     ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::i8, 3)),
                     ::testing::Values(ElementType::i8),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 
 std::vector<std::vector<ov::test::InputShape>> get4DShapesJitDyn(int maxBatchDims) {
     std::vector<std::vector<ov::test::InputShape>> result = {};
-    if (InferenceEngine::with_cpu_x86_avx2()) {
+    if (with_cpu_x86_avx2()) {
         if (maxBatchDims == 2) {
             result = {
                 { { { ov::Dimension(5, 15), -1, -1, -1 },                            // Dynamic shape 0
@@ -572,7 +613,7 @@ std::vector<std::vector<ov::test::InputShape>> get4DShapesJitDyn(int maxBatchDim
             throw std::invalid_argument("Invalid test case. Not valid batch dims.");
         }
     }
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
+    if (with_cpu_x86_avx512f()) {
         std::vector<std::vector<ov::test::InputShape>> tmp;
         if (maxBatchDims == 2) {
             tmp = {
@@ -623,7 +664,7 @@ std::vector<std::vector<ov::test::InputShape>> get4DShapesJitDyn(int maxBatchDim
 
 std::vector<std::tuple<int, int>> get4DAxisBatchJitDyn(ov::element::Type type, int maxBatchDims) {
     std::vector<std::tuple<int, int>> result = {};
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
+    if (with_cpu_x86_avx512f()) {
         if (type.size() == 4 || type.size() == 2 || type.size() == 1) {
             if (maxBatchDims == 2)
                 return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}};
@@ -632,7 +673,7 @@ std::vector<std::tuple<int, int>> get4DAxisBatchJitDyn(ov::element::Type type, i
             else
                 throw std::invalid_argument("Invalid test case. Not valid batch dims.");
         }
-    } else if (InferenceEngine::with_cpu_x86_avx2()) {
+    } else if (with_cpu_x86_avx2()) {
         if (type.size() == 4 || type.size() == 2 || type.size() == 1) {
             if (maxBatchDims == 2)
                 return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}};
@@ -650,9 +691,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit32, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitDyn(2)),
                     ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::f32, 2)),
                     ::testing::Values(ElementType::f32),
+                    ::testing::Values(ElementType::i32),
                     ::testing::ValuesIn(isAxisConst),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::ValuesIn(additionalConfig)),
+                    ::testing::ValuesIn(bf16Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit16, GatherLayerTestCPU,
@@ -660,9 +702,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit16, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitDyn(2)),
                     ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::bf16, 2)),
                     ::testing::Values(ElementType::bf16),
+                    ::testing::Values(ElementType::i32),
                     ::testing::ValuesIn(isAxisConst),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit8, GatherLayerTestCPU,
@@ -670,9 +713,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit8, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitDyn(2)),
                     ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::i8, 2)),
                     ::testing::Values(ElementType::i8),
+                    ::testing::Values(ElementType::i32),
                     ::testing::ValuesIn(isAxisConst),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 // batchDims == indicesRank
@@ -681,9 +725,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit32_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitDyn(3)),
                     ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::f32, 3)),
                     ::testing::Values(ElementType::f32),
+                    ::testing::Values(ElementType::i32),
                     ::testing::ValuesIn(isAxisConst),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::ValuesIn(additionalConfig)),
+                    ::testing::ValuesIn(bf16Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit16_Bmax, GatherLayerTestCPU,
@@ -691,9 +736,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit16_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitDyn(3)),
                     ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::bf16, 3)),
                     ::testing::Values(ElementType::bf16),
+                    ::testing::Values(ElementType::i32),
                     ::testing::ValuesIn(isAxisConst),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit8_Bmax, GatherLayerTestCPU,
@@ -701,16 +747,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit8_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesJitDyn(3)),
                     ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::i8, 3)),
                     ::testing::Values(ElementType::i8),
+                    ::testing::Values(ElementType::i32),
                     ::testing::ValuesIn(isAxisConst),
                     ::testing::ValuesIn(getCPUInfo()),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 
 ///// 4D REFERENCE /////
 std::vector<std::vector<ov::test::InputShape>> get4DShapesRefStat(bool maxBatchDims) {
     std::vector<std::vector<ov::test::InputShape>> result = {};
-    if (InferenceEngine::with_cpu_x86_avx2()) {
+    if (with_cpu_x86_avx2()) {
         if (!maxBatchDims) {
             result = {
                 { { {}, { {10, 2, 9, 9} } },   // Static shapes
@@ -767,7 +814,7 @@ std::vector<std::vector<ov::test::InputShape>> get4DShapesRefStat(bool maxBatchD
             };
         }
     }
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
+    if (with_cpu_x86_avx512f()) {
         std::vector<std::vector<ov::test::InputShape>> tmp;
         if (!maxBatchDims) {
             tmp = {
@@ -832,8 +879,8 @@ std::vector<std::vector<ov::test::InputShape>> get4DShapesRefStat(bool maxBatchD
 
 std::vector<std::tuple<int, int>> get4DAxisBatchRefStat(ov::element::Type type, bool maxBatchDims) {
     std::vector<std::tuple<int, int>> result = {};
-    if (InferenceEngine::with_cpu_x86_avx512f()) {
-        if (type.size() == 4) {
+    if (with_cpu_x86_avx512f()) {
+        if (type.size() == 4 || type.size() == 8) {
             if (!maxBatchDims)
                 return std::vector<std::tuple<int, int>>{{1, 0}, {1, 1}, {0, 0}};
             else
@@ -844,8 +891,8 @@ std::vector<std::tuple<int, int>> get4DAxisBatchRefStat(ov::element::Type type,
             else
                 return std::vector<std::tuple<int, int>>{{2, 2}};
         }
-    } else if (InferenceEngine::with_cpu_x86_avx2()) {
-        if (type.size() == 4) {
+    } else if (with_cpu_x86_avx2()) {
+        if (type.size() == 4 || type.size() == 8) {
             if (!maxBatchDims)
                 return std::vector<std::tuple<int, int>>{{1, 0}, {1, 1}, {0, 0}};
             else
@@ -860,14 +907,26 @@ std::vector<std::tuple<int, int>> get4DAxisBatchRefStat(ov::element::Type type,
     return {};
 }
 
+INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref64, GatherLayerTestCPU,
+                ::testing::Combine(
+                    ::testing::ValuesIn(get4DShapesRefStat(false)),
+                    ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::i64, false)),
+                    ::testing::Values(ElementType::i64),  // Data precision
+                    ::testing::Values(ElementType::i32, ElementType::i64),  // Indices precision
+                    ::testing::Values(true),
+                    ::testing::Values(cpuParamsRef),
+                    ::testing::Values(i64Config)),
+                GatherLayerTestCPU::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref32, GatherLayerTestCPU,
                 ::testing::Combine(
                     ::testing::ValuesIn(get4DShapesRefStat(false)),
                     ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::f32, false)),
                     ::testing::Values(ElementType::f32),
+                    ::testing::Values(ElementType::i32),  // Indices precision
                     ::testing::Values(true),
                     ::testing::Values(cpuParamsRef),
-                    ::testing::ValuesIn(additionalConfig)),
+                    ::testing::ValuesIn(bf16Config)),  // Runs with ENFORCE_BF16 set to NO and to YES
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref16, GatherLayerTestCPU,
@@ -875,9 +934,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref16, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesRefStat(false)),
                     ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::bf16, false)),
                     ::testing::Values(ElementType::bf16),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::Values(cpuParamsRef),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref8, GatherLayerTestCPU,
@@ -885,9 +945,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref8, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesRefStat(false)),
                     ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::i8, false)),
                     ::testing::Values(ElementType::i8),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::Values(cpuParamsRef),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 // batchDims == indicesRank
@@ -896,9 +957,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref32_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesRefStat(true)),
                     ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::f32, true)),
                     ::testing::Values(ElementType::f32),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::Values(cpuParamsRef),
-                    ::testing::ValuesIn(additionalConfig)),
+                    ::testing::ValuesIn(bf16Config)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref16_Bmax, GatherLayerTestCPU,
@@ -906,9 +968,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref16_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesRefStat(true)),
                     ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::bf16, true)),
                     ::testing::Values(ElementType::bf16),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::Values(cpuParamsRef),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref8_Bmax, GatherLayerTestCPU,
@@ -916,9 +979,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref8_Bmax, GatherLayerTestCPU,
                     ::testing::ValuesIn(get4DShapesRefStat(true)),
                     ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::i8, true)),
                     ::testing::Values(ElementType::i8),
+                    ::testing::Values(ElementType::i32),
                     ::testing::Values(true),
                     ::testing::Values(cpuParamsRef),
-                    ::testing::Values(additionalConfig[0])),
+                    ::testing::Values(emptyConfig)),
                 GatherLayerTestCPU::getTestCaseName);
 
 // InPlace
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather_nd.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather_nd.cpp
index 41c01f9228bbe1..a1ac6b1f3ab245 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather_nd.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather_nd.cpp
@@ -5,19 +5,20 @@
 #include <shared_test_classes/single_layer/gather_nd.hpp>
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "ngraph_functions/builders.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
-using namespace ov;
-using namespace test;
+using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 
 using GatherNDLayerCPUTestParamSet = std::tuple<
         InputShape,                                     // Input shapes
-        std::pair<Shape, std::vector<int>>,             // Indexes shape and values
+        std::pair<ov::Shape, std::vector<int>>,         // Shape and values of the constant indices input
         ElementType,                                    // Input element type
         ElementType,                                    // Indices element type
-        int                                             // Batch dims
+        int,                                            // Batch dims
+        ov::AnyMap                                      // Additional plugin config (becomes 'configuration' in SetUp)
 >;
 
 class GatherNDLayerCPUTest : public testing::WithParamInterface<GatherNDLayerCPUTestParamSet>,
@@ -25,10 +26,11 @@ class GatherNDLayerCPUTest : public testing::WithParamInterface<GatherNDLayerCPU
 public:
     static std::string getTestCaseName(testing::TestParamInfo<GatherNDLayerCPUTestParamSet> obj) {
         InputShape shapes;
-        std::pair<Shape, std::vector<int>> indexes;
+        std::pair<ov::Shape, std::vector<int>> indexes;
         ElementType dataElementType, idxElementType;
         int batchDims;
-        std::tie(shapes, indexes, dataElementType, idxElementType, batchDims) = obj.param;
+        ov::AnyMap config;
+        std::tie(shapes, indexes, dataElementType, idxElementType, batchDims, config) = obj.param;
 
         std::ostringstream results;
         results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
@@ -39,7 +41,15 @@ class GatherNDLayerCPUTest : public testing::WithParamInterface<GatherNDLayerCPU
         results << "IDXShape=" << CommonTestUtils::vec2str(indexes.first) << "_";
         results << "SRCPrc=" << dataElementType << "_";
         results << "IDXPrc=" << idxElementType << "_";
-        results << "BD=" << batchDims << "_";
+        results << "BD=" << batchDims;
+
+        if (!config.empty()) {
+            results << "_PluginConf";
+            for (const auto& configItem : config) {
+                results << "_" << configItem.first << "=";
+                configItem.second.print(results);
+            }
+        }
 
         return results.str();
 }
@@ -47,19 +57,19 @@ class GatherNDLayerCPUTest : public testing::WithParamInterface<GatherNDLayerCPU
 protected:
     void SetUp() override {
         InputShape shapes;
-        std::pair<Shape, std::vector<int>> indexes;
+        std::pair<ov::Shape, std::vector<int>> indexes;
         ElementType dataElementType, idxElementType;
         int batchDims;
-        std::tie(shapes, indexes, dataElementType, idxElementType, batchDims) = this->GetParam();
+        std::tie(shapes, indexes, dataElementType, idxElementType, batchDims, configuration) = this->GetParam();
 
         targetDevice = CommonTestUtils::DEVICE_CPU;
         init_input_shapes({shapes});
 
         auto params = ngraph::builder::makeDynamicParams(dataElementType, inputDynamicShapes);
-        auto indexes_node = ngraph::opset3::Constant::create(idxElementType, indexes.first, indexes.second);
-        auto gather_nd = std::make_shared<ngraph::opset5::GatherND>(params[0], indexes_node, batchDims);
-        ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(gather_nd)};
-        function = std::make_shared<ngraph::Function>(results, params, "gatherND");
+        auto indexes_node = ov::op::v0::Constant::create(idxElementType, indexes.first, indexes.second);
+        auto gather_nd = std::make_shared<ov::op::v5::GatherND>(params[0], indexes_node, batchDims);
+        ngraph::ResultVector results{std::make_shared<ov::op::v0::Result>(gather_nd)};
+        function = std::make_shared<ov::Model>(results, params, "gatherND");
     }
 };
 
@@ -73,16 +83,16 @@ class GatherND8LayerCPUTest : public testing::WithParamInterface<GatherNDLayerCP
 protected:
     void SetUp() override {
         InputShape shapes;
-        std::pair<Shape, std::vector<int>> indexes;
+        std::pair<ov::Shape, std::vector<int>> indexes;
         ElementType dataElementType, idxElementType;
         int batchDims;
-        std::tie(shapes, indexes, dataElementType, idxElementType, batchDims) = this->GetParam();
+        std::tie(shapes, indexes, dataElementType, idxElementType, batchDims, configuration) = this->GetParam();
 
         targetDevice = CommonTestUtils::DEVICE_CPU;
         init_input_shapes({shapes});
 
         auto params = ngraph::builder::makeDynamicParams(dataElementType, inputDynamicShapes);
-        auto indexes_node = ngraph::opset3::Constant::create(idxElementType, indexes.first, indexes.second);
+        auto indexes_node = ov::op::v0::Constant::create(idxElementType, indexes.first, indexes.second);
         auto gather_nd = std::make_shared<ngraph::opset8::GatherND>(params[0], indexes_node, batchDims);
         ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(gather_nd)};
         function = std::make_shared<ngraph::Function>(results, params, "gatherND");
@@ -120,10 +130,13 @@ const std::vector<InputShape> inputShapesDynamicBD_0 = {
          {{4, 5, 5, 5, 5}, {4, 5, 5, 8, 5}, {10, 8, 5, 5, 5}}},   // target
 };
 
-const std::vector<std::pair<Shape, std::vector<int>>> indexesShapesBD_0 = {
-        std::pair<Shape, std::vector<int>>{{2, 2}, {3, 3, 2, 1}},
-        std::pair<Shape, std::vector<int>>{{1, 2, 3}, {0, 1, 1, 1, 0, 2}},
-        std::pair<Shape, std::vector<int>>{{2, 1, 1, 2}, {0, 2, 1, 1}},
+const ov::AnyMap empty_config = {};  // Empty "additional config": no plugin properties are overridden.
+const ov::AnyMap config_i64 = {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}};
+
+const std::vector<std::pair<ov::Shape, std::vector<int>>> indexesShapesBD_0 = {
+        std::pair<ov::Shape, std::vector<int>>{{2, 2}, {3, 3, 2, 1}},
+        std::pair<ov::Shape, std::vector<int>>{{1, 2, 3}, {0, 1, 1, 1, 0, 2}},
+        std::pair<ov::Shape, std::vector<int>>{{2, 1, 1, 2}, {0, 2, 1, 1}},
 };
 
 const auto subset_BD0 = ::testing::Combine(
@@ -131,10 +144,20 @@ const auto subset_BD0 = ::testing::Combine(
         ::testing::ValuesIn(indexesShapesBD_0),
         ::testing::ValuesIn(inputPrecisions),
         ::testing::ValuesIn(indexesPrecisions),
-        ::testing::Values(0));
+        ::testing::Values(0),
+        ::testing::Values(empty_config));
 
 INSTANTIATE_TEST_SUITE_P(smoke_GatherND5DynamicBD_0, GatherNDLayerCPUTest, subset_BD0, GatherNDLayerCPUTest::getTestCaseName);
 INSTANTIATE_TEST_SUITE_P(smoke_GatherND8DynamicBD_0, GatherND8LayerCPUTest, subset_BD0, GatherNDLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_GatherND8DynamicBD_0_I64, GatherND8LayerCPUTest,
+                ::testing::Combine(
+                        ::testing::ValuesIn(inputShapesDynamicBD_0),
+                        ::testing::ValuesIn(indexesShapesBD_0),
+                        ::testing::Values(ElementType::i64),
+                        ::testing::Values(ElementType::i64),
+                        ::testing::Values(0),
+                        ::testing::Values(config_i64)),
+                GatherNDLayerCPUTest::getTestCaseName);
 
 const std::vector<InputShape> inputShapesDynamicBD_1 = {
         {{3, -1, -1},                                            // dynamic
@@ -144,10 +167,10 @@ const std::vector<InputShape> inputShapesDynamicBD_1 = {
          {{3, 5, 5, 5, 5}, {3, 8, 10, 10, 10}, {3, 8, 6, 8, 7}}}, // target
 };
 
-const std::vector<std::pair<Shape, std::vector<int>>> indexesShapesBD_1 = {
-        std::pair<Shape, std::vector<int>>{{3, 2}, {0, 1, 2, 1, 0, 0}},
-        std::pair<Shape, std::vector<int>>{{3, 2, 2}, {0, 1, 1, 1, 0, 2, 0, 1, 1, 1, 0, 2}},
-        std::pair<Shape, std::vector<int>>{{3, 1, 1, 2}, {0, 2, 1, 1, 0, 2}},
+const std::vector<std::pair<ov::Shape, std::vector<int>>> indexesShapesBD_1 = {
+        std::pair<ov::Shape, std::vector<int>>{{3, 2}, {0, 1, 2, 1, 0, 0}},
+        std::pair<ov::Shape, std::vector<int>>{{3, 2, 2}, {0, 1, 1, 1, 0, 2, 0, 1, 1, 1, 0, 2}},
+        std::pair<ov::Shape, std::vector<int>>{{3, 1, 1, 2}, {0, 2, 1, 1, 0, 2}},
 };
 
 const auto subset_BD1 = ::testing::Combine(
@@ -155,10 +178,20 @@ const auto subset_BD1 = ::testing::Combine(
         ::testing::ValuesIn(indexesShapesBD_1),
         ::testing::ValuesIn(inputPrecisions),
         ::testing::ValuesIn(indexesPrecisions),
-        ::testing::Values(0));
+        ::testing::Values(0),
+        ::testing::Values(empty_config));
 
 INSTANTIATE_TEST_SUITE_P(smoke_GatherND5DynamicBD_1, GatherNDLayerCPUTest, subset_BD1, GatherNDLayerCPUTest::getTestCaseName);
 INSTANTIATE_TEST_SUITE_P(smoke_GatherND8DynamicBD_1, GatherND8LayerCPUTest, subset_BD1, GatherNDLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_GatherND8DynamicBD_1_I64, GatherND8LayerCPUTest,
+                ::testing::Combine(
+                        ::testing::ValuesIn(inputShapesDynamicBD_1),
+                        ::testing::ValuesIn(indexesShapesBD_1),
+                        ::testing::Values(ElementType::i64),
+                        ::testing::Values(ElementType::i64),
+                        ::testing::Values(0),
+                        ::testing::Values(config_i64)),
+                GatherNDLayerCPUTest::getTestCaseName);
 
 const std::vector<InputShape> inputShapesDynamicBD_2 = {
         {{2, 2, -1, -1, -1},                                                       // dynamic
@@ -168,10 +201,10 @@ const std::vector<InputShape> inputShapesDynamicBD_2 = {
          {{2, 2, 5, 5, 5}, {2, 2, 10, 10, 5}, {2, 2, 7, 8, 7}}},                   // target
 };
 
-const std::vector<std::pair<Shape, std::vector<int>>> indexesShapesBD_2 = {
-        std::pair<Shape, std::vector<int>>{{2, 2, 3}, {0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}},
-        std::pair<Shape, std::vector<int>>{{2, 2, 2, 3}, {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
-                                                                0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0}},
+const std::vector<std::pair<ov::Shape, std::vector<int>>> indexesShapesBD_2 = {
+        std::pair<ov::Shape, std::vector<int>>{{2, 2, 3}, {0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}},
+        std::pair<ov::Shape, std::vector<int>>{{2, 2, 2, 3}, {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
+                                                              0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0}},
 };
 
 const auto subset_BD2 = ::testing::Combine(
@@ -179,11 +212,20 @@ const auto subset_BD2 = ::testing::Combine(
         ::testing::ValuesIn(indexesShapesBD_2),
         ::testing::ValuesIn(inputPrecisions),
         ::testing::ValuesIn(indexesPrecisions),
-        ::testing::Values(0));
+        ::testing::Values(0),
+        ::testing::Values(empty_config));
 
 INSTANTIATE_TEST_SUITE_P(smoke_GatherND5DynamicBD_2, GatherNDLayerCPUTest, subset_BD2, GatherNDLayerCPUTest::getTestCaseName);
 INSTANTIATE_TEST_SUITE_P(smoke_GatherND8DynamicBD_2, GatherND8LayerCPUTest, subset_BD2, GatherNDLayerCPUTest::getTestCaseName);
-
+INSTANTIATE_TEST_SUITE_P(smoke_GatherND8DynamicBD_2_I64, GatherND8LayerCPUTest,
+                ::testing::Combine(
+                        ::testing::ValuesIn(inputShapesDynamicBD_2),
+                        ::testing::ValuesIn(indexesShapesBD_2),
+                        ::testing::Values(ElementType::i64),
+                        ::testing::Values(ElementType::i64),
+                        ::testing::Values(0),
+                        ::testing::Values(config_i64)),
+                GatherNDLayerCPUTest::getTestCaseName);
 
 }  // namespace
 } // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/activation.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/activation.cpp
index 20a5089288811f..a37d0732d6791d 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/activation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/activation.cpp
@@ -14,14 +14,17 @@ using namespace ov::test;
 namespace CPULayerTestsDefinitions {
 namespace Activation {
 
+const ov::AnyMap empty_config = {};  // Empty config map; const gives it internal linkage in this TU.
+
 /* ============= Activation (1D) ============= */
 const auto basicCases3D = ::testing::Combine(
     ::testing::ValuesIn(static_shapes_to_test_representation(basic3D())),
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes())),
     ::testing::ValuesIn(netPrc()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(filterCPUSpecificParams(cpuParams3D()))
 );
 
@@ -33,8 +36,9 @@ const auto basicCases4D = ::testing::Combine(
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes())),
     ::testing::ValuesIn(netPrc()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(filterCPUSpecificParams(cpuParams4D()))
 );
 
@@ -46,8 +50,9 @@ const auto basicCases5D = ::testing::Combine(
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes())),
     ::testing::ValuesIn(netPrc()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(filterCPUSpecificParams(cpuParams5D()))
 );
 
@@ -58,8 +63,9 @@ const auto dynamicMathBasicCases = ::testing::Combine(
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypesDynamicMath())),
     ::testing::ValuesIn(netPrecisions()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(cpuParamsDynamicMath())
 );
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp
index cf2af8b8a70bba..33a7ef56f39ce6 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/conversion.cpp
@@ -6,7 +6,7 @@
 #include "shared_test_classes/single_layer/conversion.hpp"
 #include "test_utils/cpu_test_utils.hpp"
 
-using namespace InferenceEngine;
+// using namespace InferenceEngine;
 using namespace CPUTestUtils;
 using namespace ngraph::helpers;
 using namespace ov::test;
@@ -29,11 +29,14 @@ std::vector<CPUSpecificParams> memForm4D_dynamic = {
     CPUSpecificParams({nhwc}, {nhwc}, {}, expectedPrimitiveType()),
 };
 
+const ov::AnyMap empty_config = {};  // Empty config map; const gives it internal linkage in this TU.
+
 INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Dynamic, ConvertCPULayerTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(inShapes_4D_dynamic()),
                                 ::testing::ValuesIn(precisions()),
                                 ::testing::ValuesIn(precisions()),
+                                ::testing::Values(empty_config),
                                 ::testing::ValuesIn(memForm4D_dynamic)),
                         ConvertCPULayerTest::getTestCaseName);
 
@@ -47,8 +50,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
                                 ::testing::ValuesIn(inShapes_4D_static()),
                                 ::testing::ValuesIn(precisions()),
                                 ::testing::ValuesIn(precisions()),
+                                ::testing::Values(empty_config),
                                 ::testing::ValuesIn(memForm4D_static_common)),
                         ConvertCPULayerTest::getTestCaseName);
 
 }  // namespace Conversion
-}  // namespace CPULayerTestsDefinitions
\ No newline at end of file
+}  // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp
index 386417bcf0c258..c132c08c6629cf 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp
@@ -55,30 +55,34 @@ std::vector<CPUSpecificParams> cpuParams_4D = {
 #endif
 };
 
+const ov::AnyMap empty_config = {};  // Empty config map passed as the last value of each inner Combine below.
+
 /* ================================ 1.1 No fusion - Arithmetic ================================ */
 const auto params_OneAxis = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::ValuesIn(opTypes()),
-            testing::ValuesIn(keepDims()),
-            testing::ValuesIn(reductionTypes()),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes)),
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn(reductionTypes()),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
 const auto params_OneAxis_dynamic = testing::Combine(
         testing::Combine(
-            testing::Values(1),                                 // ACL supports reduce against static dims only
-            testing::ValuesIn(opTypes()),
-            testing::ValuesIn(keepDims()),
-            testing::ValuesIn(reductionTypes()),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dynamic_3dims)),
+                testing::Values(1),                                 // ACL supports reduce against static dims only
+                testing::ValuesIn(opTypes()),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn(reductionTypes()),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dynamic_3dims),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
@@ -91,7 +95,8 @@ const auto params_MultiAxis_4D = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes)),
+                testing::ValuesIn(inputShapes),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
         testing::Values(emptyFusingSpec));
 
@@ -104,20 +109,22 @@ const auto params_MultiAxis_4D_dynamic = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_dynamic_2dims)),
+                testing::ValuesIn(inputShapes_dynamic_2dims),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
         testing::Values(emptyFusingSpec));
 
 const auto params_Int32 = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::ValuesIn(keepDims()),
-            testing::ValuesIn(reductionTypesInt32()),
-            testing::Values(ElementType::i32),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_Int32)),
+                testing::ValuesIn(axes()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn(reductionTypesInt32()),
+                testing::Values(ElementType::i32),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_Int32),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/transpose.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/transpose.cpp
index 0684aaeaec622d..39a7fcaff419cd 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/transpose.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/transpose.cpp
@@ -6,21 +6,21 @@
 #include "shared_test_classes/single_layer/transpose.hpp"
 #include "test_utils/cpu_test_utils.hpp"
 
-using namespace InferenceEngine;
+// using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::helpers;
+// using namespace ngraph::helpers;
 using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 namespace Transpose {
-std::map<std::string, std::string> additional_config;
+ov::AnyMap additional_config;
 
 const auto cpuParams_nhwc = CPUSpecificParams {{nhwc}, {}, {}, {}};
 const auto cpuParams_nchw = CPUSpecificParams {{nchw}, {}, {}, {}};
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-        Precision::I8,
-        Precision::FP32
+const std::vector<ElementType> netPrecisions = {
+        ElementType::i8,
+        ElementType::f32
 };
 
 const std::vector<std::vector<size_t>> inputOrderPerChannels4D = {
@@ -39,7 +39,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC16_Transpose, TransposeLayerCPUTes
                                  ::testing::ValuesIn(dynamicInputShapes4DC16()),
                                  ::testing::ValuesIn(inputOrder4D()),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                  ::testing::Values(additional_config),
                                  ::testing::ValuesIn(CPUParams4D)),
                          TransposeLayerCPUTest::getTestCaseName);
@@ -49,7 +48,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC32_Transpose, TransposeLayerCPUTes
                                  ::testing::ValuesIn(dynamicInputShapes4DC32()),
                                  ::testing::ValuesIn(inputOrder4D()),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                  ::testing::Values(additional_config),
                                  ::testing::ValuesIn(CPUParams4D)),
                          TransposeLayerCPUTest::getTestCaseName);
@@ -59,7 +57,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes4D_Transpose, TransposeLayerCPUTest,
                                  ::testing::ValuesIn(dynamicInputShapes4D()),
                                  ::testing::ValuesIn(inputOrder4D()),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                  ::testing::Values(additional_config),
                                  ::testing::Values(CPUSpecificParams{})),
                          TransposeLayerCPUTest::getTestCaseName);
@@ -69,7 +66,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC16_PermutePerChannels, TransposeLa
                                  ::testing::ValuesIn(dynamicInputShapes4DC16()),
                                  ::testing::ValuesIn(inputOrderPerChannels4D),
                                  ::testing::ValuesIn(netPrecisionsPerChannels()),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                  ::testing::Values(additional_config),
                                  ::testing::Values(cpuParams_nhwc)),
                          TransposeLayerCPUTest::getTestCaseName);
@@ -79,7 +75,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC32_PermutePerChannels, TransposeLa
                                  ::testing::ValuesIn(dynamicInputShapes4DC32()),
                                  ::testing::ValuesIn(inputOrderPerChannels4D),
                                  ::testing::ValuesIn(netPrecisionsPerChannels()),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                  ::testing::Values(additional_config),
                                  ::testing::Values(cpuParams_nhwc)),
                          TransposeLayerCPUTest::getTestCaseName);
@@ -89,10 +84,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes4D_PermutePerChannels, TransposeLaye
                                  ::testing::ValuesIn(dynamicInputShapes4D()),
                                  ::testing::ValuesIn(inputOrderPerChannels4D),
                                  ::testing::ValuesIn(netPrecisionsPerChannels()),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                  ::testing::Values(additional_config),
                                  ::testing::Values(CPUSpecificParams{})),
                          TransposeLayerCPUTest::getTestCaseName);
 
 } // namespace Transpose
-} // namespace CPULayerTestsDefinitions
\ No newline at end of file
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/activation.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/activation.cpp
index 794437e770ef37..a7c0ac290cdac6 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/activation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/activation.cpp
@@ -5,8 +5,8 @@
 #include "single_layer_tests/classes/activation.hpp"
 #include "shared_test_classes/single_layer/activation.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
-using namespace InferenceEngine;
 using namespace CPUTestUtils;
 using namespace ngraph::helpers;
 using namespace ov::test;
@@ -15,10 +15,10 @@ namespace CPULayerTestsDefinitions  {
 namespace Activation {
 namespace {
 
-const std::vector<Precision>& netPrc() {
-    static const std::vector<Precision> netPrc {
-        Precision::FP32,
-        Precision::BF16,
+const std::vector<ElementType>& netPrc() {
+    static const std::vector<ElementType> netPrc {
+        ElementType::f32,
+        ElementType::bf16,
     };
 
     return netPrc;
@@ -41,13 +41,25 @@ const std::vector<CPUSpecificParams>& cpuParams3Dblocked() {
     return cpuParams3Dblocked;
 }
 
+const std::map<ActivationTypes, std::vector<std::vector<float>>>& activationTypes_i64() {
+    static const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes {
+        {Sqrt, {{}}}
+    };
+
+    return activationTypes;
+}
+
+ov::AnyMap empty_config = {};
+ov::AnyMap config_i64 = {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}};
+
 const auto blockedCases3D = ::testing::Combine(
     ::testing::ValuesIn(static_shapes_to_test_representation(basic3D())),
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypesBlocked())),
     ::testing::ValuesIn(netPrc()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(filterCPUSpecificParams(cpuParams3Dblocked()))
 );
 
@@ -67,13 +79,26 @@ const auto basicCases4D = ::testing::Combine(
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes())),
     ::testing::ValuesIn(netPrc()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(filterCPUSpecificParams(cpuParams4Dblocked()))
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_Activation4D_Eltwise_CPU_Blocked, ActivationLayerCPUTest, basicCases4D, ActivationLayerCPUTest::getTestCaseName);
 
+// INSTANTIATE_TEST_SUITE_P(smoke_Activation4D_Eltwise_CPU_Blocked_I64, ActivationLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::ValuesIn(static_shapes_to_test_representation(basic4D())),
+//                 ::testing::Values(activationShapes()),
+//                 ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes_i64())),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(config_i64),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams4Dblocked()))
+//         ), ActivationLayerCPUTest::getTestCaseName);
+
 /* ============= Activation (3D) ============= */
 const std::vector<CPUSpecificParams>& cpuParams5Dblocked() {
     static const std::vector<CPUSpecificParams> cpuParams5Dblocked {
@@ -88,13 +113,73 @@ const auto basicCases5D = ::testing::Combine(
     ::testing::Values(activationShapes()),
     ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes())),
     ::testing::ValuesIn(netPrc()),
-    ::testing::Values(Precision::FP32),
-    ::testing::Values(Precision::FP32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
     ::testing::ValuesIn(filterCPUSpecificParams(cpuParams5Dblocked()))
 );
 
 INSTANTIATE_TEST_SUITE_P(smoke_Activation5D_Eltwise_CPU_Blocked, ActivationLayerCPUTest, basicCases5D, ActivationLayerCPUTest::getTestCaseName);
 
+// INSTANTIATE_TEST_SUITE_P(smoke_Activation5D_Eltwise_CPU_Blocked_I64, ActivationLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::ValuesIn(static_shapes_to_test_representation(basic5D())),
+//                 ::testing::Values(activationShapes()),
+//                 ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes_i64())),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(config_i64),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams5Dblocked()))
+//         ), ActivationLayerCPUTest::getTestCaseName);
+
+const auto basicCases3D = ::testing::Combine(
+    ::testing::ValuesIn(static_shapes_to_test_representation(basic3D())),
+    ::testing::Values(activationShapes()),
+    ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes())),
+    ::testing::ValuesIn(netPrc()),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(empty_config),
+    ::testing::ValuesIn(filterCPUSpecificParams(cpuParams3D()))
+);
+
+// INSTANTIATE_TEST_SUITE_P(smoke_Activation3D_Eltwise_CPU_I64, ActivationLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::ValuesIn(static_shapes_to_test_representation(basic3D())),
+//                 ::testing::Values(activationShapes()),
+//                 ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes_i64())),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(config_i64),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams3D()))),
+//         ActivationLayerCPUTest::getTestCaseName);
+
+// INSTANTIATE_TEST_SUITE_P(smoke_Activation4D_Eltwise_CPU_I64, ActivationLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::ValuesIn(static_shapes_to_test_representation(basic4D())),
+//                 ::testing::Values(activationShapes()),
+//                 ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes_i64())),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(config_i64),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams4D()))),
+//         ActivationLayerCPUTest::getTestCaseName);
+
+// INSTANTIATE_TEST_SUITE_P(smoke_Activation5D_Eltwise_CPU, ActivationLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::ValuesIn(static_shapes_to_test_representation(basic5D())),
+//                 ::testing::Values(activationShapes()),
+//                 ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes_i64())),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(ElementType::i64),
+//                 ::testing::Values(config_i64),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams5D()))),
+//         ActivationLayerCPUTest::getTestCaseName);
+
 } // namespace
 } // namespace Activation
 } // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/conversion.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/conversion.cpp
index 9206eca36d7352..a49c41b039d780 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/conversion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/conversion.cpp
@@ -5,10 +5,9 @@
 #include "single_layer_tests/classes/conversion.hpp"
 #include "shared_test_classes/single_layer/conversion.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
-using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::helpers;
 using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
@@ -20,11 +19,15 @@ std::vector<CPUSpecificParams> memForm4D_dynamic = {
     CPUSpecificParams({nChw16c}, {nChw16c}, {}, "ref")
 };
 
+ov::AnyMap empty_config = {};  // no extra plugin properties; config_i64 below sets the internal KEY_CPU_NATIVE_I64 option to YES
+ov::AnyMap config_i64 = {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}};
+
 INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_blocked_Dynamic, ConvertCPULayerTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(inShapes_4D_dynamic()),
                                 ::testing::ValuesIn(precisions()),
                                 ::testing::ValuesIn(precisions()),
+                                ::testing::Values(empty_config),  // added config slot; this suite passes no extra plugin properties
                                 ::testing::ValuesIn(memForm4D_dynamic)),
                         ConvertCPULayerTest::getTestCaseName);
 
@@ -36,9 +39,14 @@ std::vector<CPUSpecificParams> memForm4D_static_blocked = {
     CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
 };
 
-const std::vector<Precision> precisions_floating_point = {
-        Precision::FP32,
-        Precision::BF16
+const std::vector<ElementType> precisions_floating_point = {  // f32 and bf16, paired with boolean in the BOOL suites
+        ElementType::f32,
+        ElementType::bf16
+};
+
+std::vector<CPUSpecificParams> memForm4D_static_common = {  // nchw and nhwc layouts, same format on both sides
+    CPUSpecificParams({nchw}, {nchw}, {}, {}),
+    CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Blocked, ConvertCPULayerTest,
@@ -46,6 +54,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Blocked, ConvertCPULayerTest,
                                 ::testing::ValuesIn(inShapes_4D_blocked),
                                 ::testing::ValuesIn(precisions()),
                                 ::testing::ValuesIn(precisions()),
+                                ::testing::Values(empty_config),
                                 ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_static_blocked))),
                         ConvertCPULayerTest::getTestCaseName);
 
@@ -53,7 +62,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Static, ConvertCPULayerT
                         ::testing::Combine(
                                 ::testing::ValuesIn(inShapes_4D_static()),
                                 ::testing::ValuesIn(precisions_floating_point),
-                                ::testing::Values(Precision::BOOL),
+                                ::testing::Values(ElementType::boolean),
+                                ::testing::Values(empty_config),
                                 ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {}))),
                         ConvertCPULayerTest::getTestCaseName);
 
@@ -61,10 +71,47 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Dynamic, ConvertCPULayer
                         ::testing::Combine(
                                 ::testing::ValuesIn(inShapes_4D_dynamic()),
                                 ::testing::ValuesIn(precisions_floating_point),
-                                ::testing::Values(Precision::BOOL),
+                                ::testing::Values(ElementType::boolean),
+                                ::testing::Values(empty_config),
                                 ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, "ref"))),
                         ConvertCPULayerTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_FromI64_Dynamic, ConvertCPULayerTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(inShapes_4D_dynamic()),
+                                 ::testing::Values(ElementType::i64),  // first precision slot fixed to i64 (the "From" side per the suite name)
+                                 ::testing::ValuesIn(precisions()),    // second precision slot sweeps precisions()
+                                 ::testing::Values(config_i64),        // KEY_CPU_NATIVE_I64 = YES
+                                 ::testing::ValuesIn(memForm4D_dynamic)),
+                         ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_ToI64_Dynamic, ConvertCPULayerTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(inShapes_4D_dynamic()),
+                                 ::testing::ValuesIn(precisions()),    // first precision slot sweeps precisions()
+                                 ::testing::Values(ElementType::i64),  // second precision slot fixed to i64 (the "To" side per the suite name)
+                                 ::testing::Values(config_i64),        // KEY_CPU_NATIVE_I64 = YES
+                                 ::testing::ValuesIn(memForm4D_dynamic)),
+                         ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_FromI64, ConvertCPULayerTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(inShapes_4D_static()),
+                                 ::testing::Values(ElementType::i64),  // first precision slot fixed to i64 (the "From" side per the suite name)
+                                 ::testing::ValuesIn(precisions()),    // second precision slot sweeps precisions()
+                                 ::testing::Values(config_i64),        // KEY_CPU_NATIVE_I64 = YES
+                                 ::testing::ValuesIn(memForm4D_static_common)),  // passed without filterCPUSpecificParams
+                         ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_ToI64, ConvertCPULayerTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(inShapes_4D_static()),
+                                 ::testing::ValuesIn(precisions()),    // first precision slot sweeps precisions()
+                                 ::testing::Values(ElementType::i64),  // second precision slot fixed to i64 (the "To" side per the suite name)
+                                 ::testing::Values(config_i64),        // KEY_CPU_NATIVE_I64 = YES
+                                 ::testing::ValuesIn(memForm4D_static_common)),  // passed without filterCPUSpecificParams
+                         ConvertCPULayerTest::getTestCaseName);
+
 }  // namespace
 }  // namespace Conversion
 }  // namespace CPULayerTestsDefinitions
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/eltwise.cpp
index ac8ff11d1e9b18..bd764b1be122fe 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/eltwise.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/eltwise.cpp
@@ -8,6 +8,7 @@
 #include "test_utils/fusing_test_utils.hpp"
 #include <ngraph_functions/builders.hpp>
 #include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
@@ -25,6 +26,8 @@ const std::vector<ElementType>& netType() {
         return netType;
 }
 
+ov::AnyMap additional_config_i64 = {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}};  // internal KEY_CPU_NATIVE_I64 = YES
+
 const std::vector<InputShape>& inShapes_4D_dyn_param_fusing() {
         static const std::vector<InputShape> inShapes_4D_dyn_param_fusing = {
         {
@@ -172,6 +175,22 @@ const auto params_4D_Blocked_Blocked = ::testing::Combine(
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_MemOrder_Blocked_Blocked, EltwiseLayerCPUTest, params_4D_Blocked_Blocked,
                          EltwiseLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_MemOrder_Blocked_I64, EltwiseLayerCPUTest,  // i64 eltwise suite on 4D shapes
+         ::testing::Combine(
+                 ::testing::Combine(
+                         ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D())),
+                         ::testing::ValuesIn(eltwiseOpTypesBinInp()),
+                         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),  // only the CONSTANT input-layer type is exercised
+                         ::testing::ValuesIn(opTypes()),
+                         ::testing::Values(ElementType::i64),
+                         ::testing::Values(ov::element::undefined),
+                         ::testing::Values(ov::element::undefined),
+                         ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                         ::testing::Values(additional_config_i64)),  // KEY_CPU_NATIVE_I64 = YES
+                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Blocked_Blocked())),
+                 ::testing::Values(emptyFusingSpec)),  // no fusing patterns in this suite
+         EltwiseLayerCPUTest::getTestCaseName);
+
 const auto params_4D_fusing = ::testing::Combine(
         ::testing::Combine(
                 ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_fusing())),
@@ -255,6 +274,22 @@ const auto params_5D_Blocked_Blocked = ::testing::Combine(
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_MemOrder_Blocked_Blocked, EltwiseLayerCPUTest, params_5D_Blocked_Blocked,
                          EltwiseLayerCPUTest::getTestCaseName);
 
+// INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_MemOrder_Blocked_Blocked_I64, EltwiseLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::Combine(
+//                         ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D())),
+//                         ::testing::ValuesIn(eltwiseOpTypesBinInp()),
+//                         ::testing::ValuesIn(secondaryInputTypes()),
+//                         ::testing::ValuesIn(opTypes()),
+//                         ::testing::Values(ElementType::i64),
+//                         ::testing::Values(ov::element::undefined),
+//                         ::testing::Values(ov::element::undefined),
+//                         ::testing::Values(CommonTestUtils::DEVICE_CPU),
+//                         ::testing::Values(additional_config_i64)),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Blocked_Blocked())),
+//                 ::testing::Values(emptyFusingSpec)),
+//         EltwiseLayerCPUTest::getTestCaseName);
+
 const std::vector<fusingSpecificParams> fusingParamsSet_I32{
     fusingMultiplyAddPerChannel
 };
@@ -291,6 +326,22 @@ const auto params_4D_Blocked_Planar = ::testing::Combine(
 
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Blocked_Planar, EltwiseLayerCPUTest, params_4D_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName);
 
+// INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Blocked_Planar_I64, EltwiseLayerCPUTest,
+//         ::testing::Combine(
+//                 ::testing::Combine(
+//                         ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_Blocked_Planar())),
+//                         ::testing::ValuesIn(eltwiseOpTypesBinInp()),
+//                         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
+//                         ::testing::ValuesIn(opTypes()),
+//                         ::testing::Values(ElementType::i64),
+//                         ::testing::Values(ov::element::undefined),
+//                         ::testing::Values(ov::element::undefined),
+//                         ::testing::Values(CommonTestUtils::DEVICE_CPU),
+//                         ::testing::Values(additional_config_i64)),
+//                 ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Blocked_Planar())),
+//                 ::testing::Values(emptyFusingSpec)),
+//         EltwiseLayerCPUTest::getTestCaseName);
+
 const auto params_4D_Planar_Blocked = ::testing::Combine(
         ::testing::Combine(
                 ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_Planar_Blocked())),
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp
index 3e2384c5bfa420..190eac8a13f29f 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp
@@ -3,13 +3,10 @@
 //
 
 #include "single_layer_tests/classes/reduce.hpp"
-#include "shared_test_classes/single_layer/reduce_ops.hpp"
-#include "test_utils/cpu_test_utils.hpp"
-#include "test_utils/fusing_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::helpers;
 using namespace ov::test;
 
 
@@ -17,27 +14,35 @@ namespace CPULayerTestsDefinitions {
 namespace Reduce {
 namespace {
 
-std::vector<std::vector<ov::test::InputShape>> inputShapes_dyn = {
+std::vector<std::vector<InputShape>> inputShapes = {  // one 4D entry: empty first member, single target shape {2, 19, 2, 9}
+        {{{}, {{2, 19, 2, 9}}}},
+};
+
+std::vector<std::vector<InputShape>> inputShapes_5D = {  // one 5D entry: empty first member, single target shape {2, 19, 2, 2, 9}
+        {{{}, {{2, 19, 2, 2, 9}}}},
+};
+
+std::vector<std::vector<InputShape>> inputShapes_dyn = {
     {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 9}}}},
 };
 
-std::vector<std::vector<ov::test::InputShape>> inputShapes_5D_dyn = {
+std::vector<std::vector<InputShape>> inputShapes_5D_dyn = {
     {{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2}, {2, 19, 3, 2, 2}}}},
 };
 
-std::vector<std::vector<ov::test::InputShape>> inputShapes_6D_dyn = {
+std::vector<std::vector<InputShape>> inputShapes_6D_dyn = {
     {{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2, 2}, {2, 19, 2, 2, 3, 2}}}},
 };
 
-std::vector<std::vector<ov::test::InputShape>> inputShapes_Int32_dyn = {
+std::vector<std::vector<InputShape>> inputShapes_Int32_dyn = {
     {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}},
 };
 
-std::vector<std::vector<ov::test::InputShape>> inputShapes_SmallChannel_dyn = {
+std::vector<std::vector<InputShape>> inputShapes_SmallChannel_dyn = {
     {{{{1, 5}, 3, {1, 5}, {1, 10}}, {{2, 3, 2, 2}, {2, 3, 2, 9}}}},
 };
 
-std::vector<std::vector<ov::test::InputShape>> inputShapes_SingleBatch_dyn = {
+std::vector<std::vector<InputShape>> inputShapes_SingleBatch_dyn = {
     {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{1, 19, 2, 2}, {1, 19, 2, 9}}}},
 };
 
@@ -79,22 +84,44 @@ const std::vector<std::vector<int>> axesHW = {
         {2, 3}
 };
 
+std::vector<CPUSpecificParams> cpuParams_4D_I64 = {  // 4D layout pairs used by the I64 multi-axis reduce parameters
+        CPUSpecificParams({nChw16c}, {nChw8c}, {}, {}),  // NOTE(review): 16c on one side, 8c on the other; cpuParams_5D_I64 uses 8c on both - confirm
+        CPUSpecificParams({nchw}, {nchw}, {}, {}),
+        CPUSpecificParams({nhwc}, {nhwc}, {}, {})
+};
+
 std::vector<CPUSpecificParams> cpuParams_5D = {
         CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}),
         CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),
         CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}),
 };
 
+std::vector<CPUSpecificParams> cpuParams_5D_I64 = {  // 5D layout pairs for I64; blocked entry uses an 8-channel block
+        CPUSpecificParams({nCdhw8c}, {nCdhw8c}, {}, {}),
+        CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}),
+        CPUSpecificParams({ndhwc}, {ndhwc}, {}, {})
+};
+
 std::vector<CPUSpecificParams> cpuParams_HybridLayout_4D = {
         CPUSpecificParams({nChw16c}, {}, {}, {}),
         CPUSpecificParams({nhwc}, {}, {}, {})
 };
 
+std::vector<CPUSpecificParams> cpuParams_HybridLayout_4D_I64 = {  // only the first format list is set; the second is left empty
+        CPUSpecificParams({nChw8c}, {}, {}, {}),
+        CPUSpecificParams({nhwc}, {}, {}, {})
+};
+
 std::vector<CPUSpecificParams> cpuParams_HybridLayout_5D = {
         CPUSpecificParams({nCdhw16c}, {}, {}, {}),
         CPUSpecificParams({ndhwc}, {}, {}, {})
 };
 
+std::vector<CPUSpecificParams> cpuParams_HybridLayout_5D_I64 = {  // only the first format list is set; the second is left empty
+        CPUSpecificParams({nCdhw16c}, {}, {}, {}),  // NOTE(review): same entries as cpuParams_HybridLayout_5D; the 4D I64 list uses 8c - confirm 16c
+        CPUSpecificParams({ndhwc}, {}, {}, {})
+};
+
 std::vector<CPUSpecificParams> cpuParams_NHWC_4D = {
         CPUSpecificParams({nhwc}, {nhwc}, {}, {})
 };
@@ -120,6 +147,14 @@ const std::vector<fusingSpecificParams> fusingParamsSet {
         fusingScaleShift
 };
 
+const std::vector<fusingSpecificParams> fusingParamsSet_I64 {  // fusing patterns reserved for I64 reduce cases
+        /* FQ */
+        fusingFakeQuantizePerChannelRelu,
+        fusingFakeQuantizePerTensorRelu,
+        /* another patterns */
+        fusingScaleShift
+};
+
 // Exclude cases of fusingFakeQuantizePerChannelRelu, where FQ for non-1 channel fallbacks
 // to decomposed ngraph reference implementation, so such fusing tests are N/A
 const std::vector<fusingSpecificParams> fusingParamsSet_KeepNoDims {
@@ -132,16 +167,42 @@ const std::vector<fusingSpecificParams> fusingParamsSet_KeepNoDims {
         fusingScaleShift
 };
 
+const std::vector<fusingSpecificParams> fusingParamsSet_KeepNoDims_I64 {  // like fusingParamsSet_I64 minus the per-channel FQ entry
+        /* FQ */
+        fusingFakeQuantizePerTensorRelu,
+        /* another patterns */
+        fusingScaleShift
+};
+
+ov::AnyMap empty_config = {};  // no extra plugin properties
+ov::AnyMap config_i64 = {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}};  // internal KEY_CPU_NATIVE_I64 = YES
+
+/* ================================ 1.1 No fusion - Arithmetic ================================ */
 const auto params_OneAxis = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::ValuesIn(opTypes()),
-            testing::ValuesIn(keepDims()),
-            testing::ValuesIn(reductionTypes()),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn(reductionTypes()),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),  // added config slot; no extra plugin properties
+        testing::Values(emptyCPUSpec),
+        testing::Values(emptyFusingSpec));
+
+const auto params_OneAxis_I64 = testing::Combine(  // 64-bit integer variant of params_OneAxis
+        testing::Combine(
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn(reductionTypes()),
+                testing::Values(ElementType::i64, ElementType::u64),  // both signed and unsigned 64-bit precisions
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),  // the single {2, 19, 2, 9} entry instead of inputShapes_dyn
+                testing::Values(config_i64)),    // KEY_CPU_NATIVE_I64 = YES
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
@@ -154,20 +215,36 @@ const auto params_MultiAxis_4D = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
         testing::Values(emptyFusingSpec));
 
+const auto params_MultiAxis_4D_I64 = testing::Combine(  // multi-axis 4D reduce parameters for i64
+        testing::Combine(
+                testing::ValuesIn(axesND()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(true),  // same slot where params_OneAxis passes keepDims()
+                testing::ValuesIn(reductionTypes()),
+                testing::Values(ElementType::i64),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),
+                testing::Values(config_i64)),  // KEY_CPU_NATIVE_I64 = YES
+        testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_I64, ElementType::i64)),  // two-argument filter call taking the element type
+        testing::Values(emptyFusingSpec));
+
 const auto params_Int32 = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::ValuesIn(keepDims()),
-            testing::ValuesIn(reductionTypesInt32()),
-            testing::Values(ElementType::i32),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_Int32_dyn)),
+                testing::ValuesIn(axes()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn(reductionTypesInt32()),
+                testing::ValuesIn({ElementType::i32, ElementType::i64}),  // NOTE(review): i64 runs here with empty_config (no KEY_CPU_NATIVE_I64) - confirm intended
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_Int32_dyn),
+                testing::Values(empty_config)),  // added config slot; no extra plugin properties
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
@@ -178,6 +255,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_OneAxis_CPU_I64,  // instantiates ReduceCPULayerTest over params_OneAxis_I64
+        ReduceCPULayerTest,
+        params_OneAxis_I64,
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_4D_CPU,
         ReduceCPULayerTest,
@@ -185,6 +269,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_MultiAxis_4D_CPU_I64,  // instantiates ReduceCPULayerTest over params_MultiAxis_4D_I64
+        ReduceCPULayerTest,
+        params_MultiAxis_4D_I64,
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_Int32_CPU,
         ReduceCPULayerTest,
@@ -201,36 +292,81 @@ const auto params_MultiAxis_5D = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_5D_dyn)),
+                testing::ValuesIn(inputShapes_5D_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
         testing::Values(emptyFusingSpec));
 
+const auto params_MultiAxis_5D_I64 = testing::Combine(  // multi-axis 5D reduce parameters for i64
+        testing::Combine(
+                testing::ValuesIn(axes5D),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(true),  // same slot where params_OneAxis passes keepDims()
+                testing::ValuesIn(reductionTypes()),
+                testing::Values(ElementType::i64),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_5D),
+                testing::Values(config_i64)),  // KEY_CPU_NATIVE_I64 = YES
+        testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_I64, ElementType::i64)),  // two-argument filter call taking the element type
+        testing::Values(emptyFusingSpec));
+
 const auto params_MultiAxis_4D_Hybrid = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axesND()),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::Values(false),
-            testing::ValuesIn(reductionTypes()),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axesND()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),
+                testing::ValuesIn(reductionTypes()),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),  // added config slot; no extra plugin properties
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
         testing::Values(emptyFusingSpec));
 
+const auto params_MultiAxis_4D_Hybrid_I64 = testing::Combine(  // i64 variant of params_MultiAxis_4D_Hybrid
+        testing::Combine(
+                testing::ValuesIn(axesND()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),  // same slot where params_OneAxis passes keepDims()
+                testing::ValuesIn(reductionTypes()),
+                testing::Values(ElementType::i64),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),
+                testing::Values(config_i64)),  // KEY_CPU_NATIVE_I64 = YES
+        testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D_I64, ElementType::i64)),
+        testing::Values(emptyFusingSpec));
+
 const auto params_MultiAxis_5D_Hybrid = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes5D),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::Values(false),
-            testing::ValuesIn(reductionTypes()),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_5D_dyn)),
+                testing::ValuesIn(axes5D),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),
+                testing::ValuesIn(reductionTypes()),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_5D_dyn),
+                testing::Values(empty_config)),  // added config slot; no extra plugin properties
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
         testing::Values(emptyFusingSpec));
 
+const auto params_MultiAxis_5D_Hybrid_I64 = testing::Combine(  // i64 variant of params_MultiAxis_5D_Hybrid
+        testing::Combine(
+                testing::ValuesIn(axes5D),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),  // same slot where params_OneAxis passes keepDims()
+                testing::ValuesIn(reductionTypes()),
+                testing::Values(ElementType::i64),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_5D),
+                testing::Values(config_i64)),  // KEY_CPU_NATIVE_I64 = YES
+        testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D_I64, ElementType::i64)),
+        testing::Values(emptyFusingSpec));
+
 const auto params_MultiAxis_6D = testing::Combine(
         testing::Combine(
                 testing::ValuesIn(axes6D),
@@ -240,7 +376,8 @@ const auto params_MultiAxis_6D = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_6D_dyn)),
+                testing::ValuesIn(inputShapes_6D_dyn),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
@@ -253,7 +390,8 @@ const auto params_NHWC_SmallChannel = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_SmallChannel_dyn)),
+                testing::ValuesIn(inputShapes_SmallChannel_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
         testing::Values(emptyFusingSpec));
 
@@ -266,7 +404,8 @@ const auto params_SingleBatch = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_SingleBatch_dyn)),
+                testing::ValuesIn(inputShapes_SingleBatch_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
         testing::Values(emptyFusingSpec));
 
@@ -277,6 +416,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_MultiAxis_5D_CPU_I64,  // instantiates ReduceCPULayerTest over params_MultiAxis_5D_I64
+        ReduceCPULayerTest,
+        params_MultiAxis_5D_I64,
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_4D_Hybrid_CPU,
         ReduceCPULayerTest,
@@ -284,6 +430,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_MultiAxis_4D_Hybrid_CPU_I64,  // instantiates ReduceCPULayerTest over params_MultiAxis_4D_Hybrid_I64
+        ReduceCPULayerTest,
+        params_MultiAxis_4D_Hybrid_I64,
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_5D_Hybrid_CPU,
         ReduceCPULayerTest,
@@ -291,6 +444,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_MultiAxis_5D_Hybrid_CPU_I64,  // instantiates ReduceCPULayerTest over params_MultiAxis_5D_Hybrid_I64
+        ReduceCPULayerTest,
+        params_MultiAxis_5D_Hybrid_I64,
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_6D_CPU,
         ReduceCPULayerTest,
@@ -315,14 +475,15 @@ INSTANTIATE_TEST_SUITE_P(
 /* ================================ 1.2 No fusion - Logical ================================ */
 const auto params_OneAxis_Logical = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::ValuesIn(opTypes()),
-            testing::ValuesIn(keepDims()),
-            testing::ValuesIn((reductionLogicalTypes)),
-            testing::Values(ElementType::boolean),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::ValuesIn(keepDims()),
+                testing::ValuesIn((reductionLogicalTypes)),
+                testing::Values(ElementType::boolean),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),  // added config slot; no extra plugin properties
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
@@ -335,7 +496,8 @@ const auto params_MultiAxis_4D_Logical = testing::Combine(
                 testing::Values(ElementType::boolean),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
         testing::Values(emptyFusingSpec));
 
@@ -348,33 +510,36 @@ const auto params_MultiAxis_5D_Logical = testing::Combine(
                 testing::Values(ElementType::boolean),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_5D_dyn)),
+                testing::ValuesIn(inputShapes_5D_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
         testing::Values(emptyFusingSpec));
 
 const auto params_MultiAxis_4D_Hybrid_Logical = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axesND()),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::Values(false),
-            testing::ValuesIn((reductionLogicalTypes)),
-            testing::Values(ElementType::boolean),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axesND()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),
+                testing::ValuesIn((reductionLogicalTypes)),
+                testing::Values(ElementType::boolean),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
         testing::Values(emptyFusingSpec));
 
 const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes5D),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::Values(false),
-            testing::ValuesIn((reductionLogicalTypes)),
-            testing::Values(ElementType::boolean),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_5D_dyn)),
+                testing::ValuesIn(axes5D),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),
+                testing::ValuesIn((reductionLogicalTypes)),
+                testing::Values(ElementType::boolean),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_5D_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
         testing::Values(emptyFusingSpec));
 
@@ -387,7 +552,8 @@ const auto params_MultiAxis_6D_Logical = testing::Combine(
                 testing::Values(ElementType::boolean),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_6D_dyn)),
+                testing::ValuesIn(inputShapes_6D_dyn),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::Values(emptyFusingSpec));
 
@@ -436,17 +602,32 @@ INSTANTIATE_TEST_SUITE_P(
 /* ================================ 2.1 Fusion - KeepDims ================================ */
 const auto params_OneAxis_fusing = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::ValuesIn(opTypes()),
-            testing::Values(true),
-            testing::ValuesIn(reductionTypesFusing),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::Values(true),
+                testing::ValuesIn(reductionTypesFusing),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::ValuesIn(fusingParamsSet));
 
+const auto params_OneAxis_fusing_I64 = testing::Combine(  // 64-bit integer counterpart of params_OneAxis_fusing
+        testing::Combine(
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::Values(true),  // section 2.1 "KeepDims": presumably the keep-dims flag — confirm
+                testing::ValuesIn(reductionTypesFusing),
+                testing::Values(ElementType::i64, ElementType::u64),  // NOTE(review): other *_I64 fusing sets list only i64 — confirm u64 is intended
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),  // params_OneAxis_fusing uses inputShapes_dyn here
+                testing::Values(config_i64)),  // params_OneAxis_fusing uses empty_config here
+        testing::Values(emptyCPUSpec),
+        testing::ValuesIn(fusingParamsSet_I64));  // params_OneAxis_fusing uses fusingParamsSet here
+
 const auto params_MultiAxis_4D_fusing = testing::Combine(
         testing::Combine(
                 testing::ValuesIn(axesND()),
@@ -456,10 +637,25 @@ const auto params_MultiAxis_4D_fusing = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
         testing::ValuesIn(fusingParamsSet));
 
+const auto params_MultiAxis_4D_fusing_I64 = testing::Combine(  // i64 counterpart of params_MultiAxis_4D_fusing
+        testing::Combine(
+                testing::ValuesIn(axesND()),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(true),  // section 2.1 "KeepDims": presumably the keep-dims flag — confirm
+                testing::ValuesIn(reductionTypesFusing),
+                testing::Values(ElementType::i64),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),  // params_MultiAxis_4D_fusing uses inputShapes_dyn here
+                testing::Values(config_i64)),  // params_MultiAxis_4D_fusing uses empty_config here
+        testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_I64, ElementType::i64)),  // filter is given the i64 type explicitly
+        testing::ValuesIn(fusingParamsSet_I64));  // params_MultiAxis_4D_fusing uses fusingParamsSet here
+
 const auto params_MultiAxis_5D_fusing = testing::Combine(
         testing::Combine(
                 testing::ValuesIn(axes5D),
@@ -469,7 +665,8 @@ const auto params_MultiAxis_5D_fusing = testing::Combine(
                 testing::ValuesIn(inpOutPrc()),
                 testing::Values(ElementType::undefined),
                 testing::Values(ElementType::undefined),
-                testing::ValuesIn(inputShapes_5D_dyn)),
+                testing::ValuesIn(inputShapes_5D_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
         testing::ValuesIn(fusingParamsSet));
 
@@ -480,6 +677,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_OneAxis_fusing_CPU_I64,
+        ReduceCPULayerTest,
+        params_OneAxis_fusing_I64,  // i64/u64 precisions with config_i64 and fusingParamsSet_I64
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_4D_fusing_CPU,
         ReduceCPULayerTest,
@@ -487,6 +691,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_MultiAxis_4D_fusing_CPU_I64,
+        ReduceCPULayerTest,
+        params_MultiAxis_4D_fusing_I64,  // i64 precision with config_i64, cpuParams_4D_I64 and fusingParamsSet_I64
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_5D_fusing_CPU,
         ReduceCPULayerTest,
@@ -497,40 +708,57 @@ INSTANTIATE_TEST_SUITE_P(
 /* ================================ 2.2 Fusion - KeepNoDims ================================ */
 const auto params_OneAxis_fusing_KeepNoDims = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes()),
-            testing::ValuesIn(opTypes()),
-            testing::Values(false),
-            testing::ValuesIn(reductionTypesFusing),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::Values(false),
+                testing::ValuesIn(reductionTypesFusing),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::Values(emptyCPUSpec),
         testing::ValuesIn(fusingParamsSet_KeepNoDims));
 
+const auto params_OneAxis_fusing_KeepNoDims_I64 = testing::Combine(  // i64 counterpart of params_OneAxis_fusing_KeepNoDims
+        testing::Combine(
+                testing::ValuesIn(axes()),
+                testing::ValuesIn(opTypes()),
+                testing::Values(false),  // section 2.2 "KeepNoDims": presumably the keep-dims flag — confirm
+                testing::ValuesIn(reductionTypesFusing),
+                testing::Values(ElementType::i64),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes),  // the non-I64 set uses inputShapes_dyn here
+                testing::Values(config_i64)),  // the non-I64 set uses empty_config here
+        testing::Values(emptyCPUSpec),
+        testing::ValuesIn(fusingParamsSet_KeepNoDims_I64));  // the non-I64 set uses fusingParamsSet_KeepNoDims here
+
 const auto params_MultiAxis_4D_Hybrid_fusing_KeepNoDims = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axesNDFusing),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::Values(false),
-            testing::ValuesIn(reductionTypesFusing),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_dyn)),
+                testing::ValuesIn(axesNDFusing),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),
+                testing::ValuesIn(reductionTypesFusing),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
         testing::ValuesIn(fusingParamsSet_KeepNoDims));
 
 const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
         testing::Combine(
-            testing::ValuesIn(axes5DFusing),
-            testing::Values(CommonTestUtils::OpType::VECTOR),
-            testing::Values(false),
-            testing::ValuesIn(reductionTypesFusing),
-            testing::ValuesIn(inpOutPrc()),
-            testing::Values(ElementType::undefined),
-            testing::Values(ElementType::undefined),
-            testing::ValuesIn(inputShapes_5D_dyn)),
+                testing::ValuesIn(axes5DFusing),
+                testing::Values(CommonTestUtils::OpType::VECTOR),
+                testing::Values(false),
+                testing::ValuesIn(reductionTypesFusing),
+                testing::ValuesIn(inpOutPrc()),
+                testing::Values(ElementType::undefined),
+                testing::Values(ElementType::undefined),
+                testing::ValuesIn(inputShapes_5D_dyn),
+                testing::Values(empty_config)),
         testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
         testing::ValuesIn(fusingParamsSet_KeepNoDims));
 
@@ -541,6 +769,13 @@ INSTANTIATE_TEST_SUITE_P(
         ReduceCPULayerTest::getTestCaseName
 );
 
+INSTANTIATE_TEST_SUITE_P(
+        smoke_Reduce_OneAxis_fusing_KeepNoDims_CPU_I64,
+        ReduceCPULayerTest,
+        params_OneAxis_fusing_KeepNoDims_I64,  // i64 precision with config_i64 and fusingParamsSet_KeepNoDims_I64
+        ReduceCPULayerTest::getTestCaseName
+);
+
 INSTANTIATE_TEST_SUITE_P(
         smoke_Reduce_MultiAxis_4D_Hybrid_fusing_KeepNoDims_CPU,
         ReduceCPULayerTest,
@@ -557,4 +792,4 @@ INSTANTIATE_TEST_SUITE_P(
 
 } // namespace
 } // namespace Reduce
-} // namespace CPULayerTestsDefinitions
\ No newline at end of file
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/transpose.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/transpose.cpp
index b6dbb7657007e2..e16f9197052a00 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/transpose.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/transpose.cpp
@@ -5,17 +5,18 @@
 #include "single_layer_tests/classes/transpose.hpp"
 #include "shared_test_classes/single_layer/transpose.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
-using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::helpers;
 using namespace ov::test;
 
-
 namespace CPULayerTestsDefinitions {
 namespace Transpose {
 namespace {
-std::map<std::string, std::string> additional_config;
+ov::AnyMap empty_config = {};
+ov::AnyMap config_i64 = {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}};
+
+const auto cpuParams_nchw = CPUSpecificParams {{nchw}, {}, {}, {}};
 
 const auto cpuParams_ndhwc = CPUSpecificParams {{ndhwc}, {}, {}, {}};
 const auto cpuParams_ncdhw = CPUSpecificParams {{ncdhw}, {}, {}, {}};
@@ -26,10 +27,15 @@ const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {}, {}, {}};
 const auto cpuParams_nChw8c = CPUSpecificParams {{nChw8c}, {}, {}, {}};
 const auto cpuParams_nCdhw8c = CPUSpecificParams {{nCdhw8c}, {}, {}, {}};
 
-const std::vector<InferenceEngine::Precision> netPrecisions = {
-        Precision::I8,
-        Precision::BF16,
-        Precision::FP32
+const std::vector<InputShape> staticInputShapes4DC32 = {InputShape{// dynamic
+                                                                   {-1, 32, -1, -1},
+                                                                   // target
+                                                                   {{4, 32, 16, 14}, {16, 32, 5, 16}, {4, 32, 16, 14}}}};
+
+const std::vector<ElementType> netPrecisions = {
+        ElementType::i8,
+        ElementType::bf16,
+        ElementType::f32
 };
 
 const std::vector<CPUSpecificParams> CPUParams4D_blocked = {
@@ -37,13 +43,23 @@ const std::vector<CPUSpecificParams> CPUParams4D_blocked = {
         cpuParams_nChw8c,
 };
 
+const std::vector<CPUSpecificParams> CPUParams4D = {
+        cpuParams_nChw16c,
+        cpuParams_nChw8c,
+        cpuParams_nchw,
+};
+
+const std::vector<InputShape> staticInputShapes4DC16 = {InputShape{// dynamic
+                                                                   {-1, 16, -1, -1},
+                                                                   // target
+                                                                   {{2, 16, 21, 10}, {3, 16, 11, 12}, {2, 16, 21, 10}}}};
+
 INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC16_TransposeBlocked, TransposeLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::ValuesIn(dynamicInputShapes4DC16()),
                                  ::testing::ValuesIn(inputOrder4D()),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::ValuesIn(CPUParams4D_blocked)),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -52,8 +68,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC32_TransposeBlocked, TransposeLaye
                                  ::testing::ValuesIn(dynamicInputShapes4DC32()),
                                  ::testing::ValuesIn(inputOrder4D()),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::ValuesIn(CPUParams4D_blocked)),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -61,9 +76,35 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes4D_Transpose, TransposeLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::ValuesIn(dynamicInputShapes4D()),
                                  ::testing::ValuesIn(inputOrder4D()),
-                                 ::testing::Values(Precision::BF16),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(ElementType::bf16),
+                                 ::testing::Values(empty_config),
+                                 ::testing::Values(CPUSpecificParams{})),
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC16_Transpose_I64, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShapes4DC16),  // second dimension fixed to 16, the rest are -1
+                                 ::testing::ValuesIn(inputOrder4D()),
+                                 ::testing::Values(ElementType::i64),
+                                 ::testing::Values(config_i64),  // sets KEY_CPU_NATIVE_I64 to YES
+                                 ::testing::ValuesIn(CPUParams4D)),  // nChw16c, nChw8c and nchw
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4DC32_Transpose_I64, TransposeLayerCPUTest,  // "_I64" suffix matches the sibling suites
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShapes4DC32),  // second dimension fixed to 32, the rest are -1
+                                 ::testing::ValuesIn(inputOrder4D()),
+                                 ::testing::Values(ElementType::i64),
+                                 ::testing::Values(config_i64),  // sets KEY_CPU_NATIVE_I64 to YES
+                                 ::testing::ValuesIn(CPUParams4D)),  // nChw16c, nChw8c and nchw
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes4D_Transpose_I64, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(dynamicInputShapes4D()),
+                                 ::testing::ValuesIn(inputOrder4D()),
+                                 ::testing::Values(ElementType::i64),
+                                 ::testing::Values(config_i64),
                                  ::testing::Values(CPUSpecificParams{})),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -122,8 +163,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes5DC16_Transpose, TransposeLayerCPUTes
                                  ::testing::ValuesIn(staticInputShapes5DC16),
                                  ::testing::ValuesIn(inputOrder5D),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::ValuesIn(CPUParams5D)),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -132,8 +172,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes5DC32_Transpose, TransposeLayerCPUTes
                                  ::testing::ValuesIn(staticInputShapes5DC32),
                                  ::testing::ValuesIn(inputOrder5D),
                                  ::testing::ValuesIn(netPrecisions),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::ValuesIn(CPUParams5D)),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -141,9 +180,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes5D_Transpose, TransposeLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::ValuesIn(dynamicInputShapes5D),
                                  ::testing::ValuesIn(inputOrder5D),
-                                 ::testing::Values(Precision::BF16),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(ElementType::bf16),
+                                 ::testing::Values(empty_config),
                                  ::testing::Values(CPUSpecificParams{})),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -152,8 +190,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes5DC16_PermutePerChannels, TransposeLa
                                  ::testing::ValuesIn(staticInputShapes5DC16),
                                  ::testing::ValuesIn(inputOrderPerChannels5D),
                                  ::testing::ValuesIn(netPrecisionsPerChannels()),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::Values(cpuParams_ndhwc)),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -162,8 +199,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_staticShapes5DC32_PermutePerChannels, TransposeLa
                                  ::testing::ValuesIn(staticInputShapes5DC32),
                                  ::testing::ValuesIn(inputOrderPerChannels5D),
                                  ::testing::ValuesIn(netPrecisionsPerChannels()),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::Values(cpuParams_ndhwc)),
                          TransposeLayerCPUTest::getTestCaseName);
 
@@ -172,10 +208,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes5D_PermutePerChannels, TransposeLaye
                                  ::testing::ValuesIn(dynamicInputShapes5D),
                                  ::testing::ValuesIn(inputOrderPerChannels5D),
                                  ::testing::ValuesIn(netPrecisionsPerChannels()),
-                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                 ::testing::Values(additional_config),
+                                 ::testing::Values(empty_config),
                                  ::testing::Values(CPUSpecificParams{})),
                          TransposeLayerCPUTest::getTestCaseName);
 } // namespace
 } // namespace Transpose
-} // namespace CPULayerTestsDefinitions
\ No newline at end of file
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/minimum_maximum.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/minimum_maximum.cpp
new file mode 100644
index 00000000000000..ced17bab6b306c
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/minimum_maximum.cpp
@@ -0,0 +1,170 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+#include <common_test_utils/ov_tensor_utils.hpp>
+#include "test_utils/fusing_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
+
+
+using namespace CPUTestUtils;
+using namespace ov::test;
+
+namespace CPULayerTestsDefinitions {
+
+using basicMinMaxParams = std::tuple<
+        std::vector<InputShape>,         // Input shapes, one entry per input
+        ngraph::helpers::MinMaxOpType,   // Operation type: MINIMUM or MAXIMUM
+        ElementType,                     // Network precision
+        ngraph::helpers::InputLayerType, // Second input type: Parameter or Constant
+        ov::AnyMap                       // Additional plugin configuration
+>;
+
+using MinMaxLayerCPUTestParamSet = std::tuple<
+        basicMinMaxParams,    // Operation-level parameters
+        CPUSpecificParams>;   // CPU-specific parameters
+
+/// Parameterized CPU test that builds a two-input Minimum/Maximum model and sets the selectedType string.
+class MinMaxCPULayerTest : public testing::WithParamInterface<MinMaxLayerCPUTestParamSet>,
+                           virtual public SubgraphBaseTest, public CpuTestWithFusing {
+public:
+    /// Builds the test name from shapes, op type, precision, second-input type, config and CPU params.
+    static std::string getTestCaseName(const testing::TestParamInfo<MinMaxLayerCPUTestParamSet>& obj) {
+        basicMinMaxParams basicParams;
+        CPUSpecificParams cpuParams;
+        std::tie(basicParams, cpuParams) = obj.param;
+
+        std::vector<InputShape> inputShapes;
+        ngraph::helpers::MinMaxOpType opType;
+        ElementType netPrecision;
+        ngraph::helpers::InputLayerType layerType;
+        ov::AnyMap config;
+        std::tie(inputShapes, opType, netPrecision, layerType, config) = basicParams;
+
+        std::ostringstream result;
+        result << "IS=(";
+        for (const auto& shape : inputShapes) {
+            result << CommonTestUtils::partialShape2str({shape.first}) << "_";
+        }
+        result << ")_TS=(";
+        for (const auto& shape : inputShapes) {
+            for (const auto& item : shape.second) {
+                result << CommonTestUtils::vec2str(item) << "_";
+            }
+        }
+        // Close the target-shapes group so the parentheses in the generated name stay balanced.
+        result << ")_";
+        result << "opType=" << (opType == ngraph::helpers::MinMaxOpType::MINIMUM ? "MIN_" : "MAX_");
+        result << "netPRC=" << netPrecision << "_";
+        result << "type=" << layerType;
+        for (const auto& configItem : config) {
+            result << "_configItem=" << configItem.first << "_";
+            configItem.second.print(result);
+        }
+
+        result << CPUTestsBase::getTestCaseName(cpuParams);
+
+        return result.str();
+    }
+
+protected:
+    /// Unpacks the test parameters, builds the Minimum/Maximum model and chooses the selectedType string.
+    void SetUp() override {
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+
+        basicMinMaxParams basicParams;
+        CPUSpecificParams cpuParams;
+        std::tie(basicParams, cpuParams) = this->GetParam();
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+
+        std::vector<InputShape> inputShapes;
+        ngraph::helpers::MinMaxOpType opType;
+        ElementType netPrecision;
+        // NOTE(review): layerType is unpacked but never read below; both inputs come from makeDynamicParams.
+        ngraph::helpers::InputLayerType layerType;
+        std::tie(inputShapes, opType, netPrecision, layerType, configuration) = basicParams;
+
+        init_input_shapes(inputShapes);
+
+        auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
+        auto paramOuts = ngraph::helpers::convert2OutputVector(
+                ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
+
+        auto maxMinNode = ngraph::builder::makeMinMax(paramOuts[0], paramOuts[1], opType);
+
+        // i64/u64 select i64 only when KEY_CPU_NATIVE_I64 is present and not NO; otherwise i32 is selected.
+        if (netPrecision == ElementType::i64 || netPrecision == ElementType::u64) {
+            auto i64It = configuration.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64It == configuration.end() || i64It->second == InferenceEngine::PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::i32);
+            } else {
+                selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::i64);
+            }
+        } else if (netPrecision == ElementType::boolean) {
+            selectedType = makeSelectedTypeStr(getPrimitiveType(), ElementType::i8);
+        } else {
+            selectedType = makeSelectedTypeStr(getPrimitiveType(), netPrecision);
+        }
+
+        function = makeNgraphFunction(netPrecision, params, maxMinNode, "MinMax");
+    }
+};
+
+TEST_P(MinMaxCPULayerTest, CompareWithRefs) {
+    run();  // NOTE(review): run() is not defined in this file; presumably inherited from SubgraphBaseTest — confirm
+}
+
+namespace {
+
+const std::vector<ElementType> netPrecisions = { ElementType::f32, ElementType::i32 };  // used by smoke_MinMax
+
+const std::vector<std::vector<InputShape>> inShapesStatic = {  // each entry: shapes of the two inputs
+    { {{}, {{2}}}, {{}, {{1}}} },
+    { {{}, {{1, 1, 1, 3}}}, {{}, {{1}}} },
+    { {{}, {{1, 2, 4}}}, {{}, {{1}}} },
+    { {{}, {{1, 4, 4}}}, {{}, {{1}}} },
+    { {{}, {{1, 4, 4, 1}}}, {{}, {{1}}} },
+    { {{}, {{256, 56}}}, {{}, {{256, 56}}} },
+    { {{}, {{8, 1, 6, 1}}}, {{}, {{7, 1, 5}}} }
+};
+
+const std::vector<ngraph::helpers::MinMaxOpType> opType = {
+        ngraph::helpers::MinMaxOpType::MINIMUM,
+        ngraph::helpers::MinMaxOpType::MAXIMUM,
+};
+
+const std::vector<ngraph::helpers::InputLayerType> inputType = {
+        ngraph::helpers::InputLayerType::CONSTANT,
+        ngraph::helpers::InputLayerType::PARAMETER,
+};
+
+const ov::AnyMap config = {};  // no extra plugin options
+const ov::AnyMap config_i64 = {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}};
+
+INSTANTIATE_TEST_SUITE_P(smoke_MinMax, MinMaxCPULayerTest,
+        ::testing::Combine(
+                ::testing::Combine(
+                        ::testing::ValuesIn(inShapesStatic),  // input shape pairs
+                        ::testing::ValuesIn(opType),          // MINIMUM and MAXIMUM
+                        ::testing::ValuesIn(netPrecisions),   // f32 and i32
+                        ::testing::ValuesIn(inputType),       // CONSTANT and PARAMETER
+                        ::testing::Values(config)),           // empty configuration
+                ::testing::Values(emptyCPUSpec)),
+        MinMaxCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_MinMax_I64, MinMaxCPULayerTest,
+        ::testing::Combine(
+                ::testing::Combine(
+                        ::testing::ValuesIn(inShapesStatic),      // input shape pairs
+                        ::testing::ValuesIn(opType),              // MINIMUM and MAXIMUM
+                        ::testing::Values(ElementType::i64),      // i64 only
+                        ::testing::ValuesIn(inputType),           // CONSTANT and PARAMETER
+                        ::testing::Values(config_i64)),           // KEY_CPU_NATIVE_I64 set to YES
+                ::testing::Values(emptyCPUSpec)),
+        MinMaxCPULayerTest::getTestCaseName);
+
+} // namespace
+} // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp
index 482c9e3cff39ea..e675852f48e491 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp
@@ -10,6 +10,7 @@
 #include <common_test_utils/ov_tensor_utils.hpp>
 #include "test_utils/cpu_test_utils.hpp"
 #include "shared_test_classes/base/utils/ranges.hpp"
+#include <openvino/opsets/opset1.hpp>
 
 using namespace ov::test;
 using namespace ngraph;
@@ -43,9 +44,9 @@ using NmsParams = std::tuple<InputShapeParams,
                              int32_t,                                            // Max output boxes per class
                              ThresholdValues,                                    // IOU, Score, Soft NMS sigma
                              ngraph::helpers::InputLayerType,                    // max_output_boxes_per_class input type
-                             ngraph::op::v9::NonMaxSuppression::BoxEncodingType, // Box encoding
+                             ov::op::v9::NonMaxSuppression::BoxEncodingType,     // Box encoding
                              bool,                                               // Sort result descending
-                             ngraph::element::Type,                              // Output type
+                             ov::element::Type,                                  // Output type
                              std::string>;                                       // Device name
 
 class NmsLayerCPUTest : public testing::WithParamInterface<NmsParams>, virtual public SubgraphBaseTest, public CPUTestsBase {
@@ -129,41 +130,49 @@ class NmsLayerCPUTest : public testing::WithParamInterface<NmsParams>, virtual p
         std::tie(bounds, targetInDims) = inShapeParams;
 
         if (!bounds.empty()) {
-            inputDynamicShapes = std::vector<ngraph::PartialShape>{{bounds[BATCHES], bounds[BOXES], 4}, {bounds[BATCHES], bounds[CLASSES], bounds[BOXES]}};
+            inputDynamicShapes = std::vector<ov::PartialShape>{{bounds[BATCHES], bounds[BOXES], 4}, {bounds[BATCHES], bounds[CLASSES], bounds[BOXES]}};
         } else {
             size_t batches, boxes, classes;
             std::tie(batches, boxes, classes) = targetInDims.front();
             ov::Dimension numBatches(batches), numBoxes(boxes), numClasses(classes);
-            inputDynamicShapes = std::vector<ngraph::PartialShape>{{numBatches, numBoxes, 4}, {numBatches, numClasses, numBoxes}};
+            inputDynamicShapes = std::vector<ov::PartialShape>{{numBatches, numBoxes, 4}, {numBatches, numClasses, numBoxes}};
         }
 
         for (const auto &ts : targetInDims) {
             size_t numBatches, numBoxes, numClasses;
             std::tie(numBatches, numBoxes, numClasses) = ts;
-            targetStaticShapes.push_back(std::vector<ngraph::Shape>{{numBatches, numBoxes, 4}, {numBatches, numClasses, numBoxes}});
+            targetStaticShapes.push_back(std::vector<ov::Shape>{{numBatches, numBoxes, 4}, {numBatches, numClasses, numBoxes}});
             if (maxOutBoxesType == ngraph::helpers::InputLayerType::PARAMETER) {
-                targetStaticShapes.back().push_back(ngraph::Shape{1});
+                targetStaticShapes.back().push_back(ov::Shape{1});
             }
         }
 
-        std::shared_ptr<ngraph::Node> maxOutBoxesPerClassNode;
+        std::shared_ptr<ov::Node> maxOutBoxesPerClassNode;
         auto params = ngraph::builder::makeDynamicParams(paramsPrec, inputDynamicShapes);
         params[0]->set_friendly_name("param_1");
         params[1]->set_friendly_name("param_2");
 
         if (maxOutBoxesType == ngraph::helpers::InputLayerType::PARAMETER) {
-            inputDynamicShapes.push_back(ngraph::PartialShape{1});
-            params.push_back(std::make_shared<ngraph::opset1::Parameter>(element::Type_t::i32, inputDynamicShapes.back()));
+            inputDynamicShapes.push_back(ov::PartialShape{1});
+            if (maxBoxPrec == ElementType::i64) {
+                params.push_back(std::make_shared<ov::opset1::Parameter>(element::Type_t::i64, inputDynamicShapes.back()));
+            } else {
+                params.push_back(std::make_shared<ov::opset1::Parameter>(element::Type_t::i32, inputDynamicShapes.back()));
+            }
-            params[1]->set_friendly_name("param_3");
+            params.back()->set_friendly_name("param_3");  // name the parameter just appended; params[1] keeps "param_2"
             maxOutBoxesPerClassNode = params.back();
         } else {
-            maxOutBoxesPerClassNode = builder::makeConstant(maxBoxPrec, ngraph::Shape{}, std::vector<int32_t>{maxOutBoxesPerClass});
+            if (maxBoxPrec == ElementType::i64) {
+                maxOutBoxesPerClassNode = builder::makeConstant(maxBoxPrec, ov::Shape{}, std::vector<int64_t>{maxOutBoxesPerClass});
+            } else {
+                maxOutBoxesPerClassNode = builder::makeConstant(maxBoxPrec, ov::Shape{}, std::vector<int32_t>{maxOutBoxesPerClass});
+            }
         }
 
-        auto iouThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{iouThr})->output(0);
-        auto scoreThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{scoreThr})->output(0);
-        auto softNmsSigmaNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{softNmsSigma})->output(0);
-        auto nms = std::make_shared<ngraph::op::v9::NonMaxSuppression>(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode,
+        auto iouThrNode = builder::makeConstant(thrPrec, ov::Shape{}, std::vector<float>{iouThr})->output(0);
+        auto scoreThrNode = builder::makeConstant(thrPrec, ov::Shape{}, std::vector<float>{scoreThr})->output(0);
+        auto softNmsSigmaNode = builder::makeConstant(thrPrec, ov::Shape{}, std::vector<float>{softNmsSigma})->output(0);
+        auto nms = std::make_shared<ov::op::v9::NonMaxSuppression>(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode,
                                                                        softNmsSigmaNode, boxEncoding, sortResDescend, outType);
 
         function = makeNgraphFunction(paramsPrec, params, nms, "NMS");
@@ -433,6 +442,22 @@ const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
                                           ::testing::Values(CommonTestUtils::DEVICE_CPU)
 );
 
+const auto nmsParams_i64 = ::testing::Combine(::testing::ValuesIn(inShapeParams),  // parameter set with an i64 precision entry
+                                          ::testing::Combine(::testing::Values(ElementType::f32),
+                                                             ::testing::Values(ElementType::i64),  // presumably maxBoxPrec - confirm
+                                                             ::testing::Values(ElementType::f32)),
+                                          ::testing::ValuesIn(maxOutBoxPerClass),  // max output boxes per class
+                                          ::testing::Combine(::testing::ValuesIn(threshold),  // IOU, score, soft NMS sigma
+                                                             ::testing::ValuesIn(threshold),
+                                                             ::testing::ValuesIn(sigmaThreshold)),
+                                          ::testing::ValuesIn(maxBoxInputTypes),  // max_output_boxes_per_class input type
+                                          ::testing::ValuesIn(encodType),  // box encoding
+                                          ::testing::ValuesIn(sortResDesc),  // sort result descending
+                                          ::testing::ValuesIn(outType),  // output type
+                                          ::testing::Values(CommonTestUtils::DEVICE_CPU)  // device name
+);
+
 INSTANTIATE_TEST_SUITE_P(smoke_NmsLayerCPUTest, NmsLayerCPUTest, nmsParams, NmsLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_NmsLayerCPUTest_i64, NmsLayerCPUTest, nmsParams_i64, NmsLayerCPUTest::getTestCaseName);
 
 } // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp
index 3d761c34917275..9e5d53b93353bd 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp
@@ -6,6 +6,7 @@
 #include <common_test_utils/ov_tensor_utils.hpp>
 #include "test_utils/cpu_test_utils.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
@@ -20,7 +21,9 @@ using oneHotCPUTestParams = std::tuple<
         size_t,                                            // depth
         float,                                             // on_value
         float,                                             // off_value
-        InferenceEngine::Precision,                        // Output precision
+        ElementType,                                       // Input precision
+        ElementType,                                       // Output precision
+        ov::AnyMap,                                        // Additional network configuration
         CPUSpecificParams>;
 
 class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParams>,
@@ -32,9 +35,10 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
         std::pair<ngraph::helpers::InputLayerType, bool> inputType;
         size_t depth;
         float onValue, offValue;
-        InferenceEngine::Precision outPrc;
+        ElementType inPrc, outPrc;
+        ov::AnyMap additionalConfig;
         CPUSpecificParams cpuParams;
-        std::tie(inputShape, axis, inputType, depth, onValue, offValue, outPrc, cpuParams) = obj.param;
+        std::tie(inputShape, axis, inputType, depth, onValue, offValue, inPrc, outPrc, additionalConfig, cpuParams) = obj.param;
 
         std::ostringstream result;
         if (inputShape.first.size() != 0) {
@@ -54,11 +58,21 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
         }
         result << "OnVal=" << onValue << "_";
         result << "OffVal=" << offValue << "_";
-        result << "outPRC=" << outPrc.name();
+        result << "inPRC=" << inPrc << "_";
+        result << "outPRC=" << outPrc;
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                result << "_" << item.first << "=";
+                item.second.print(result);
+            }
+        }
         result << CPUTestsBase::getTestCaseName(cpuParams);
+
         return result.str();
     }
-    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
+
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
         inputs.clear();
         const auto& funcInputs = function->inputs();
         for (size_t i = 0; i < funcInputs.size(); ++i) {
@@ -67,10 +81,13 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
 
             if (i == 1) {
                 tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
-                auto *dataPtr = tensor.data<int32_t>();
-                dataPtr[0] = Depth;
+                if (funcInput.get_element_type() == ElementType::i64) {
+                    tensor.data<int64_t>()[0] = Depth;
+                } else {
+                    tensor.data<int32_t>()[0] = Depth;
+                }
             } else {
-                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
             }
 
             inputs.insert({funcInput.get_node_shared_ptr(), tensor});
@@ -82,17 +99,24 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
 
         InputShape inputShape;
         std::pair<ngraph::helpers::InputLayerType, bool> inputType;
-        InferenceEngine::Precision outPrc;
         CPUSpecificParams cpuParams;
-        std::tie(inputShape, Axis, inputType, Depth, OnValue, OffValue, outPrc, cpuParams) = this->GetParam();
+        std::tie(inputShape, Axis, inputType, Depth, OnValue, OffValue, inType, outType, configuration, cpuParams) = this->GetParam();
 
         if (inputType.second && inputType.first == ngraph::helpers::InputLayerType::CONSTANT) {
             generateDepth();
         }
 
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
-        selectedType = std::string("ref_any_I32");
-        outType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
+        if (inType == ElementType::i64 || inType == ElementType::u64) {
+            auto i64Flag = configuration.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64Flag == configuration.end() || i64Flag->second == PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);
+            } else {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i64);
+            }
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, inType);
+        }
 
         init_input_shapes({inputShape});
         if (inputType.second) {
@@ -102,6 +126,7 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
 
         function = createFunction(inputType.first == ngraph::helpers::InputLayerType::CONSTANT);
     }
+
     void init_ref_function(std::shared_ptr<ov::Model> &funcRef, const std::vector<ov::Shape>& targetInputStaticShapes) override {
         if (function->get_parameters().size() == 2) {
             generateDepth();
@@ -109,6 +134,7 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
         }
         ngraph::helpers::resize_function(funcRef, targetInputStaticShapes);
     }
+
     void validate() override {
             auto actualOutputs = get_plugin_outputs();
         if (function->get_parameters().size() == 2) {
@@ -128,24 +154,26 @@ class OneHotLayerCPUTest : public testing::WithParamInterface<oneHotCPUTestParam
 
         compare(expectedOutputs, actualOutputs);
     }
-    std::shared_ptr<ngraph::Function> createFunction(bool depthConst) {
-        auto params = ngraph::builder::makeDynamicParams(ngraph::element::i32, {inputDynamicShapes.front()});
+
+    std::shared_ptr<ov::Model> createFunction(bool depthConst) {  // depth is a Constant when depthConst is true, else a Parameter
+        auto params = ngraph::builder::makeDynamicParams(inType, {inputDynamicShapes.front()});  // indices input in the tested precision
         params.front()->set_friendly_name("ParamsIndices");
         std::shared_ptr<ov::Node> depth;
         if (depthConst) {
-            depth = ngraph::op::Constant::create(ngraph::element::i32, ngraph::Shape{ }, {Depth});
+            depth = ov::op::v0::Constant::create(inType, ov::Shape{ }, {Depth});  // scalar depth constant
         } else {
-            auto depthParam = std::make_shared<ngraph::op::Parameter>(ngraph::element::i32, ngraph::Shape{ });
+            auto depthParam = std::make_shared<ov::op::v0::Parameter>(inType, ov::Shape{ });  // scalar depth supplied at run time
             depthParam->set_friendly_name("ParamDepth");
             params.push_back(depthParam);
             depth = depthParam;
         }
-        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
-        auto on_value_const = std::make_shared<ngraph::op::Constant>(outType, ngraph::Shape{ }, OnValue);
-        auto off_value_const = std::make_shared<ngraph::op::Constant>(outType, ngraph::Shape{ }, OffValue);
-        auto oneHot = std::make_shared<ngraph::opset5::OneHot>(paramOuts[0], depth, on_value_const, off_value_const, Axis);
-        return makeNgraphFunction(ngraph::element::i32, params, oneHot, "OneHot");
+        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
+        auto on_value_const = std::make_shared<ov::op::v0::Constant>(outType, ov::Shape{ }, OnValue);  // on/off scalars use the output type
+        auto off_value_const = std::make_shared<ov::op::v0::Constant>(outType, ov::Shape{ }, OffValue);
+        auto oneHot = std::make_shared<ov::op::v1::OneHot>(paramOuts[0], depth, on_value_const, off_value_const, Axis);
+        return makeNgraphFunction(inType, params, oneHot, "OneHot");
     }
+
     void generateDepth() {
         testing::internal::Random random(time(nullptr));
         random.Generate(10);
@@ -163,13 +191,19 @@ TEST_P(OneHotLayerCPUTest, CompareWithRefs) {
 }
 
 namespace {
-const std::vector<Precision> outPrc = {
-        Precision::FP32,
-        Precision::BF16,
-        Precision::I8
-        // Precision::U8  // Precision cannot be wrapped to constant one hot
+const std::vector<ElementType> inPrc = {  // input precisions for the suites run with emptyConfig
+        ElementType::i32,
 };
 
+const std::vector<ElementType> outPrc = {  // output precisions for the suites run with emptyConfig
+        ElementType::f32,
+        ElementType::bf16,
+        ElementType::i8
+        // ElementType::u8  // Precision cannot be wrapped to constant one hot
+};
+
+const CPUSpecificParams cpuParamsRef{{}, {}, {"ref_any"}, "ref_any"};  // {inFmts, outFmts, priority, selectedType}
+
 std::vector<std::pair<ngraph::helpers::InputLayerType, bool>> secondaryInputTypesStaticCase = {
         {ngraph::helpers::InputLayerType::CONSTANT, true},
         {ngraph::helpers::InputLayerType::CONSTANT, false}
@@ -184,6 +218,11 @@ const std::vector<ov::Shape> staticInputShapes0D = {
         { }
 };
 
+const ov::AnyMap i64Config = {  // plugin configuration used by the *_I64 suites
+        {PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}
+};
+const ov::AnyMap emptyConfig = {};  // no additional plugin options
+
 // 0d -> 1d, depth
 const auto testCase_1d = ::testing::Combine(
         ::testing::ValuesIn(static_shapes_to_test_representation(staticInputShapes0D)),
@@ -192,11 +231,27 @@ const auto testCase_1d = ::testing::Combine(
         ::testing::Values(3),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_1D, OneHotLayerCPUTest, testCase_1d, OneHotLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_1D_I64, OneHotLayerCPUTest,
+                ::testing::Combine(
+                        ::testing::ValuesIn(static_shapes_to_test_representation(staticInputShapes0D)),  // input shape
+                        ::testing::Values(-1, 0),  // axis
+                        ::testing::ValuesIn(secondaryInputTypesStaticCase),  // secondary (depth) input type
+                        ::testing::Values(3),  // depth
+                        ::testing::Values(1.f),  // on_value
+                        ::testing::Values(0.f),  // off_value
+                        ::testing::Values(ElementType::i64),  // input precision
+                        ::testing::Values(ElementType::i64),  // output precision
+                        ::testing::Values(i64Config),  // additional network configuration
+                        ::testing::Values(cpuParamsRef)),  // CPU-specific params
+                OneHotLayerCPUTest::getTestCaseName);
+
 const std::vector<ov::Shape> staticInputShapes1D = {
         { 3 }
 };
@@ -208,11 +263,27 @@ const auto testCase_2d_static = ::testing::Combine(
         ::testing::Values(6),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_2D_Static, OneHotLayerCPUTest, testCase_2d_static, OneHotLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_2D_I64_Static, OneHotLayerCPUTest,
+                ::testing::Combine(
+                        ::testing::ValuesIn(static_shapes_to_test_representation(staticInputShapes1D)),  // input shape
+                        ::testing::Values(-1, 0, 1),  // axis
+                        ::testing::ValuesIn(secondaryInputTypesStaticCase),  // secondary (depth) input type
+                        ::testing::Values(6),  // depth
+                        ::testing::Values(1.f),  // on_value
+                        ::testing::Values(0.f),  // off_value
+                        ::testing::Values(ElementType::i64),  // input precision
+                        ::testing::Values(ElementType::i64),  // output precision
+                        ::testing::Values(i64Config),  // additional network configuration
+                        ::testing::Values(cpuParamsRef)),  // CPU-specific params
+                OneHotLayerCPUTest::getTestCaseName);
+
 const std::vector<InputShape> dynamicInputShapes1D = {
         {{-1}, {{3}, {4}, {5}}},
         {{{1, 5}}, {{1}, {3}, {5}}},
@@ -225,8 +296,10 @@ const auto testCase_2d_dynamic = ::testing::Combine(
         ::testing::Values(6),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_2D_Dynamic, OneHotLayerCPUTest, testCase_2d_dynamic, OneHotLayerCPUTest::getTestCaseName);
 
@@ -241,8 +314,10 @@ const auto testCase_3d_static = ::testing::Combine(
         ::testing::Values(4),
         ::testing::Values(2.f),
         ::testing::Values(-1.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_3D_Static, OneHotLayerCPUTest, testCase_3d_static, OneHotLayerCPUTest::getTestCaseName);
 
@@ -259,8 +334,10 @@ const auto testCase_3d_dynamic = ::testing::Combine(
         ::testing::Values(4),
         ::testing::Values(2.f),
         ::testing::Values(-1.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_3D_Dynamic, OneHotLayerCPUTest, testCase_3d_dynamic, OneHotLayerCPUTest::getTestCaseName);
 
@@ -275,8 +352,10 @@ const auto testCase_4d_static = ::testing::Combine(
         ::testing::Values(4),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_4D_Static, OneHotLayerCPUTest, testCase_4d_static, OneHotLayerCPUTest::getTestCaseName);
 
@@ -293,11 +372,27 @@ const auto testCase_4d_dynamic = ::testing::Combine(
         ::testing::Values(4),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_4D_Dynamic, OneHotLayerCPUTest, testCase_4d_dynamic, OneHotLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_4D_I64_Dynamic, OneHotLayerCPUTest,
+                ::testing::Combine(
+                        ::testing::ValuesIn(dynamicInputShapes3D),  // input shape
+                        ::testing::Values(-1, 0, 1, 2),  // axis
+                        ::testing::ValuesIn(secondaryInputTypesDynamicCase),  // secondary (depth) input type
+                        ::testing::Values(4),  // depth
+                        ::testing::Values(1.f),  // on_value
+                        ::testing::Values(0.f),  // off_value
+                        ::testing::Values(ElementType::i64),  // input precision
+                        ::testing::Values(ElementType::i64),  // output precision
+                        ::testing::Values(i64Config),  // additional network configuration
+                        ::testing::Values(cpuParamsRef)),  // CPU-specific params
+                OneHotLayerCPUTest::getTestCaseName);
+
 const std::vector<ov::Shape> staticInputShapes4D = {
         { 1, 3, 2, 3 }
 };
@@ -309,8 +404,10 @@ const auto testCase_5d_static = ::testing::Combine(
         ::testing::Values(4),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_5D_Static, OneHotLayerCPUTest, testCase_5d_static, OneHotLayerCPUTest::getTestCaseName);
 
@@ -327,8 +424,10 @@ const auto testCase_5d_dynamic = ::testing::Combine(
         ::testing::Values(4),
         ::testing::Values(1.f),
         ::testing::Values(0.f),
+        ::testing::ValuesIn(inPrc),
         ::testing::ValuesIn(outPrc),
-        ::testing::Values(emptyCPUSpec)
+        ::testing::Values(emptyConfig),
+        ::testing::Values(cpuParamsRef)
 );
 INSTANTIATE_TEST_SUITE_P(smoke_OneHotCPU_5D_Dynamic, OneHotLayerCPUTest, testCase_5d_dynamic, OneHotLayerCPUTest::getTestCaseName);
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_ND_update.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_ND_update.cpp
index 019c40e390cbd8..a282a46e0ccf8e 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_ND_update.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_ND_update.cpp
@@ -165,6 +165,7 @@ const std::vector<ScatterNDUpdateLayerParams> scatterParams = {
 const std::vector<ElementType> inputPrecisions = {
     ElementType::f32,
     ElementType::i32,
+    ElementType::i64  // NOTE(review): no KEY_CPU_NATIVE_I64 config is added in this file (cf. scatter_update.cpp) - confirm intended
 };
 
 const std::vector<ElementType> constantPrecisions = {
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_elements_update.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_elements_update.cpp
index bc6c5b33692077..9e908d375b01f8 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_elements_update.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_elements_update.cpp
@@ -156,6 +156,7 @@ const std::vector<ScatterElementsUpdateLayerParams> scatterParams = {
 const std::vector<ElementType> inputPrecisions = {
     ElementType::f32,
     ElementType::i32,
+    ElementType::i64  // NOTE(review): no KEY_CPU_NATIVE_I64 config is added in this file (cf. scatter_update.cpp) - confirm intended
 };
 
 const std::vector<ElementType> constantPrecisions = {
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_update.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_update.cpp
index bc65fd172874d5..ec749f870aea92 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_update.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/scatter_update.cpp
@@ -5,8 +5,8 @@
 #include "test_utils/cpu_test_utils.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "ngraph_functions/builders.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
-using namespace ngraph;
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
 using namespace ov::test;
@@ -25,7 +25,8 @@ struct ScatterUpdateLayerParams {
 using scatterUpdateParams = std::tuple<
     ScatterUpdateLayerParams,
     ElementType,        // input precision
-    ElementType>;       // indices precision
+    ElementType,        // indices precision
+    ov::AnyMap>;        // Additional network configuration
 
 class ScatterUpdateLayerCPUTest : public testing::WithParamInterface<scatterUpdateParams>, public SubgraphBaseTest, public CPUTestsBase {
 public:
@@ -33,7 +34,8 @@ class ScatterUpdateLayerCPUTest : public testing::WithParamInterface<scatterUpda
         ScatterUpdateLayerParams scatterParams;
         ElementType inputPrecision;
         ElementType idxPrecision;
-        std::tie(scatterParams, inputPrecision, idxPrecision) = obj.param;
+        ov::AnyMap config;
+        std::tie(scatterParams, inputPrecision, idxPrecision, config) = obj.param;
         const auto inputShapes = scatterParams.inputShapes;
         const auto indicesDescr = scatterParams.indicesDescriprion;
         const auto axis = scatterParams.axis;
@@ -53,6 +55,12 @@ class ScatterUpdateLayerCPUTest : public testing::WithParamInterface<scatterUpda
         }
         result << "indices_shape=" << indicesDescr.first << "_indices_values=" << CommonTestUtils::vec2str(indicesDescr.second)
                << "axis=" << axis << "_idx_precision=" << idxPrecision;
+
+        for (auto const& configItem : config) {
+            result << "_configItem=" << configItem.first << "_";
+            configItem.second.print(result);
+        }
+
         return result.str();
     }
 
@@ -62,18 +70,28 @@ class ScatterUpdateLayerCPUTest : public testing::WithParamInterface<scatterUpda
         ScatterUpdateLayerParams scatterParams;
         ElementType inputPrecision;
         ElementType idxPrecision;
-        std::tie(scatterParams, inputPrecision, idxPrecision) = this->GetParam();
+        std::tie(scatterParams, inputPrecision, idxPrecision, configuration) = this->GetParam();
         const auto inputShapes = scatterParams.inputShapes;
         const auto indicesDescr = scatterParams.indicesDescriprion;
         const auto axis = scatterParams.axis;
 
         init_input_shapes(inputShapes);
-        selectedType = makeSelectedTypeStr("unknown", inputPrecision);
+
+        if (inputPrecision == ElementType::i64 || inputPrecision == ElementType::u64) {
+            auto i64It = configuration.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64It == configuration.end() || i64It->second == PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr("unknown", ElementType::i32);
+            } else {
+                selectedType = makeSelectedTypeStr("unknown", ElementType::i64);
+            }
+        } else {
+            selectedType = makeSelectedTypeStr("unknown", inputPrecision);
+        }
 
         auto params = ngraph::builder::makeDynamicParams(inputPrecision, inputDynamicShapes);
-        auto indicesNode = ngraph::opset1::Constant::create(idxPrecision, indicesDescr.first, indicesDescr.second);
-        auto axis_node = ngraph::opset1::Constant::create(idxPrecision, {}, { axis });
-        auto scatter = std::make_shared<ngraph::opset3::ScatterUpdate>(params[0], indicesNode, params[1], axis_node);
+        auto indicesNode = ov::op::v0::Constant::create(idxPrecision, indicesDescr.first, indicesDescr.second);
+        auto axis_node = ov::op::v0::Constant::create(idxPrecision, {}, { axis });
+        auto scatter = std::make_shared<ov::op::v3::ScatterUpdate>(params[0], indicesNode, params[1], axis_node);
 
         function = makeNgraphFunction(inputPrecision, params, scatter, "ScatterUpdateLayerCPUTest");
     }
@@ -127,9 +145,12 @@ const std::vector<ScatterUpdateLayerParams> scatterParams = {
     },
 };
 
+const ov::AnyMap config = {};  // default run: no extra plugin options; const keeps the symbol local to this file
+const ov::AnyMap config_i64 = {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}};  // config for the I64 suite
+
 const std::vector<ElementType> inputPrecisions = {
     ElementType::f32,
-    ElementType::i32,
+    ElementType::i32
 };
 
 const std::vector<ElementType> constantPrecisions = {
@@ -138,9 +159,19 @@ const std::vector<ElementType> constantPrecisions = {
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ScatterUpdateLayerCPUTest,
-    ::testing::Combine(
-        ::testing::ValuesIn(scatterParams),
-        ::testing::ValuesIn(inputPrecisions),
-        ::testing::ValuesIn(constantPrecisions)),
-    ScatterUpdateLayerCPUTest::getTestCaseName);
+        ::testing::Combine(
+                ::testing::ValuesIn(scatterParams),  // shapes, indices and axis
+                ::testing::ValuesIn(inputPrecisions),  // input precision
+                ::testing::ValuesIn(constantPrecisions),  // indices precision
+                ::testing::Values(config)),  // additional network configuration
+        ScatterUpdateLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_I64, ScatterUpdateLayerCPUTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(scatterParams),  // shapes, indices and axis
+                ::testing::Values(ElementType::i64),  // input precision
+                ::testing::ValuesIn(constantPrecisions),  // indices precision
+                ::testing::Values(config_i64)),  // additional network configuration
+        ScatterUpdateLayerCPUTest::getTestCaseName);
+
 } // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp
index e7e9b86aa68088..b369ca19ad649b 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp
@@ -5,7 +5,9 @@
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "ngraph_functions/builders.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
+using namespace InferenceEngine;
 using namespace ov::test;
 using namespace CPUTestUtils;
 
@@ -17,6 +19,7 @@ typedef std::tuple<
         ElementType,               // Net precision
         InputShape,                // Input shapes
         std::vector<size_t>,       // Used outputs indices
+        ov::AnyMap,                // Additional network configuration
         CPUSpecificParams
 > splitCPUTestParams;
 
@@ -28,9 +31,10 @@ class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>
         int64_t axis;
         ElementType netPrecision;
         InputShape inputShapes;
-        InferenceEngine::SizeVector outIndices;
+        SizeVector outIndices;
+        ov::AnyMap config;
         CPUSpecificParams cpuParams;
-        std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, cpuParams) = obj.param;
+        std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, config, cpuParams) = obj.param;
 
         std::ostringstream result;
         result << "IS=";
@@ -45,7 +49,12 @@ class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>
             result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
         }
         result << "netPRC=" << netPrecision << "_";
+        for (const auto& entry : config) {  // one "_configItem=<key>_<value>" fragment per config entry
+            result << "_configItem=" << entry.first << "_";
+            entry.second.print(result);
+        }
         result << CPUTestsBase::getTestCaseName(cpuParams);
+
         return result.str();
     }
 
@@ -56,9 +65,9 @@ class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>
         size_t axis, numSplits;
         ElementType netPrecision;
         InputShape inputShapes;
-        InferenceEngine::SizeVector outIndices;
+        SizeVector outIndices;
         CPUSpecificParams cpuParams;
-        std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, cpuParams) = this->GetParam();
+        std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, configuration, cpuParams) = this->GetParam();
         if (outIndices.empty()) {
             for (size_t i = 0; i < numSplits; ++i) {
                 outIndices.push_back(i);
@@ -66,27 +75,37 @@ class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>
         }
 
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
-        selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();
+
+        if (netPrecision == ElementType::i64) {  // expected precision for i64 depends on KEY_CPU_NATIVE_I64
+            auto i64Flag = configuration.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64Flag == configuration.end() || i64Flag->second == PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);  // key absent or NO: expect i32
+            } else {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i64);  // any other value: expect i64
+            }
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, netPrecision);
+        }
 
         init_input_shapes({inputShapes});
 
         auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
         auto paramOuts = ngraph::helpers::convert2OutputVector(
-                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
-        auto split = std::dynamic_pointer_cast<ngraph::opset5::Split>(ngraph::builder::makeSplit(paramOuts[0],
+                ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
+        auto split = std::dynamic_pointer_cast<ov::op::v1::Split>(ngraph::builder::makeSplit(paramOuts[0],
                                                                                                  netPrecision, numSplits, axis));
-        ngraph::ResultVector results;
+        ov::ResultVector results;
 
         for (size_t i = 0; i < outIndices.size(); i++) {
             // This WA is necessary because result nodes connected to the same output of the split node (or any node) are deduplicated
-            // on the CNNNetwork level. It might not be needed when the CPU plugin moves completely to nGraph.
+            // on the CNNNetwork level. It might not be needed when the CPU plugin moves completely to Core model.
             // This is still a single layer test since the Relu nodes are added only as a WA.
 
-            auto fakeEltwise = std::make_shared<ngraph::opset5::Relu>(split->output(outIndices[i]));
-            results.push_back(std::make_shared<ngraph::opset5::Result>(fakeEltwise));
+            auto fakeEltwise = std::make_shared<ov::op::v0::Relu>(split->output(outIndices[i]));
+            results.push_back(std::make_shared<ov::op::v0::Result>(fakeEltwise));
         }
         split->get_rt_info() = getCPUInfo();
-        function = std::make_shared<ngraph::Function>(results, params, "split");
+        function = std::make_shared<ov::Model>(results, params, "split");
     }
 };
 
@@ -120,6 +139,9 @@ const auto blocked16_5D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "unknown
 const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"};
 const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};
 
+const ov::AnyMap additional_config = {};  // no plugin overrides; used by the non-I64 suites
+const ov::AnyMap i64Config = {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}};  // used by the _I64 suites
+
 // List of precisions natively supported by onednn.
 const std::vector<ElementType> netPrecisions = {
         ElementType::i8,
@@ -172,6 +194,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes4D_Nspc2NcspSpecial),
                                 ::testing::ValuesIn(outIndices4),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(perChannelsToPlanar_4D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Nspc2NcspSpecial_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(4),
+                                ::testing::Values(1),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes4D_Nspc2NcspSpecial),
+                                ::testing::ValuesIn(outIndices4),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(perChannelsToPlanar_4D)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -206,6 +240,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes5D_Nspc2NcspSpecial),
                                 ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(perChannelsToPlanar_5D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Nspc2NcspSpecial_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(1),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes5D_Nspc2NcspSpecial),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(perChannelsToPlanar_5D)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -249,6 +295,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_planar, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes4D_planar),
                                 ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(planar_4D_ref, perChannels_4D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_planar_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(2, 3),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes4D_planar),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(planar_4D_ref, perChannels_4D)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -292,6 +350,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes4D_block),
                                 ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(blocked8_4D_ref)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(2, 3),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes4D_block),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(blocked8_4D_ref)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -302,6 +372,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes4D_block),
                                 ::testing::ValuesIn(outIndices4),
+                                ::testing::Values(additional_config),
                                 ::testing::Values(blocked16_4D_ref)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -336,6 +407,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_planar, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes5D_planar),
                                 ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(planar_5D_ref, perChannels_5D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_planar_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(2, 3, 4),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes5D_planar),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(planar_5D_ref, perChannels_5D)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -370,6 +453,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes5D_block),
                                 ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(blocked8_5D_ref)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(2, 3, 4),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes5D_block),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(blocked8_5D_ref)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -380,6 +475,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes5D_block),
                                 ::testing::ValuesIn(outIndices4),
+                                ::testing::Values(additional_config),
                                 ::testing::Values(blocked16_5D_ref)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -414,6 +510,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split3D, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes3D),
                                 ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(additional_config),
+                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
+                                SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split3D_I64, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(7),
+                                ::testing::Values(1, 2),
+                                ::testing::Values(ElementType::i64),
+                                ::testing::ValuesIn(inputShapes3D),
+                                ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(i64Config),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                                 SplitLayerCPUTest::getTestCaseName);
 
@@ -448,6 +556,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split2D, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes2D),
                                 ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(additional_config),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -458,8 +567,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split1D_static, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::Values(InputShape{ {}, {{10}} }),
                                 ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(additional_config),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),
-                            SplitLayerCPUTest::getTestCaseName);
+                        SplitLayerCPUTest::getTestCaseName);
 
 const std::vector<InputShape> inputShapes1D = {
         {
@@ -491,6 +601,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split1D, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes1D),
                                 ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(additional_config),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                             SplitLayerCPUTest::getTestCaseName);
 
@@ -513,6 +624,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_by_batch, SplitLayerCPUTest,
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::ValuesIn(inputShapes4D_dynBatch),
                                 ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(additional_config),
                                 ::testing::Values(planar_4D_ref, perChannels_4D)),
                         SplitLayerCPUTest::getTestCaseName);
 
@@ -557,6 +669,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split_CPU_planar_inPlace_0, SplitLayerCPUTest,
                             ::testing::Values(ElementType::f32),
                             ::testing::ValuesIn(inputShapes4D_inPlace_0),
                             ::testing::Values(std::vector<size_t>{}),
+                            ::testing::Values(additional_config),
                             ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})),
                     SplitLayerCPUTest::getTestCaseName);
 
@@ -573,24 +686,26 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace_1, SplitLayerCPUTest,
                                                               {1, 32, 5, 8}
                                                            } }),
                             ::testing::ValuesIn(outIndices4),
+                            ::testing::Values(additional_config),
                             ::testing::Values(planar_4D, blocked8_4D)),
                     SplitLayerCPUTest::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace_1, SplitLayerCPUTest,
-                        ::testing::Combine(
-                                ::testing::Values(3),
-                                ::testing::Values(1),
-                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(InputShape{ {}, {{1, 48, 5, 6, 3}} },
-                                              InputShape{ {1, 48, -1, -1, 3},
-                                                          {
-                                                              {1, 48, 5, 6, 3},
-                                                              {1, 48, 5, 2, 3},
-                                                              {1, 48, 5, 8, 3}
-                                                           } }),
-                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(planar_5D, blocked16_5D)),
-                        SplitLayerCPUTest::getTestCaseName);
+                    ::testing::Combine(
+                            ::testing::Values(3),
+                            ::testing::Values(1),
+                            ::testing::ValuesIn(netPrecisions),
+                            ::testing::Values(InputShape{ {}, {{1, 48, 5, 6, 3}} },
+                                            InputShape{ {1, 48, -1, -1, 3},
+                                                        {
+                                                            {1, 48, 5, 6, 3},
+                                                            {1, 48, 5, 2, 3},
+                                                            {1, 48, 5, 8, 3}
+                                                        } }),
+                            ::testing::ValuesIn(outIndices3),
+                            ::testing::Values(additional_config),
+                            ::testing::Values(planar_5D, blocked16_5D)),
+                    SplitLayerCPUTest::getTestCaseName);
 
 } // namespace
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/strided_slice.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/strided_slice.cpp
index 310980153133d7..92f33df8353878 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/strided_slice.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/strided_slice.cpp
@@ -7,6 +7,7 @@
 #include "test_utils/cpu_test_utils.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
@@ -32,6 +33,7 @@ typedef std::tuple<
         StridedSliceParams,
         ngraph::helpers::InputLayerType,    // Secondary input types
         ElementType,                        // Element type
+        ov::AnyMap,                         // Additional network configuration
         CPUSpecificParams> StridedSliceLayerCPUTestParamSet;
 
 class StridedSliceLayerCPUTest : public testing::WithParamInterface<StridedSliceLayerCPUTestParamSet>,
@@ -43,7 +45,8 @@ class StridedSliceLayerCPUTest : public testing::WithParamInterface<StridedSlice
         ngraph::helpers::InputLayerType secondaryInputType;
         ElementType dataType;
         CPUSpecificParams cpuParams;
-        std::tie(shapes, params, secondaryInputType, dataType, cpuParams) = obj.param;
+        ov::AnyMap additionalConfig;
+        std::tie(shapes, params, secondaryInputType, dataType, additionalConfig, cpuParams) = obj.param;
 
         std::ostringstream results;
         results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
@@ -61,13 +64,22 @@ class StridedSliceLayerCPUTest : public testing::WithParamInterface<StridedSlice
         results << "new_axis_m=" << (params.newAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.newAxisMask)) << "_";
         results << "shrink_m=" << (params.shrinkAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.shrinkAxisMask)) << "_";
         results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.ellipsisAxisMask)) << "_";
+
+        if (!additionalConfig.empty()) {  // suffix is emitted only when a plugin config was supplied
+            results << "_PluginConf";
+            for (const auto& entry : additionalConfig) {
+                results << "_" << entry.first << "=";
+                entry.second.print(results);
+            }
+        }
+
         results << CPUTestsBase::getTestCaseName(cpuParams);
 
         return results.str();
     }
 
 protected:
-    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
         std::vector<void*> inputValues = {ssParams.begin.data(), ssParams.end.data(), ssParams.strides.data()};
 
         inputs.clear();
@@ -88,11 +100,22 @@ class StridedSliceLayerCPUTest : public testing::WithParamInterface<StridedSlice
         InputShape shapes;
         ngraph::helpers::InputLayerType secondaryInputType;
         CPUSpecificParams cpuParams;
-        ov::element::Type dataType;
-        std::tie(shapes, ssParams, secondaryInputType, dataType, cpuParams) = this->GetParam();
+        ElementType dataType;
+        std::tie(shapes, ssParams, secondaryInputType, dataType, configuration, cpuParams) = this->GetParam();
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
 
-        selectedType = makeSelectedTypeStr("ref", dataType);
+        selectedType = "ref";  // fixed base name; replaces the value unpacked from cpuParams
+        if (dataType == ElementType::i64 || dataType == ElementType::u64) {  // 64-bit ints: depends on KEY_CPU_NATIVE_I64
+            auto i64It = configuration.find(PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64It == configuration.end() || i64It->second == PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);  // key absent or NO: expect i32
+            } else {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i64);  // any other value: expect i64 (also for u64)
+            }
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, dataType);
+        }
+
         targetDevice = CommonTestUtils::DEVICE_CPU;
         std::vector<InputShape> input_shapes = {shapes};
 
@@ -104,17 +127,17 @@ class StridedSliceLayerCPUTest : public testing::WithParamInterface<StridedSlice
         }
 
         auto params = ngraph::builder::makeDynamicParams(dataType, inputDynamicShapes);
-        std::shared_ptr<ngraph::Node> ss;
+        std::shared_ptr<ov::Node> ss;
         if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
             ov::Shape inShape = {ssParams.begin.size()};
 
-            auto beginNode = std::make_shared<ngraph::opset1::Parameter>(ov::element::i64, inShape);
-            auto endNode = std::make_shared<ngraph::opset1::Parameter>(ov::element::i64, inShape);
-            auto strideNode = std::make_shared<ngraph::opset1::Parameter>(ov::element::i64, inShape);
+            auto beginNode = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, inShape);
+            auto endNode = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, inShape);
+            auto strideNode = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, inShape);
 
-            params.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(beginNode));
-            params.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(endNode));
-            params.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(strideNode));
+            params.push_back(beginNode);  // make_shared above already yields shared_ptr<ov::op::v0::Parameter>; no cast needed
+            params.push_back(endNode);
+            params.push_back(strideNode);
 
             ss = ngraph::builder::makeStridedSlice(params[0], beginNode, endNode, strideNode, inType, ssParams.beginMask,
                                                    ssParams.endMask, ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask);
@@ -150,7 +173,8 @@ const auto cpuParams_ncdhw = CPUSpecificParams {{ncdhw}, {ncdhw}, {}, {}};
 const std::vector<ElementType> inputPrecisions = {
         ElementType::f32,
         ElementType::bf16,
-        ElementType::i8
+        ElementType::i8,
+        ElementType::i64
 };
 
 const std::vector<ngraph::helpers::InputLayerType> inputLayerTypes = {
@@ -178,12 +202,29 @@ const std::vector<StridedSliceParams> paramsPlain2D = {
         StridedSliceParams{ { 2 }, { 22 }, { 2 }, { 0 }, { 0 },  { },  { },  { } },
 };
 
+const ov::AnyMap additionalConfig;  // empty map: no plugin overrides
+const std::vector<ov::AnyMap> i64Config = {  // KEY_CPU_NATIVE_I64 set to YES and to NO
+        {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}},
+        {{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::NO}}
+};
+
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Static_2D, StridedSliceLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::ValuesIn(static_shapes_to_test_representation({{32, 20}})),
                                  ::testing::ValuesIn(paramsPlain2D),
                                  ::testing::ValuesIn(inputLayerTypes),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
+                                 ::testing::Values(emptyCPUSpec)),
+                         StridedSliceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Static_2D_I64, StridedSliceLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(static_shapes_to_test_representation({{32, 20}})),
+                                 ::testing::ValuesIn(paramsPlain2D),
+                                 ::testing::ValuesIn(inputLayerTypes),
+                                 ::testing::Values(ElementType::i64),
+                                 ::testing::ValuesIn(i64Config),
                                  ::testing::Values(emptyCPUSpec)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -193,6 +234,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_2D, StridedSliceLay
                              ::testing::ValuesIn(paramsPlain2D),
                              ::testing::ValuesIn(inputLayerTypes),
                              ::testing::ValuesIn(inputPrecisions),
+                             ::testing::Values(additionalConfig),
+                             ::testing::Values(emptyCPUSpec)),
+                         StridedSliceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_2D_I64, StridedSliceLayerCPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic2D),
+                             ::testing::ValuesIn(paramsPlain2D),
+                             ::testing::ValuesIn(inputLayerTypes),
+                             ::testing::Values(ElementType::i64),
+                             ::testing::ValuesIn(i64Config),
                              ::testing::Values(emptyCPUSpec)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -234,6 +286,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D, StridedSliceLay
                                  ::testing::ValuesIn(testCasesCommon4D),
                                  ::testing::ValuesIn(inputLayerTypes),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsCommon4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -243,6 +296,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, StridedSliceLa
                              ::testing::ValuesIn(testCasesCommon4D),
                              ::testing::ValuesIn(inputLayerTypes),
                              ::testing::ValuesIn(inputPrecisions),
+                             ::testing::Values(additionalConfig),
                              ::testing::ValuesIn(CPUParamsCommon4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -306,6 +360,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D_Subset1, Strided
                                  ::testing::ValuesIn(testCasesBlocked4DSubset1),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -315,6 +370,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D_Subset1, Stride
                                  ::testing::ValuesIn(testCasesBlocked4DSubset1),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -324,6 +380,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D_Subset2, Strided
                                  ::testing::ValuesIn(testCasesBlocked4DSubset2),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -333,6 +390,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D_Subset2, Stride
                                  ::testing::ValuesIn(testCasesBlocked4DSubset2),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -373,6 +431,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D, StridedSliceLay
                                  ::testing::ValuesIn(testCasesCommon5D),
                                  ::testing::ValuesIn(inputLayerTypes),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
+                                 ::testing::ValuesIn(CPUParamsCommon5D)),
+                        StridedSliceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_I64, StridedSliceLayerCPUTest,  // static 5D shapes, i64 only
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic5D)),
+                                 ::testing::ValuesIn(testCasesCommon5D),
+                                 ::testing::ValuesIn(inputLayerTypes),
+                                 ::testing::Values(ElementType::i64),  // precision is pinned to i64 for this suite
+                                 ::testing::ValuesIn(i64Config),  // one variant per entry of i64Config
                                  ::testing::ValuesIn(CPUParamsCommon5D)),
                         StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -382,6 +451,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D, StridedSliceLa
                                  ::testing::ValuesIn(testCasesCommon5D),
                                  ::testing::ValuesIn(inputLayerTypes),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsCommon5D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -444,6 +514,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_Subset1, Strided
                                  ::testing::ValuesIn(testCasesBlocked5DSubset1),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked5D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -453,6 +524,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D_Subset1, Stride
                                  ::testing::ValuesIn(testCasesBlocked5DSubset1),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked5D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -462,6 +534,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_Subset2, Strided
                                  ::testing::ValuesIn(testCasesBlocked4DSubset2),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked4D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -471,6 +544,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D_Subset2, Stride
                                  ::testing::ValuesIn(testCasesBlocked5DSubset2),
                                  ::testing::ValuesIn(inputLayerTypesBlocked),
                                  ::testing::ValuesIn(inputPrecisions),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::ValuesIn(CPUParamsBlocked5D)),
                          StridedSliceLayerCPUTest::getTestCaseName);
 
@@ -502,6 +576,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_StridedSliceLayerDescriptorCPUTest, StridedSliceL
                                  ::testing::ValuesIn(testCasesDescriptors),
                                  ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
                                  ::testing::Values(ElementType::f32),
+                                 ::testing::Values(additionalConfig),
                                  ::testing::Values(cpuParams_nChw8c)),
                          StridedSliceLayerDescriptorCPUTest::getTestCaseName);
 
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp
index db6c874ba660a0..de4c741df543b3 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp
@@ -6,36 +6,38 @@
 #include "ngraph_functions/builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace CPUTestUtils;
+using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 
 using TileLayerTestParamsSet = typename std::tuple<
-        std::vector<ov::test::InputShape>,     // Input shapes
-        std::vector<int64_t>,                  // Repeats
-        ov::element::Type_t,                   // Network precision
-        bool,                                  // Is Repeats input constant
-        std::string>;                          // Device name
+        std::vector<InputShape>,       // Input shapes
+        std::vector<int64_t>,          // Repeats
+        ElementType,                   // Network precision
+        bool,                          // Is Repeats input constant
+        ov::AnyMap>;                   // Additional network configuration
 
 typedef std::tuple<
         TileLayerTestParamsSet,
         CPUSpecificParams> TileLayerCPUTestParamsSet;
 
 class TileLayerCPUTest : public testing::WithParamInterface<TileLayerCPUTestParamsSet>,
-                         virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
+                         virtual public SubgraphBaseTest, public CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<TileLayerCPUTestParamsSet> obj) {
         TileLayerTestParamsSet basicParamsSet;
         CPUSpecificParams cpuParams;
         std::tie(basicParamsSet, cpuParams) = obj.param;
 
-        std::vector<ov::test::InputShape> inputShapes;
+        std::vector<InputShape> inputShapes;
         std::vector<int64_t> repeats;
-        ov::element::Type_t netPrecision;
+        ElementType netPrecision;
         bool isRepeatsConst;
-        std::string deviceName;
-        std::tie(inputShapes, repeats, netPrecision, isRepeatsConst, deviceName) = basicParamsSet;
+        ov::AnyMap config;
+        std::tie(inputShapes, repeats, netPrecision, isRepeatsConst, config) = basicParamsSet;
 
         std::ostringstream result;
         result << "IS=(";
@@ -51,7 +53,11 @@ class TileLayerCPUTest : public testing::WithParamInterface<TileLayerCPUTestPara
         result << "Repeats=" << CommonTestUtils::vec2str(repeats)  << "_";
         result << "netPrec=" << netPrecision << "_";
         result << "constRepeats=" << (isRepeatsConst ? "True" : "False") << "_";
-        result << "trgDev=" << deviceName;
+
+        for (auto const& configItem : config) {  // append every config entry to the test name
+            result << "_configItem=" << configItem.first << "_";
+            configItem.second.print(result);  // writes the entry's value to the stream (ov::Any::print)
+        }
 
         result << CPUTestsBase::getTestCaseName(cpuParams);
 
@@ -60,18 +66,29 @@ class TileLayerCPUTest : public testing::WithParamInterface<TileLayerCPUTestPara
 
 protected:
     void SetUp() override {
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+
         TileLayerTestParamsSet basicParamsSet;
         CPUSpecificParams cpuParams;
         std::tie(basicParamsSet, cpuParams) = this->GetParam();
 
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
 
-        std::vector<ov::test::InputShape> inputShapes;
-        ov::element::Type_t netPrecision;
+        std::vector<InputShape> inputShapes;
+        ElementType netPrecision;
         bool isRepeatsConst;
-        std::tie(inputShapes, repeatsData, netPrecision, isRepeatsConst, targetDevice) = basicParamsSet;
+        std::tie(inputShapes, repeatsData, netPrecision, isRepeatsConst, configuration) = basicParamsSet;
 
-        selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();
+        if (netPrecision == ElementType::i64 || netPrecision == ElementType::u64) {  // 64-bit integer precisions
+            auto i64Flag = configuration.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+            if (i64Flag == configuration.end() || i64Flag->second == InferenceEngine::PluginConfigParams::NO) {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);  // flag missing or NO
+            } else {
+                selectedType = makeSelectedTypeStr(selectedType, ElementType::i64);  // flag present and not NO
+            }
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, netPrecision);  // any other precision is used as is
+        }
 
         if (inputShapes.front().first.rank() != 0) {
             inputDynamicShapes.push_back(inputShapes.front().first);
@@ -124,10 +141,10 @@ class TileLayerCPUTest : public testing::WithParamInterface<TileLayerCPUTestPara
                 }
             } else {
                 if (funcInput.get_element_type().is_real()) {
-                    tensor = ov::test::utils::create_and_fill_tensor(
+                    tensor = utils::create_and_fill_tensor(
                         funcInput.get_element_type(), targetInputStaticShapes[i], 10, 0, 1000);
                 } else {
-                    tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                    tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
                 }
             }
             inputs.insert({funcInput.get_node_shared_ptr(), tensor});
@@ -159,14 +176,15 @@ const auto cpuParams_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
 /* ========== */
 
 /* PARAMS */
-const std::vector<ov::element::Type_t> netPrecisions = {
+const std::vector<ElementType> netPrecisions = {
     ov::element::f32,
     ov::element::bf16,
     ov::element::i32,
-    ov::element::i8
+    ov::element::i8,
+    ov::element::i64
 };
 
-const std::vector<std::vector<ov::test::InputShape>> staticInputShapes4D = {
+const std::vector<std::vector<InputShape>> staticInputShapes4D = {
     {
         {{},
             { // Static shapes
@@ -182,7 +200,7 @@ const std::vector<std::vector<ov::test::InputShape>> staticInputShapes4D = {
         }
     }
 };
-const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes4D = {
+const std::vector<std::vector<InputShape>> dynamicInputShapes4D = {
     {
         { // Origin dynamic shapes
             {ov::Dimension(1, 20), ov::Dimension(10, 20), ov::Dimension(1, 20), ov::Dimension(1, 20)},
@@ -204,7 +222,7 @@ const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes4D = {
     }
 };
 
-const std::vector<std::vector<ov::test::InputShape>> staticInputShapes5D = {
+const std::vector<std::vector<InputShape>> staticInputShapes5D = {
     {
         {{},
             { // Static shapes
@@ -213,7 +231,7 @@ const std::vector<std::vector<ov::test::InputShape>> staticInputShapes5D = {
         }
     }
 };
-const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes5D = {
+const std::vector<std::vector<InputShape>> dynamicInputShapes5D = {
     {
         { // Origin dynamic shapes
             {ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 20), ov::Dimension(1, 70)},
@@ -267,6 +285,12 @@ const std::vector<CPUSpecificParams> CPUParams5D = {
         cpuParams_nCdhw8c,
         cpuParams_ndhwc,
 };
+
+const ov::AnyMap additionalConfig = {};  // empty map: no extra plugin options are passed
+const std::vector<ov::AnyMap> i64Config = {  // KEY_CPU_NATIVE_I64 set to YES, then to NO
+        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}},
+        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::NO}}
+};
 /* ============= */
 
 /* INSTANCES */
@@ -277,7 +301,18 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape4D, TileLayerCPUTest,
                                         ::testing::ValuesIn(repeats4D),
                                         ::testing::ValuesIn(netPrecisions),
                                         ::testing::Values(true),
-                                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                        ::testing::Values(additionalConfig)),
+                                ::testing::ValuesIn(CPUParams4D)),
+                        TileLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_StaticShape4D_I64, TileLayerCPUTest,  // static 4D shapes, i64 only
+                        ::testing::Combine(
+                                ::testing::Combine(
+                                        ::testing::ValuesIn(staticInputShapes4D),
+                                        ::testing::ValuesIn(repeats4D),
+                                        ::testing::Values(ElementType::i64),  // precision is pinned to i64
+                                        ::testing::Values(true),
+                                        ::testing::ValuesIn(i64Config)),  // native I64 set to YES and to NO
                                 ::testing::ValuesIn(CPUParams4D)),
                         TileLayerCPUTest::getTestCaseName);
 
@@ -288,11 +323,22 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShape4D, TileLayerCPUTest,
                                         ::testing::ValuesIn(repeats4D),
                                         ::testing::ValuesIn(netPrecisions),
                                         ::testing::Values(true, false),
-                                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                        ::testing::Values(additionalConfig)),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         TileLayerCPUTest::getTestCaseName);
 
-const std::vector<std::vector<ov::test::InputShape>> dynBatchInputShapes4D = {
+INSTANTIATE_TEST_CASE_P(smoke_DynamicShape4D_I64, TileLayerCPUTest,  // dynamic 4D shapes, i64 only
+                        ::testing::Combine(
+                                ::testing::Combine(
+                                        ::testing::ValuesIn(dynamicInputShapes4D),
+                                        ::testing::ValuesIn(repeats4D),
+                                        ::testing::Values(ElementType::i64),  // precision is pinned to i64
+                                        ::testing::Values(true, false),
+                                        ::testing::ValuesIn(i64Config)),  // native I64 set to YES and to NO
+                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
+                        TileLayerCPUTest::getTestCaseName);
+
+const std::vector<std::vector<InputShape>> dynBatchInputShapes4D = {
     { // Origin dynamic shapes
         {
             {{1, 20}, 16, 3, 4},
@@ -312,7 +358,7 @@ INSTANTIATE_TEST_CASE_P(smoke_DynBatch4D, TileLayerCPUTest,
                                     ::testing::Values(std::vector<int64_t>{1, 2, 1, 3}),
                                     ::testing::ValuesIn(netPrecisions),
                                     ::testing::Values(true),
-                                    ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                    ::testing::Values(additionalConfig)),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         TileLayerCPUTest::getTestCaseName);
 
@@ -323,7 +369,18 @@ INSTANTIATE_TEST_CASE_P(smoke_StaticShape5D, TileLayerCPUTest,
                                         ::testing::ValuesIn(repeats5D),
                                         ::testing::ValuesIn(netPrecisions),
                                         ::testing::Values(true),
-                                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                        ::testing::Values(additionalConfig)),
+                                ::testing::ValuesIn(CPUParams5D)),
+                        TileLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_StaticShape5D_I64, TileLayerCPUTest,  // static 5D shapes, i64 only
+                        ::testing::Combine(
+                                ::testing::Combine(
+                                        ::testing::ValuesIn(staticInputShapes5D),
+                                        ::testing::ValuesIn(repeats5D),
+                                        ::testing::Values(ElementType::i64),  // precision is pinned to i64
+                                        ::testing::Values(true),
+                                        ::testing::ValuesIn(i64Config)),  // native I64 set to YES and to NO
                                 ::testing::ValuesIn(CPUParams5D)),
                         TileLayerCPUTest::getTestCaseName);
 
@@ -334,7 +391,7 @@ INSTANTIATE_TEST_CASE_P(smoke_DynamicShape5D, TileLayerCPUTest,
                                         ::testing::ValuesIn(repeats5D),
                                         ::testing::ValuesIn(netPrecisions),
                                         ::testing::Values(true, false),
-                                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                        ::testing::Values(additionalConfig)),
                                 ::testing::Values(CPUSpecificParams{{}, {}, {}, "ref"})),
                         TileLayerCPUTest::getTestCaseName);
 /* ========= */
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/topk.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/topk.cpp
index 5026bd76becead..a2606a7165356c 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/topk.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/topk.cpp
@@ -6,6 +6,8 @@
 #include "test_utils/cpu_test_utils.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "ngraph_functions/builders.hpp"
+#include <openvino/opsets/opset1.hpp>
+#include <openvino/opsets/opset3.hpp>
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
@@ -88,6 +90,7 @@ class TopKLayerCPUTest : public testing::WithParamInterface<TopKLayerCPUTestPara
         std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
 
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+        selectedType = getPrimitiveType();
 
         int64_t keepK;
         SortMode mode;
@@ -98,14 +101,16 @@ class TopKLayerCPUTest : public testing::WithParamInterface<TopKLayerCPUTestPara
         sort = std::get<0>(sortTypeStable);
         stable = std::get<1>(sortTypeStable);
 
-        if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES)
-            inPrc = outPrc = netPrecision = ElementType::bf16;
-        else
-            inPrc = outPrc = netPrecision;
+        if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {  // bf16 enforcement requested
+            netPrecision = ElementType::bf16; // TODO: KEY_ENFORCE_BF16 does not work?
+            selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16);
+        } else if (netPrecision == ElementType::i64) {  // i64 network precision
+            selectedType = makeSelectedTypeStr(selectedType, ElementType::i32);  // type string is built with i32 here
+        } else {
+            selectedType = makeSelectedTypeStr(selectedType, netPrecision);  // other precisions are used as is
+        }
         configuration.insert(additionalConfig.begin(), additionalConfig.end());
 
-        selectedType = getPrimitiveType() + "_" + InferenceEngine::details::convertPrecision(netPrecision).name();
-
         staticShape = inputShape.first.rank() == 0;
         if (staticShape) {
             init_input_shapes({inputShape});
@@ -133,12 +138,12 @@ class TopKLayerCPUTest : public testing::WithParamInterface<TopKLayerCPUTestPara
 
         topk->get_rt_info() = getCPUInfo();
 
-        ngraph::ResultVector results;
+        ov::ResultVector results;
         for (size_t i = 0; i < topk->get_output_size(); i++) {
             results.push_back(std::make_shared<ov::op::v0::Result>(topk->output(i)));
         }
 
-        function = std::make_shared<ngraph::Function>(results, params, "TopK");
+        function = std::make_shared<ov::Model>(results, params, "TopK");
     }
 
     void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
@@ -156,7 +161,7 @@ class TopKLayerCPUTest : public testing::WithParamInterface<TopKLayerCPUTestPara
         tensor = ov::test::utils::create_and_fill_tensor(funcInputs[0].get_element_type(), shape);
         size_t size = tensor.get_size();
 
-        if (netPrecision == ElementType::f32 || netPrecision == ElementType::i32) {
+        if (netPrecision == ElementType::f32 || netPrecision == ElementType::i32 || netPrecision == ElementType::i64) {
             std::vector<int> data(size);
 
             // For int32, deliberately set big numbers which are not accurately representable in fp32
@@ -170,14 +175,17 @@ class TopKLayerCPUTest : public testing::WithParamInterface<TopKLayerCPUTestPara
             std::shuffle(data.begin(), data.end(), gen);
 
             if (netPrecision == ElementType::f32) {
-                auto *rawBlobDataPtr = static_cast<float *>(tensor.data());
+                auto rawBlobDataPtr = tensor.data<float>();
                 for (size_t i = 0; i < size; ++i) {
                     rawBlobDataPtr[i] = static_cast<float>(data[i]);
                 }
-            } else {
-                auto *rawBlobDataPtr = static_cast<int32_t *>(tensor.data());
+            } else if (netPrecision == ElementType::i32) {
+                auto rawBlobDataPtr = static_cast<int32_t *>(tensor.data());
+                std::copy(data.begin(), data.end(), rawBlobDataPtr);
+            } else if (netPrecision == ElementType::i64) {
+                auto *rawBlobDataPtr = tensor.data<int64_t>();
                 for (size_t i = 0; i < size; ++i) {
-                    rawBlobDataPtr[i] = static_cast<int32_t>(data[i]);
+                    rawBlobDataPtr[i] = static_cast<int64_t>(data[i]);
                 }
             }
         } else if (netPrecision == ElementType::bf16) {
@@ -351,6 +359,21 @@ INSTANTIATE_TEST_CASE_P(smoke_TopK_int32_dynamic, TopKLayerCPUTest,
         ::testing::Values(additionalConfig[0])),
     TopKLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_CASE_P(smoke_TopK_i64, TopKLayerCPUTest,  // TopK with i64 network precision
+    ::testing::Combine(
+        ::testing::Combine(
+            ::testing::ValuesIn(k_int32),  // reuses the k_int32 values
+            ::testing::ValuesIn(axes),
+            ::testing::ValuesIn(modes),
+            ::testing::ValuesIn(sortTypeStable),
+            ::testing::Values(ElementType::i64),  // precision is pinned to i64
+            ::testing::Values(ElementType::undefined),
+            ::testing::Values(ElementType::undefined),
+            ::testing::ValuesIn(inputShapes_int32)),  // reuses the inputShapes_int32 shapes
+        ::testing::ValuesIn(filterCPUSpecificParams(cpuParams)),
+        ::testing::Values(additionalConfig[0])),  // first entry of additionalConfig only
+    TopKLayerCPUTest::getTestCaseName);
+
 std::vector<ov::test::InputShape> inputShapes_bubble_BLK_on_channel_horiz = {
     {{}, {{2, 2, 2, 2}}},
 };
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp
index acfac9a31278a6..e602d9ee6e32d0 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp
@@ -6,6 +6,7 @@
 #include "ngraph_functions/builders.hpp"
 #include "test_utils/cpu_test_utils.hpp"
 #include <common_test_utils/ov_tensor_utils.hpp>
+#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 
 using namespace CPUTestUtils;
 using namespace ov::test;
@@ -18,7 +19,7 @@ typedef std::tuple<
         bool,                                // Sorted
         ElementType,                         // Data precision
         CPUSpecificParams,                   // CPU specific params
-        std::map<std::string, std::string>   // Additional config
+        ov::AnyMap                           // Additional config
 > UniqueLayerTestCPUParams;
 
 class UniqueLayerTestCPU : public testing::WithParamInterface<UniqueLayerTestCPUParams>,
@@ -30,7 +31,7 @@ class UniqueLayerTestCPU : public testing::WithParamInterface<UniqueLayerTestCPU
         bool sorted;
         ElementType dataPrecision;
         CPUSpecificParams cpuParams;
-        std::map<std::string, std::string> additionalConfig;
+        ov::AnyMap additionalConfig;
 
         std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = obj.param;
 
@@ -59,9 +60,9 @@ class UniqueLayerTestCPU : public testing::WithParamInterface<UniqueLayerTestCPU
 
         if (!additionalConfig.empty()) {
             result << "_PluginConf";
-            for (auto &item : additionalConfig) {
-                if (item.second == InferenceEngine::PluginConfigParams::YES)
-                    result << "_" << item.first << "=" << item.second;
+            for (auto& configItem : additionalConfig) {
+                result << "_" << configItem.first << "=";
+                configItem.second.print(result);
             }
         }
 
@@ -76,7 +77,7 @@ class UniqueLayerTestCPU : public testing::WithParamInterface<UniqueLayerTestCPU
         int axis;
         ElementType dataPrecision;
         CPUSpecificParams cpuParams;
-        std::map<std::string, std::string> additionalConfig;
+        ov::AnyMap additionalConfig;
 
         std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = this->GetParam();
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
@@ -143,15 +144,18 @@ const std::vector<ElementType> dataPrecisionSmoke = {
 };
 const std::vector<ElementType> dataPrecisionNightly = {
         ElementType::bf16,
-        ElementType::i8
+        ElementType::i8,
+        ElementType::i64
 };
 
 std::vector<std::tuple<bool, int>> flatOrAxis { {true, 0}, {false, 0}, {false, 1}, {false, -1} };
 
 std::vector<bool> sorted { true, false};
 
-std::vector<std::map<std::string, std::string>> additionalConfig
-    = {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}},
+ov::AnyMap empty_config = {};  // empty map: no plugin options
+ov::AnyMap config_i64 = {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64, InferenceEngine::PluginConfigParams::YES}};
+std::vector<ov::AnyMap> config_bf16 =  // KEY_ENFORCE_BF16 set to NO, then to YES
+      {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}},
        {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}}};
 
 std::vector<CPUSpecificParams> getCPUInfo() {
@@ -177,9 +181,19 @@ INSTANTIATE_TEST_SUITE_P(smoke_static_1D, UniqueLayerTestCPU,
                      ::testing::ValuesIn(sorted),
                      ::testing::ValuesIn(dataPrecisionSmoke),
                      ::testing::ValuesIn(getCPUInfo()),
-                     ::testing::Values(additionalConfig[0])),
+                     ::testing::Values(empty_config)),
              UniqueLayerTestCPU::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_static_1D_I64, UniqueLayerTestCPU,  // static 1D shapes, i64 only
+            ::testing::Combine(
+                    ::testing::ValuesIn(statShapes1D),
+                    ::testing::ValuesIn(std::vector<std::tuple<bool, int>>{{true, 0}, {false, 0}}),
+                    ::testing::ValuesIn(sorted),
+                    ::testing::Values(ElementType::i64),  // precision is pinned to i64
+                    ::testing::ValuesIn(getCPUInfo()),
+                    ::testing::Values(config_i64)),  // KEY_CPU_NATIVE_I64 = YES
+            UniqueLayerTestCPU::getTestCaseName);
+
 std::vector<std::vector<InputShape>> getStaticShapes() {
     std::vector<std::vector<InputShape>> result = {
         { { {}, { {1, 1, 1} } } },    // Static shapes
@@ -226,7 +240,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_static, UniqueLayerTestCPU,
                         ::testing::ValuesIn(sorted),
                         ::testing::ValuesIn(dataPrecisionSmoke),
                         ::testing::ValuesIn(getCPUInfo()),
-                        ::testing::Values(additionalConfig[0])),
+                        ::testing::Values(empty_config)),
+                UniqueLayerTestCPU::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_static_I64, UniqueLayerTestCPU,  // static shapes, i64 only
+                ::testing::Combine(
+                        ::testing::ValuesIn(getStaticShapes()),
+                        ::testing::ValuesIn(flatOrAxis),
+                        ::testing::ValuesIn(sorted),
+                        ::testing::Values(ElementType::i64),  // precision is pinned to i64
+                        ::testing::ValuesIn(getCPUInfo()),
+                        ::testing::Values(config_i64)),  // KEY_CPU_NATIVE_I64 = YES
                 UniqueLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(nightly_static, UniqueLayerTestCPU,
@@ -236,7 +260,7 @@ INSTANTIATE_TEST_SUITE_P(nightly_static, UniqueLayerTestCPU,
                         ::testing::ValuesIn(sorted),
                         ::testing::ValuesIn(dataPrecisionNightly),
                         ::testing::ValuesIn(getCPUInfo()),
-                        ::testing::Values(additionalConfig[0])),
+                        ::testing::ValuesIn(config_bf16)),
                 UniqueLayerTestCPU::getTestCaseName);
 
 const std::vector<std::vector<InputShape>> dynamicInSapes = {
@@ -265,7 +289,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_dynamic, UniqueLayerTestCPU,
                              ::testing::ValuesIn(sorted),
                              ::testing::ValuesIn(dataPrecisionSmoke),
                              ::testing::ValuesIn(getCPUInfo()),
-                             ::testing::Values(additionalConfig[0])),
+                             ::testing::Values(empty_config)),
                      UniqueLayerTestCPU::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(nightly_dynamic, UniqueLayerTestCPU,
@@ -275,7 +299,7 @@ INSTANTIATE_TEST_SUITE_P(nightly_dynamic, UniqueLayerTestCPU,
                                  ::testing::ValuesIn(sorted),
                                  ::testing::ValuesIn(dataPrecisionNightly),
                                  ::testing::ValuesIn(getCPUInfo()),
-                                 ::testing::Values(additionalConfig[0])),
+                                 ::testing::ValuesIn(config_bf16)),
                          UniqueLayerTestCPU::getTestCaseName);
 } // namespace
 } // namespace CPULayerTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp
index 5593385d70bd71..d611f8f7123753 100644
--- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp
+++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp
@@ -144,14 +144,14 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:
             IE_ASSERT(rtInfo.end() != it);
             return it->second.as<std::string>();
         };
-        auto getExecValueOutputsLayout = [] (const std::shared_ptr<ngraph::Node>& node) -> std::string {
+        auto getExecValueOutputsLayout = [] (const std::shared_ptr<ov::Node>& node) -> std::string {
             auto rtInfo = node->get_rt_info();
             auto it = rtInfo.find(ExecGraphInfoSerialization::OUTPUT_LAYOUTS);
             IE_ASSERT(rtInfo.end() != it);
             return it->second.as<std::string>();
         };
         // skip policy
-        auto should_be_skipped = [] (const ngraph::PartialShape &partialShape, cpu_memory_format_t fmt) {
+        auto should_be_skipped = [] (const ov::PartialShape &partialShape, cpu_memory_format_t fmt) {
             if (partialShape.is_dynamic()) {
                 return false;
             }
@@ -328,45 +328,75 @@ CPUTestsBase::makeCPUInfo(const std::vector<cpu_memory_format_t>& inFmts,
     return cpuInfo;
 }
 
-std::shared_ptr<ngraph::Function>
-CPUTestsBase::makeNgraphFunction(const ngraph::element::Type &ngPrc, ngraph::ParameterVector &params,
-                                 const std::shared_ptr<ngraph::Node> &lastNode, std::string name) {
+std::shared_ptr<ov::Model>
+CPUTestsBase::makeNgraphFunction(const ov::element::Type &ngPrc, ov::ParameterVector &params,
+                                 const std::shared_ptr<ov::Node> &lastNode, std::string name) {
    auto newLastNode = modifyGraph(ngPrc, params, lastNode);
-   ngraph::ResultVector results;
+   ov::ResultVector results;
 
    for (size_t i = 0; i < newLastNode->get_output_size(); i++)
-        results.push_back(std::make_shared<ngraph::opset1::Result>(newLastNode->output(i)));
+        results.push_back(std::make_shared<ov::op::v0::Result>(newLastNode->output(i)));
 
-   return std::make_shared<ngraph::Function>(results, params, name);
+   return std::make_shared<ov::Model>(results, params, name);
 }
 
-std::shared_ptr<ngraph::Node>
-CPUTestsBase::modifyGraph(const ngraph::element::Type &ngPrc, ngraph::ParameterVector &params, const std::shared_ptr<ngraph::Node> &lastNode) {
+std::shared_ptr<ov::Node>
+CPUTestsBase::modifyGraph(const ov::element::Type &ngPrc, ov::ParameterVector &params, const std::shared_ptr<ov::Node> &lastNode) {
     lastNode->get_rt_info() = getCPUInfo();
     return lastNode;
 }
 
-std::string CPUTestsBase::makeSelectedTypeStr(std::string implString, ngraph::element::Type_t elType) {
+std::string CPUTestsBase::makeSelectedTypeStr(std::string implString, ov::element::Type_t elType) {
     implString.push_back('_');
     implString += InferenceEngine::details::convertPrecision(elType).name();
     return implString;
 }
 
-std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpecificParams> &paramsVector) {
-    auto adjustBlockedFormatByIsa = [](std::vector<cpu_memory_format_t>& formats) {
-        for (auto& format : formats) {
-            if (format == nCw16c)
-                format = nCw8c;
-            if (format == nChw16c)
-                format = nChw8c;
-            if (format == nCdhw16c)
-                format = nCdhw8c;
+std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpecificParams> &paramsVector, const ov::element::Type &prc) {
+    auto adjustBlockedFormatAvx512 = [&](std::vector<cpu_memory_format_t>& formats) {
+        if (prc.size() == 8) {
+            for (size_t i = 0; i < formats.size(); i++) {
+                if (formats[i] == nCw16c) {
+                    formats[i] = nCw8c;
+                } else if (formats[i] == nChw16c) {
+                    formats[i] = nChw8c;
+                } else if (formats[i] == nCdhw16c) {
+                    formats[i] = nCdhw8c;
+                }
+            }
+        }
+    };
+    auto adjustBlockedFormatByIsa = [&](std::vector<cpu_memory_format_t>& formats) {
+        if (prc.size() == 8) {
+            for (size_t i = 0; i < formats.size(); i++) {
+                if (formats[i] == nCw16c || formats[i] == nCw8c) {
+                    formats[i] = ncw;
+                } else if (formats[i] == nChw16c || formats[i] == nChw8c) {
+                    formats[i] = nchw;
+                } else if (formats[i] == nCdhw16c || formats[i] == nCdhw8c) {
+                    formats[i] = ncdhw;
+                }
+            }
+        } else {
+            for (auto& format : formats) {
+                if (format == nCw16c)
+                    format = nCw8c;
+                if (format == nChw16c)
+                    format = nChw8c;
+                if (format == nCdhw16c)
+                    format = nCdhw8c;
+            }
         }
     };
 
     std::vector<CPUSpecificParams> filteredParamsVector = paramsVector;
 
-    if (!InferenceEngine::with_cpu_x86_avx512f()) {
+    if (InferenceEngine::with_cpu_x86_avx512f()) {
+        for (auto& param : filteredParamsVector) {
+            adjustBlockedFormatAvx512(std::get<0>(param));
+            adjustBlockedFormatAvx512(std::get<1>(param));
+        }
+    } else {
         for (auto& param : filteredParamsVector) {
             adjustBlockedFormatByIsa(std::get<0>(param));
             adjustBlockedFormatByIsa(std::get<1>(param));
@@ -376,9 +406,9 @@ std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpec
     return filteredParamsVector;
 }
 
-inline void CheckNumberOfNodesWithTypeImpl(std::shared_ptr<const ov::Model> function,
+inline void CheckNumberOfNodesWithTypeImpl(const std::shared_ptr<const ov::Model>& function,
                                            const std::unordered_set<std::string>& nodeTypes,
-                                           size_t expectedCount) {
+                                           const size_t expectedCount) {
     ASSERT_NE(nullptr, function);
     size_t actualNodeCount = 0;
     for (const auto &node : function->get_ops()) {
diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp
index 7d161658660826..5e633e9cb258f8 100644
--- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp
+++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp
@@ -127,12 +127,12 @@ class CPUTestsBase {
                                const std::vector<cpu_memory_format_t>& outFmts,
                                const std::vector<std::string>& priority);
    //TODO: change to setter method
-    static std::string makeSelectedTypeStr(std::string implString, ngraph::element::Type_t elType);
+    static std::string makeSelectedTypeStr(std::string implString, ov::element::Type_t elType);
 
     CPUInfo getCPUInfo() const;
-    std::shared_ptr<ngraph::Function> makeNgraphFunction(const ngraph::element::Type &ngPrc,
-                                                         ngraph::ParameterVector &params,
-                                                         const std::shared_ptr<ngraph::Node> &lastNode,
+    std::shared_ptr<ov::Model> makeNgraphFunction(const ov::element::Type &ngPrc,
+                                                         ov::ParameterVector &params,
+                                                         const std::shared_ptr<ov::Node> &lastNode,
                                                          std::string name);
 
     void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::set<std::string>& nodeType) const;
@@ -151,9 +151,9 @@ class CPUTestsBase {
      * @param lastNode The last node of the initial graph.
      * @return The last node of the modified graph.
      */
-    virtual std::shared_ptr<ngraph::Node> modifyGraph(const ngraph::element::Type &ngPrc,
-                                                      ngraph::ParameterVector &params,
-                                                      const std::shared_ptr<ngraph::Node> &lastNode);
+    virtual std::shared_ptr<ov::Node> modifyGraph(const ov::element::Type &ngPrc,
+                                                      ov::ParameterVector &params,
+                                                      const std::shared_ptr<ov::Node> &lastNode);
 
     virtual bool primTypeCheck(std::string primType) const;
 
@@ -175,7 +175,8 @@ const std::map<std::string, std::string> cpuBF16PluginConfig =
 
 
 // utility functions
-std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpecificParams>& paramsVector);
+std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpecificParams>& paramsVector,
+                                                       const ov::element::Type &prc = ov::element::Type_t::f32);
 std::vector<CPUSpecificParams> filterCPUInfoForDevice(const std::vector<CPUSpecificParams>& CPUParams);
 void CheckNumberOfNodesWithType(const ov::CompiledModel &compiledModel, const std::string& nodeType, size_t expectedCount);
 void CheckNumberOfNodesWithType(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType, size_t expectedCount);
diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index 33bb2b261d3829..4839324791aeef 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit 33bb2b261d3829162395aaa9bbe8c1c5b139e855
+Subproject commit 4839324791aeef1195b0337ecd1fcc322e6f3c26
diff --git a/src/plugins/template/backend/ops/reduce.cpp b/src/plugins/template/backend/ops/reduce.cpp
new file mode 100644
index 00000000000000..380c729278badc
--- /dev/null
+++ b/src/plugins/template/backend/ops/reduce.cpp
@@ -0,0 +1,74 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "evaluate_node.hpp"
+#include <ngraph/runtime/reference/reduce_l1.hpp>
+#include <ngraph/runtime/reference/reduce_l2.hpp>
+#include <openvino/core/type/element_type.hpp>
+#include <openvino/op/reduce_l1.hpp>
+#include <openvino/op/reduce_l2.hpp>
+#include <ngraph/util.hpp>
+#include <ngraph/validation_util.hpp>
+
+template <ov::element::Type_t ET>
+bool evaluate(const std::shared_ptr<ov::op::v4::ReduceL1>& op, const ov::HostTensorVector& outputs, const ov::HostTensorVector& inputs) {
+    using T = typename ov::element_type_traits<ET>::value_type;
+    // Typed ReduceL1 evaluation: inputs[0] is the data, inputs[1] the axes (read as int64_t and normalized with the rank of inputs[0]).
+    const auto axes_vector = host_tensor_2_vector<int64_t>(inputs[1]);
+    const auto normalized_axes = ov::normalize_axes(op->get_friendly_name(), axes_vector, inputs[0]->get_partial_shape().rank());
+    const auto reduction_axes = ov::AxisSet{normalized_axes};
+    // Run the reference reduce_l1 kernel over inputs[0], writing into outputs[0]; this overload always returns true.
+    ngraph::runtime::reference::reduce_l1<T>(inputs[0]->get_data_ptr<T>(),
+                                             outputs[0]->get_data_ptr<T>(),
+                                             inputs[0]->get_shape(),
+                                             reduction_axes);
+    return true;
+}
+
+template <ov::element::Type_t ET>
+bool evaluate(const std::shared_ptr<ov::op::v4::ReduceL2>& op, const ov::HostTensorVector& outputs, const ov::HostTensorVector& inputs) {
+    using T = typename ov::element_type_traits<ET>::value_type;
+    // Typed ReduceL2 evaluation: inputs[0] is the data, inputs[1] the axes (read as int64_t and normalized with the rank of inputs[0]).
+    const auto axes_vector = host_tensor_2_vector<int64_t>(inputs[1]);
+    const auto normalized_axes = ov::normalize_axes(op->get_friendly_name(), axes_vector, inputs[0]->get_partial_shape().rank());
+    const auto reduction_axes = ov::AxisSet{normalized_axes};
+    // Run the reference reduce_l2 kernel over inputs[0], writing into outputs[0]; this overload always returns true.
+    ngraph::runtime::reference::reduce_l2<T>(inputs[0]->get_data_ptr<T>(),
+                                             outputs[0]->get_data_ptr<T>(),
+                                             inputs[0]->get_shape(),
+                                             reduction_axes);
+    return true;
+}
+
+template <>
+bool evaluate_node<ov::op::v4::ReduceL1>(std::shared_ptr<ov::Node> node,
+                                         const ov::HostTensorVector& outputs,
+                                         const ov::HostTensorVector& inputs) {
+    const ov::element::Type_t element_type = node->get_output_element_type(0);
+    auto reduce_node = ov::as_type_ptr<ov::op::v4::ReduceL1>(node);
+    // Dispatch on the element type of output 0: only i64 is evaluated here, every other type throws.
+    switch (element_type) {
+    case ov::element::Type_t::i64:
+        return evaluate<ov::element::Type_t::i64>(reduce_node, outputs, inputs);
+    default:
+        OPENVINO_THROW(std::string("Unhandled data type ") + node->get_element_type().get_type_name() +
+                       std::string(" in evaluate_node()"));
+    }
+}
+
+template <>
+bool evaluate_node<ov::op::v4::ReduceL2>(std::shared_ptr<ov::Node> node,
+                                         const ov::HostTensorVector& outputs,
+                                         const ov::HostTensorVector& inputs) {
+    const ov::element::Type_t element_type = node->get_output_element_type(0);
+    auto reduce_node = ov::as_type_ptr<ov::op::v4::ReduceL2>(node);
+    // Dispatch on the element type of output 0: only i64 is evaluated here, every other type throws.
+    switch (element_type) {
+    case ov::element::Type_t::i64:
+        return evaluate<ov::element::Type_t::i64>(reduce_node, outputs, inputs);
+    default:
+        OPENVINO_THROW(std::string("Unhandled data type ") + node->get_element_type().get_type_name() +
+                       std::string(" in evaluate_node()"));
+    }
+}
diff --git a/src/plugins/template/backend/opset_int_tbl.hpp b/src/plugins/template/backend/opset_int_tbl.hpp
index f65bf093a800cb..cefd1602fc4947 100644
--- a/src/plugins/template/backend/opset_int_tbl.hpp
+++ b/src/plugins/template/backend/opset_int_tbl.hpp
@@ -82,6 +82,8 @@ _OPENVINO_OP_REG(CTCLoss, op::v4)
 _OPENVINO_OP_REG(LSTMCell, op::v4)
 _OPENVINO_OP_REG(NonMaxSuppression, op::v4)
 _OPENVINO_OP_REG(Proposal, op::v4)
+_OPENVINO_OP_REG(ReduceL1, op::v4)
+_OPENVINO_OP_REG(ReduceL2, op::v4)
 
 _OPENVINO_OP_REG(BatchNormInference, op::v5)
 _OPENVINO_OP_REG(GatherND, op::v5)
diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/non_max_suppression.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/non_max_suppression.cpp
index 94d29d88869bfa..b8416fd68a8683 100644
--- a/src/tests/ngraph_helpers/ngraph_functions/src/non_max_suppression.cpp
+++ b/src/tests/ngraph_helpers/ngraph_functions/src/non_max_suppression.cpp
@@ -19,8 +19,12 @@ std::shared_ptr<ngraph::Node> makeNms(const ngraph::Output<Node>& boxes,
                                       const bool& isCenter,
                                       const bool& sortResDescend,
                                       const ngraph::element::Type& outType) {
-    auto maxOutBoxesPerClassNode =
-        makeConstant(maxBoxesPrec, ngraph::Shape{}, std::vector<int32_t>{maxOutBoxesPerClass})->output(0);
+    std::shared_ptr<ov::Node> maxOutBoxesPerClassNode;
+    if (maxBoxesPrec == element::i64) {
+        maxOutBoxesPerClassNode = makeConstant(maxBoxesPrec, ngraph::Shape{}, std::vector<int64_t>{maxOutBoxesPerClass});
+    } else {
+        maxOutBoxesPerClassNode = makeConstant(maxBoxesPrec, ngraph::Shape{}, std::vector<int32_t>{maxOutBoxesPerClass});
+    }
     auto iouThrNode = makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{iouThr})->output(0);
     auto scoreThrNode = makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{scoreThr})->output(0);
     auto softNmsSigmaNode = makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{softNmsSigma})->output(0);
@@ -30,7 +34,7 @@ std::shared_ptr<ngraph::Node> makeNms(const ngraph::Output<Node>& boxes,
 
     return std::make_shared<NmsOperation>(boxes,
                                           scores,
-                                          maxOutBoxesPerClassNode,
+                                          maxOutBoxesPerClassNode->output(0),
                                           iouThrNode,
                                           scoreThrNode,
                                           softNmsSigmaNode,