From 89db8484efd5b07c28994e1fe4a8f6bfd07e84b7 Mon Sep 17 00:00:00 2001
From: liubo-intel <bo4.liu@intel.com>
Date: Mon, 23 Mar 2026 03:59:45 -0400
Subject: [PATCH 1/5] Enable BF16 dynamic quantization path for compressed FC

---
 .../dnnl/dnnl_fullyconnected_primitive.cpp    | 21 ++--
 .../intel_cpu/src/nodes/fullyconnected.cpp    |  4 +-
 .../classes/matmul_weights_decompression.cpp  | 98 ++++++++++++-------
 .../classes/matmul_weights_decompression.hpp  |  7 ++
 .../src/x64/matmul_weights_decompression.cpp  | 28 ++++++
 src/plugins/intel_cpu/thirdparty/onednn       |  2 +-
 6 files changed, 117 insertions(+), 43 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
index e358d24e5e0626..eb34c835951842 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
@@ -41,6 +41,7 @@
 #include "utils/cpu_utils.hpp"
 #include "utils/debug_capabilities.h"
 #include "utils/general_utils.h"
+#include "utils/precision_support.h"
 
 namespace ov::intel_cpu {
 
@@ -166,12 +167,20 @@ static bool useDynamicQuantizationImpl(size_t dqGroupSize,
         return false;
     }
 
-    if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni) &&
-        !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) {
-        return false;
-    }
-
-    if (srcDesc->getPrecision() != ov::element::f32) {
+    // BF16 dynamic-quant path requires native BF16 HW support AND an AVX512-VNNI
+    // impl in oneDNN (only avx512_core_vnni instance is registered for bf16 src).
+    const auto srcPrecision = srcDesc->getPrecision();
+    if (srcPrecision == ov::element::bf16) {
+        if (!hasHardwareSupport(ov::element::bf16) ||
+            !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) {
+            return false;
+        }
+    } else if (srcPrecision == ov::element::f32) {
+        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni) &&
+            !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) {
+            return false;
+        }
+    } else {
         return false;
     }
 
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index aa317305d77b2c..e40ad95c68f5a6 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -94,9 +94,7 @@ ov::element::TypeVector FullyConnected::getSupportedCompressedActivationsTypes()
         return {Type_t::f32, Type_t::f16};
     }
 #if defined(OPENVINO_ARCH_X86_64)
-    // @todo enable for bf16 as well
-    // after EnforceInferencePrecision is replaced with ConvertPrecision
-    return {Type_t::f32};
+    return {Type_t::f32, Type_t::bf16};
 #elif defined(OV_CPU_WITH_KLEIDIAI)
     return {Type_t::f32};
 #else
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp
index aa3dbe620052b2..34a79a7303c078 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "matmul_weights_decompression.hpp"
+
 #include "common_test_utils/subgraph_builders/weights_decompression_builders.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
 
@@ -11,10 +12,19 @@ using namespace CPUTestUtils;
 namespace ov {
 namespace test {
 
-std::string MatmulWeightsDecompression::getTestCaseName(const testing::TestParamInfo<MatmulWeightsDecompressionParams>& obj) {
-    const auto& [shape_params, weights_precision, decompression_precision, scale_precision, transpose,
-                 decompression_multiply_type, decompression_subtract_type, reshape_on_decompression, additional_config,
-                 fusing_params, should_fuse] = obj.param;
+std::string MatmulWeightsDecompression::getTestCaseName(
+    const testing::TestParamInfo<MatmulWeightsDecompressionParams>& obj) {
+    const auto& [shape_params,
+                 weights_precision,
+                 decompression_precision,
+                 scale_precision,
+                 transpose,
+                 decompression_multiply_type,
+                 decompression_subtract_type,
+                 reshape_on_decompression,
+                 additional_config,
+                 fusing_params,
+                 should_fuse] = obj.param;
     std::ostringstream result;
     result << shape_params << "_";
     result << "weights_precision=" << weights_precision << "_";
@@ -36,37 +46,46 @@ std::string MatmulWeightsDecompression::getTestCaseName(const testing::TestParam
     return result.str();
 }
 
-std::shared_ptr<ov::Model> MatmulWeightsDecompression::initSubgraph(const ov::PartialShape& data_shape,
-                                                                    const ov::Shape& weights_shape,
-                                                                    const int group_size,
-                                                                    const ov::element::Type data_precision,
-                                                                    const ov::element::Type weights_precision,
-                                                                    const ov::element::Type decompression_precision,
-                                                                    const ov::element::Type scale_precision,
-                                                                    const bool transpose_weights,
-                                                                    const ov::test::utils::DecompressionType decompression_multiply_type,
-                                                                    const ov::test::utils::DecompressionType decompression_subtract_type,
-                                                                    const bool reshape_on_decompression) {
+std::shared_ptr<ov::Model> MatmulWeightsDecompression::initSubgraph(
+    const ov::PartialShape& data_shape,
+    const ov::Shape& weights_shape,
+    const int group_size,
+    const ov::element::Type data_precision,
+    const ov::element::Type weights_precision,
+    const ov::element::Type decompression_precision,
+    const ov::element::Type scale_precision,
+    const bool transpose_weights,
+    const ov::test::utils::DecompressionType decompression_multiply_type,
+    const ov::test::utils::DecompressionType decompression_subtract_type,
+    const bool reshape_on_decompression) {
     ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(data_precision, data_shape)};
     const auto weights_subgraph = initMatMulDecompressionSubgraph(weights_shape,
-                                                                    group_size,
-                                                                    data_precision,
-                                                                    weights_precision,
-                                                                    decompression_precision,
-                                                                    scale_precision,
-                                                                    transpose_weights,
-                                                                    decompression_multiply_type,
-                                                                    decompression_subtract_type,
-                                                                    reshape_on_decompression);
+                                                                  group_size,
+                                                                  data_precision,
+                                                                  weights_precision,
+                                                                  decompression_precision,
+                                                                  scale_precision,
+                                                                  transpose_weights,
+                                                                  decompression_multiply_type,
+                                                                  decompression_subtract_type,
+                                                                  reshape_on_decompression);
     auto matMul = std::make_shared<ov::op::v0::MatMul>(params[0], weights_subgraph);
     return create_ov_model(data_precision, params, matMul, "MatmulWeightsDecompression");
 }
 
-void MatmulWeightsDecompression::SetUp() {
+void MatmulWeightsDecompression::setUpWithDataPrecision(const ov::element::Type data_precision) {
     targetDevice = ov::test::utils::DEVICE_CPU;
-    const auto& [shape_params, weights_precision, decompression_precision, scale_precision, transpose_weights,
-                 decompression_multiply_type, decompression_subtract_type, reshape_on_decompression, additional_config,
-                 fusing_params, should_fuse] = GetParam();
+    const auto& [shape_params,
+                 weights_precision,
+                 decompression_precision,
+                 scale_precision,
+                 transpose_weights,
+                 decompression_multiply_type,
+                 decompression_subtract_type,
+                 reshape_on_decompression,
+                 additional_config,
+                 fusing_params,
+                 should_fuse] = GetParam();
     configuration.insert(additional_config.begin(), additional_config.end());
     std::tie(postOpMgrPtr, fusedOps) = fusing_params;
     init_input_shapes({shape_params.data_shape});
@@ -78,7 +97,7 @@ void MatmulWeightsDecompression::SetUp() {
     // if dynamic quantization is enabled
     if (configuration.count(ov::hint::dynamic_quantization_group_size.name()) &&
         configuration.at(ov::hint::dynamic_quantization_group_size.name()) != 0) {
-        abs_threshold = 0.1;
+        abs_threshold = data_precision == ov::element::bf16 ? 0.2 : 0.1;
     }
 
     if (configuration.count(ov::hint::inference_precision.name()) &&
@@ -86,7 +105,7 @@ void MatmulWeightsDecompression::SetUp() {
         abs_threshold = 0.2;
     }
 
-    ElementType netType = ov::element::f32;
+    const ElementType netType = data_precision;
     inType = outType = netType;
 
     function = initSubgraph(inputDynamicShapes[0],
@@ -102,6 +121,14 @@ void MatmulWeightsDecompression::SetUp() {
                             reshape_on_decompression);
 }
 
+void MatmulWeightsDecompression::SetUp() {
+    setUpWithDataPrecision(ov::element::f32);
+}
+
+void MatmulWeightsDecompressionBF16::SetUp() {
+    setUpWithDataPrecision(ov::element::bf16);
+}
+
 void MatmulWeightsDecompression::check_results() {
     const auto& test_param = GetParam();
     const ov::element::Type compressed_weights_precision = std::get<1>(test_param);
@@ -118,9 +145,8 @@ void MatmulWeightsDecompression::check_results() {
     type = fc->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as<std::string>();
     EXPECT_EQ(type, "FullyConnected");
 
-    const auto& expected_weights_precision = use_matmul_decompression_impl
-                                                    ? compressed_weights_precision
-                                                    : fc->get_input_element_type(0);
+    const auto& expected_weights_precision =
+        use_matmul_decompression_impl ? compressed_weights_precision : fc->get_input_element_type(0);
     EXPECT_EQ(fc->get_input_element_type(1), expected_weights_precision);
 }
 
@@ -130,5 +156,11 @@ TEST_P(MatmulWeightsDecompression, CompareWithRefs) {
     check_results();
 }
 
+TEST_P(MatmulWeightsDecompressionBF16, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    run();
+    check_results();
+}
+
 }  // namespace test
 }  // namespace ov
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp
index 114c3ab8ebe7ff..cf55ba68e2f399 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp
@@ -76,6 +76,13 @@ class MatmulWeightsDecompression : public testing::WithParamInterface<MatmulWeig
     void SetUp() override;
 
     void check_results();
+
+    void setUpWithDataPrecision(const ov::element::Type data_precision);
+};
+
+class MatmulWeightsDecompressionBF16 : public MatmulWeightsDecompression {
+protected:
+    void SetUp() override;
 };
 
 }  // namespace test
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
index 9903d149efba67..ebe12069e40a01 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
@@ -27,6 +27,7 @@ std::vector<ov::AnyMap> filter_additional_config_amx() {
 }
 
 const std::vector<ov::test::ElementType> decompression_precisions = {ov::element::f32};
+const std::vector<ov::test::ElementType> decompression_precisions_bf16 = {ov::element::bf16};
 const std::vector<ov::test::ElementType> weights_precisions = {ov::element::u8,
                                                                ov::element::u4,
                                                                ov::element::i4,
@@ -302,6 +303,18 @@ std::vector<ov::AnyMap> filter_additional_config_dyn_quant() {
     return additional_config;
 }
 
+std::vector<ov::AnyMap> filter_additional_config_dyn_quant_bf16() {
+    std::vector<ov::AnyMap> additional_config = {};
+    if (ov::with_cpu_x86_bfloat16() && ov::with_cpu_x86_avx512_core_vnni() && !ov::with_cpu_x86_avx512_core_amx()) {
+        additional_config = {
+            {{ov::hint::dynamic_quantization_group_size(0)}},
+            {{ov::hint::dynamic_quantization_group_size(16)}},
+            {{ov::hint::dynamic_quantization_group_size(128)}},
+        };
+    }
+    return additional_config;
+}
+
 INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_non_default_dyn_quant_group_sizes,
                          MatmulWeightsDecompression,
                          ::testing::Combine(::testing::ValuesIn(input_shapes_basic_dyn_quant),
@@ -317,6 +330,21 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_non_default_dyn_quant_gro
                                             ::testing::Values(true)),
                          MatmulWeightsDecompression::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_non_default_dyn_quant_group_sizes_bf16,
+                         MatmulWeightsDecompressionBF16,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_basic_dyn_quant),
+                                            ::testing::ValuesIn(weights_precisions_dyn_quant),
+                                            ::testing::ValuesIn(decompression_precisions_bf16),
+                                            ::testing::Values(ov::element::dynamic),
+                                            ::testing::Values(true),
+                                            ::testing::Values(DecompressionType::full),
+                                            ::testing::ValuesIn(decompression_subtract_type),
+                                            ::testing::Values(false),
+                                            ::testing::ValuesIn(filter_additional_config_dyn_quant_bf16()),
+                                            ::testing::ValuesIn(fusing_params_dyn_quant),
+                                            ::testing::Values(true)),
+                         MatmulWeightsDecompression::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_non_default_dyn_quant_group_sizes_u2,
                          MatmulWeightsDecompression,
                          ::testing::Combine(::testing::ValuesIn(input_shapes_basic_dyn_quant_u2),
diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index 587b6d7ae4d77b..fc147eb569285f 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit 587b6d7ae4d77b0a2b438c23e757ccfd352e5bed
+Subproject commit fc147eb569285f7a1479aebcdc327df2bd39aa2e

From 0e3f79a04d1575ba6b79f7380d0619b0c5bb291e Mon Sep 17 00:00:00 2001
From: liubo-intel <bo4.liu@intel.com>
Date: Mon, 11 May 2026 02:08:35 -0400
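Subject: [PATCH 2/5] Apply suggestions from code review

Replace the combined hasHardwareSupport(bf16) + avx512_core_vnni check in
useDynamicQuantizationImpl with a single avx512_core_bf16 ISA check, and
drop the now-unused utils/precision_support.h include.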

---
 .../executors/dnnl/dnnl_fullyconnected_primitive.cpp      | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
index eb34c835951842..c81acde8ba3d61 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
@@ -41,7 +41,6 @@
 #include "utils/cpu_utils.hpp"
 #include "utils/debug_capabilities.h"
 #include "utils/general_utils.h"
-#include "utils/precision_support.h"
 
 namespace ov::intel_cpu {
 
@@ -167,12 +166,11 @@ static bool useDynamicQuantizationImpl(size_t dqGroupSize,
         return false;
     }
 
-    // BF16 dynamic-quant path requires native BF16 HW support AND an AVX512-VNNI
-    // impl in oneDNN (only avx512_core_vnni instance is registered for bf16 src).
+    // BF16 dynamic-quant path requires native x86 BF16 HW support (AVX512_BF16) AND an
+    // AVX512-VNNI impl in oneDNN (only avx512_core_vnni instance is registered for bf16 src).
     const auto srcPrecision = srcDesc->getPrecision();
     if (srcPrecision == ov::element::bf16) {
-        if (!hasHardwareSupport(ov::element::bf16) ||
-            !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) {
+        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) {
             return false;
         }
     } else if (srcPrecision == ov::element::f32) {

From ebdc7177b57c64fe7fb376a4b7d82b6709fcdbb5 Mon Sep 17 00:00:00 2001
From: liubo-intel <bo4.liu@intel.com>
Date: Tue, 19 May 2026 03:59:03 -0400
Subject: [PATCH 3/5] [CPU] Apply suggestions from code review

* Disable the BF16 dyn-quant entry on AMX-capable HW at two layers
  (node-level getSupportedCompressedActivationsTypes and primitive-level
  useDynamicQuantizationImpl), since AMX BF16 TMUL handles long
  prompts (prefill) more efficiently than VNNI int8 dyn-quant.

* Drive the BF16 dyn-quant test through the inference_precision hint
  on an f32 IR; remove the MatmulWeightsDecompressionBF16 subclass and
  decompression_precisions_bf16.
---
 .../dnnl/dnnl_fullyconnected_primitive.cpp    |  5 +++
 .../intel_cpu/src/nodes/fullyconnected.cpp    |  7 ++++
 .../classes/matmul_weights_decompression.cpp  | 33 +++++++------------
 .../classes/matmul_weights_decompression.hpp  | 25 +++++---------
 .../src/x64/matmul_weights_decompression.cpp  | 15 +++++----
 5 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
index c81acde8ba3d61..65d4473cbebf0f 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
@@ -173,6 +173,11 @@ static bool useDynamicQuantizationImpl(size_t dqGroupSize,
         if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) {
             return false;
         }
+        // On AMX-capable HW, AMX BF16 TMUL outperforms VNNI int8 dyn-quant for bf16 src.
+        // Disable the bf16 dyn-quant entry here so the AMX BF16 path stays in use.
+        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
+            return false;
+        }
     } else if (srcPrecision == ov::element::f32) {
         if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni) &&
             !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) {
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index e40ad95c68f5a6..6d9d2d75319e3f 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -94,6 +94,13 @@ ov::element::TypeVector FullyConnected::getSupportedCompressedActivationsTypes()
         return {Type_t::f32, Type_t::f16};
     }
 #if defined(OPENVINO_ARCH_X86_64)
+    // BF16 compressed-activations path is intended for SIMD (avx512_vnni)
+    // dynamic-quant kernels. On AMX-capable HW, AMX BF16 TMUL outperforms
+    // VNNI int8 on prefill, so keep f32 here and let the existing AMX BF16
+    // path handle bf16 inference precision.
+    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
+        return {Type_t::f32};
+    }
     return {Type_t::f32, Type_t::bf16};
 #elif defined(OV_CPU_WITH_KLEIDIAI)
     return {Type_t::f32};
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp
index 34a79a7303c078..596303c5ba444d 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.cpp
@@ -73,7 +73,7 @@ std::shared_ptr<ov::Model> MatmulWeightsDecompression::initSubgraph(
     return create_ov_model(data_precision, params, matMul, "MatmulWeightsDecompression");
 }
 
-void MatmulWeightsDecompression::setUpWithDataPrecision(const ov::element::Type data_precision) {
+void MatmulWeightsDecompression::SetUp() {
     targetDevice = ov::test::utils::DEVICE_CPU;
     const auto& [shape_params,
                  weights_precision,
@@ -90,22 +90,27 @@ void MatmulWeightsDecompression::setUpWithDataPrecision(const ov::element::Type
     std::tie(postOpMgrPtr, fusedOps) = fusing_params;
     init_input_shapes({shape_params.data_shape});
 
+    const bool dyn_quant_enabled = configuration.count(ov::hint::dynamic_quantization_group_size.name()) &&
+                                   configuration.at(ov::hint::dynamic_quantization_group_size.name()) != 0;
+    const auto inference_precision_hint =
+        configuration.count(ov::hint::inference_precision.name())
+            ? configuration.at(ov::hint::inference_precision.name()).as<ov::element::Type>()
+            : ov::element::dynamic;
+
     if (!configuration.count(ov::hint::dynamic_quantization_group_size.name())) {
         abs_threshold = 5e-3;
     }
 
     // if dynamic quantization is enabled
-    if (configuration.count(ov::hint::dynamic_quantization_group_size.name()) &&
-        configuration.at(ov::hint::dynamic_quantization_group_size.name()) != 0) {
-        abs_threshold = data_precision == ov::element::bf16 ? 0.2 : 0.1;
+    if (dyn_quant_enabled) {
+        abs_threshold = inference_precision_hint == ov::element::bf16 ? 0.2 : 0.1;
     }
 
-    if (configuration.count(ov::hint::inference_precision.name()) &&
-        configuration.at(ov::hint::inference_precision.name()) == ov::element::f16) {
+    if (inference_precision_hint == ov::element::f16) {
         abs_threshold = 0.2;
     }
 
-    const ElementType netType = data_precision;
+    ElementType netType = ov::element::f32;
     inType = outType = netType;
 
     function = initSubgraph(inputDynamicShapes[0],
@@ -121,14 +126,6 @@ void MatmulWeightsDecompression::setUpWithDataPrecision(const ov::element::Type
                             reshape_on_decompression);
 }
 
-void MatmulWeightsDecompression::SetUp() {
-    setUpWithDataPrecision(ov::element::f32);
-}
-
-void MatmulWeightsDecompressionBF16::SetUp() {
-    setUpWithDataPrecision(ov::element::bf16);
-}
-
 void MatmulWeightsDecompression::check_results() {
     const auto& test_param = GetParam();
     const ov::element::Type compressed_weights_precision = std::get<1>(test_param);
@@ -156,11 +153,5 @@ TEST_P(MatmulWeightsDecompression, CompareWithRefs) {
     check_results();
 }
 
-TEST_P(MatmulWeightsDecompressionBF16, CompareWithRefs) {
-    SKIP_IF_CURRENT_TEST_IS_DISABLED()
-    run();
-    check_results();
-}
-
 }  // namespace test
 }  // namespace ov
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp
index cf55ba68e2f399..0fc457682c74c1 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp
@@ -3,9 +3,9 @@
 //
 
 #include "common_test_utils/ov_tensor_utils.hpp"
+#include "common_test_utils/subgraph_builders/weights_decompression_builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "shared_test_classes/subgraph/weights_decompression_params.hpp"
-#include "common_test_utils/subgraph_builders/weights_decompression_builders.hpp"
 #include "utils/cpu_test_utils.hpp"
 #include "utils/fusing_test_utils.hpp"
 
@@ -42,14 +42,14 @@ namespace test {
  *              Bias
  */
 typedef std::tuple<MatMulDecompressionShapeParams,
-                   ov::test::ElementType,      // weights precision
-                   ov::test::ElementType,      // decompression precision
-                   ov::test::ElementType,      // scale precision
-                   bool,                       // transpose on weights
-                   ov::test::utils::DecompressionType,          // decompression multiply type
-                   ov::test::utils::DecompressionType,          // decompression subtract type
-                   bool,                       // reshape on decompression constants
-                   ov::AnyMap,                 // additional config
+                   ov::test::ElementType,               // weights precision
+                   ov::test::ElementType,               // decompression precision
+                   ov::test::ElementType,               // scale precision
+                   bool,                                // transpose on weights
+                   ov::test::utils::DecompressionType,  // decompression multiply type
+                   ov::test::utils::DecompressionType,  // decompression subtract type
+                   bool,                                // reshape on decompression constants
+                   ov::AnyMap,                          // additional config
                    fusingSpecificParams,
                    bool>  // should use decompression implementation
     MatmulWeightsDecompressionParams;
@@ -76,13 +76,6 @@ class MatmulWeightsDecompression : public testing::WithParamInterface<MatmulWeig
     void SetUp() override;
 
     void check_results();
-
-    void setUpWithDataPrecision(const ov::element::Type data_precision);
-};
-
-class MatmulWeightsDecompressionBF16 : public MatmulWeightsDecompression {
-protected:
-    void SetUp() override;
 };
 
 }  // namespace test
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
index ebe12069e40a01..44f08d39f745d1 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
@@ -27,7 +27,6 @@ std::vector<ov::AnyMap> filter_additional_config_amx() {
 }
 
 const std::vector<ov::test::ElementType> decompression_precisions = {ov::element::f32};
-const std::vector<ov::test::ElementType> decompression_precisions_bf16 = {ov::element::bf16};
 const std::vector<ov::test::ElementType> weights_precisions = {ov::element::u8,
                                                                ov::element::u4,
                                                                ov::element::i4,
@@ -304,12 +303,16 @@ std::vector<ov::AnyMap> filter_additional_config_dyn_quant() {
 }
 
 std::vector<ov::AnyMap> filter_additional_config_dyn_quant_bf16() {
+    // Drive the BF16 dynamic-quant compressed-FC path through the inference_precision
+    // hint on top of an f32 IR. The ConvertPrecision pipeline is responsible for
+    // adjusting the decompression chain to bf16; the test should not pre-bake bf16
+    // into the IR.
     std::vector<ov::AnyMap> additional_config = {};
     if (ov::with_cpu_x86_bfloat16() && ov::with_cpu_x86_avx512_core_vnni() && !ov::with_cpu_x86_avx512_core_amx()) {
         additional_config = {
-            {{ov::hint::dynamic_quantization_group_size(0)}},
-            {{ov::hint::dynamic_quantization_group_size(16)}},
-            {{ov::hint::dynamic_quantization_group_size(128)}},
+            {ov::hint::dynamic_quantization_group_size(0), ov::hint::inference_precision(ov::element::bf16)},
+            {ov::hint::dynamic_quantization_group_size(16), ov::hint::inference_precision(ov::element::bf16)},
+            {ov::hint::dynamic_quantization_group_size(128), ov::hint::inference_precision(ov::element::bf16)},
         };
     }
     return additional_config;
@@ -331,10 +334,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_non_default_dyn_quant_gro
                          MatmulWeightsDecompression::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_non_default_dyn_quant_group_sizes_bf16,
-                         MatmulWeightsDecompressionBF16,
+                         MatmulWeightsDecompression,
                          ::testing::Combine(::testing::ValuesIn(input_shapes_basic_dyn_quant),
                                             ::testing::ValuesIn(weights_precisions_dyn_quant),
-                                            ::testing::ValuesIn(decompression_precisions_bf16),
+                                            ::testing::ValuesIn(decompression_precisions),
                                             ::testing::Values(ov::element::dynamic),
                                             ::testing::Values(true),
                                             ::testing::Values(DecompressionType::full),

From b2ce2d11abdba012e3ab0c5224a59c01232210a1 Mon Sep 17 00:00:00 2001
From: liubo-intel <bo4.liu@intel.com>
Date: Sat, 23 May 2026 10:24:25 -0400
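Subject: [PATCH 4/5] Apply suggestions from code review

Drop the explicit avx512_core_vnni check from
filter_additional_config_dyn_quant_bf16 (the test config is now gated on
with_cpu_x86_bfloat16 and the absence of AMX only) and update the oneDNN
submodule pointer.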

---
 .../subgraph_tests/src/x64/matmul_weights_decompression.cpp     | 2 +-
 src/plugins/intel_cpu/thirdparty/onednn                         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
index 44f08d39f745d1..0f870a141eebb9 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp
@@ -308,7 +308,7 @@ std::vector<ov::AnyMap> filter_additional_config_dyn_quant_bf16() {
     // adjusting the decompression chain to bf16; the test should not pre-bake bf16
     // into the IR.
     std::vector<ov::AnyMap> additional_config = {};
-    if (ov::with_cpu_x86_bfloat16() && ov::with_cpu_x86_avx512_core_vnni() && !ov::with_cpu_x86_avx512_core_amx()) {
+    if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx512_core_amx()) {
         additional_config = {
             {ov::hint::dynamic_quantization_group_size(0), ov::hint::inference_precision(ov::element::bf16)},
             {ov::hint::dynamic_quantization_group_size(16), ov::hint::inference_precision(ov::element::bf16)},
diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index fc147eb569285f..6b31ed2b862ab3 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit fc147eb569285f7a1479aebcdc327df2bd39aa2e
+Subproject commit 6b31ed2b862ab3e216d81433db0c7a8b3a0792d0

From 88e17364c1221df1589cc75ee6611381da865724 Mon Sep 17 00:00:00 2001
From: liubo-intel <bo4.liu@intel.com>
Date: Wed, 27 May 2026 06:15:06 -0400
Subject: [PATCH 5/5] onednn commit message reword

---
 src/plugins/intel_cpu/thirdparty/onednn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index 6b31ed2b862ab3..f82d833de6f13f 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit 6b31ed2b862ab3e216d81433db0c7a8b3a0792d0
+Subproject commit f82d833de6f13fac4bb1926d521ca8fec4f4ae01