From 68e056d6b219e4c1a98ae1518e575744c5935a1b Mon Sep 17 00:00:00 2001 From: alexey-varyzgin Date: Fri, 1 Apr 2022 12:58:42 +0300 Subject: [PATCH] [CPU][BF16] Functional failures fixes for 2022 R2 --- src/plugins/intel_cpu/src/graph.cpp | 2 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 35 +++++++++++-------- src/plugins/intel_cpu/src/nodes/deconv.cpp | 3 ++ src/plugins/intel_cpu/thirdparty/mkl-dnn | 2 +- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 4ad70ceaae30f8..5d463f478e7e07 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1361,7 +1361,7 @@ void Graph::EnforceBF16() { if (nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail) continue; - if (node->getType() != Type::Input && node->getType() != Type::Output) { + if (!ov::intel_cpu::one_of(node->getType(), Type::Input, Type::Output, Type::MemoryInput, Type::MemoryOutput)) { for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) { const auto &parent = node->getParentEdgesAtPort(i)[0]->getParent(); /* Skip BF16 enforcement for nodes after Constant Inputs for maintaining precision for fusing. diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index ab32a7272e74db..3c2ad3fd50cebf 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -289,6 +289,20 @@ void GraphOptimizer::FuseConvolutionMatMulAndBias(Graph &graph) { } } +/** + * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support + * for bf16 depthwise postops. + * This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as + * multiple binary post ops. + * This check can already be removed for FC fusing, but should be kept for Convolution, + * which still uses legacy depthwise postops for performance reasons. 
+ */ +static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) { + return childNode->getType() == Type::FakeQuantize && + one_of(Precision::BF16, + parentNode->getOriginalOutputPrecisionAtPort(0), + childNode->getOriginalOutputPrecisionAtPort(0)); +} void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); @@ -328,6 +342,12 @@ void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph &graph) { continue; } + // BF16 Quantize Layer Fusing Disabling + if (BF16QuantizeNodeFusing(parentNode, childNode)) { + parent++; + continue; + } + childNode->fuseInto(parentNode); auto parentEdges = childNode->parentEdges; @@ -715,21 +735,6 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { } } -/** - * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support - * for bf16 depthwise postops. - * This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as - * multiple binary post ops. - * This check can already be removed for FC fusing, but should be kept for Convolution, - * which still uses legacy depthwise postops for performance reasons. 
- */ -static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) { - return childNode->getType() == Type::FakeQuantize && - one_of(Precision::BF16, - parentNode->getOriginalOutputPrecisionAtPort(0), - childNode->getOriginalOutputPrecisionAtPort(0)); -} - void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index a6fc40bc33ad28..717137a9506a36 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -295,6 +295,9 @@ void Deconvolution::getSupportedDescriptors() { inputDataType = outputDataType = memory::data_type::bf16; if (!fusedWith.empty()) { outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); + // TODO: We have to extend jit_avx512_core_x8s8s32x_deconv_fwd_kernel from oneDNN to support BF16 output data type + if (isInt8 && outputDataType == memory::data_type::bf16) + outputDataType = memory::data_type::f32; } if (getParentEdges().size() != 2 && getParentEdges().size() != 3) diff --git a/src/plugins/intel_cpu/thirdparty/mkl-dnn b/src/plugins/intel_cpu/thirdparty/mkl-dnn index 82ca2f931c1d58..ede2dfc7f8df8c 160000 --- a/src/plugins/intel_cpu/thirdparty/mkl-dnn +++ b/src/plugins/intel_cpu/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 82ca2f931c1d588b67d154d873136d4af1ffb3a8 +Subproject commit ede2dfc7f8df8c0eb0e0945851dbd8ec4666ac5c