Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 12 additions & 29 deletions src/plugins/intel_cpu/src/nodes/rnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,9 +616,11 @@ void RNN::fillCellDesc() {
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(shapeS, inDataTypes[cIdx], memory::format_tag::nc));
outCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(shapeS, outDataTypes[coIdx], memory::format_tag::nc));
}

// Expose the weights and weights_iter inputs in plain nc layout to avoid an unnecessary reorder.
// oneDNN determines the final weights layout later, during prepareParams.
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(WShape, inDataTypes[wIdx], memory::format_tag::nc));
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(RShape, inDataTypes[rIdx], memory::format_tag::nc));

inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(BShape, inDataTypes[bIdx], memory::format_tag::x));

if (haveAttention(cell_type)) {
Expand Down Expand Up @@ -721,8 +723,11 @@ void RNN::fillSequenceDesc() {
}

inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(TShape, inDataTypes[sIdx], memory::format_tag::x)); // sequence lengths
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(WShape, inDataTypes[wIdx], memory::format_tag::ntc)); // W
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(RShape, inDataTypes[rIdx], memory::format_tag::ntc)); // R
// Expose the weights and weights_iter inputs in plain tnc layout to avoid an unnecessary reorder.
// oneDNN determines the final weights layout later, during prepareParams.
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(WShape, inDataTypes[wIdx], memory::format_tag::tnc)); // W
inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(RShape, inDataTypes[rIdx], memory::format_tag::tnc)); // R

inCandidate.emplace_back(std::make_shared<DnnlBlockedMemoryDesc>(BShape, inDataTypes[bIdx], memory::format_tag::nc)); // B

if (haveAttention(cell_type)) {
Expand Down Expand Up @@ -891,9 +896,6 @@ void RNN::copyWeightsData() {
if (one_of(dataType, memory::data_type::bf16, memory::data_type::f16)) {
fillWeights<uint16_t>(gate_map, wIdx, rIdx);
} else if (dataType == memory::data_type::f32) {
// WA To avoid different weights layer and iter formats in FP32 case
if (T.minVal > 1 || N.maxVal < optimalBatchSize)
wFormat = dnnl::memory::format_tag::ldigo;
fillWeights<float>(gate_map, wIdx, rIdx);
} else if (dataType == memory::data_type::u8 || dataType == memory::data_type::s8) {
fillWeights<int8_t>(gate_map, wIdx, rIdx);
Expand Down Expand Up @@ -1032,9 +1034,11 @@ void RNN::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
since internalBlobs are used for the execution, not the initial weights */
const auto& targetWeightDataType = weightsByinputDataType.at(inDataTypes[xIdx]);
auto weightsDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC });
wDescs[0] = dnnl::memory::desc(weightsDims, targetWeightDataType, wFormat);
// Let oneDNN choose the preferred weights layout (format_tag::any).
wDescs[0] = dnnl::memory::desc(weightsDims, targetWeightDataType, memory::format_tag::any);
auto statesDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC });
wDescs[1] = dnnl::memory::desc(statesDims, targetWeightDataType, wFormat);
// Let oneDNN choose the preferred weights_iter layout (format_tag::any).
wDescs[1] = dnnl::memory::desc(statesDims, targetWeightDataType, memory::format_tag::any);
auto biasDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, Gb, SC });
wDescs[2] = dnnl::memory::desc(biasDims, inDataTypes[bIdx], memory::format_tag::ldgo);

Expand Down Expand Up @@ -1109,27 +1113,6 @@ void RNN::prepareParams() {
inDataDescs[2] = std::make_shared<DnnlBlockedMemoryDesc>(Shape{SL, B, 1}, inDataTypes[aIdx], memory::format_tag::tnc);
}

bool wFormatWasChanged = false;
// WA To avoid different weights layer and iter formats in FP32 case.
if (one_of(inDataTypes[xIdx], memory::data_type::f32) &&
(SL != 1 || B < optimalBatchSize)) {
if (wFormat != dnnl::memory::format_tag::ldigo) {
wFormat = dnnl::memory::format_tag::ldigo;
wFormatWasChanged = true;
}
} else if (wFormat != dnnl::memory::format_tag::any) {
wFormat = dnnl::memory::format_tag::any;
wFormatWasChanged = true;
}

if (wFormatWasChanged) {
auto weightsDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC });
const auto& targetWeightDataType = weightsByinputDataType.at(inDataTypes[xIdx]);
wDescs[0] = dnnl::memory::desc(weightsDims, targetWeightDataType, wFormat);
auto statesDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC });
wDescs[1] = dnnl::memory::desc(statesDims, targetWeightDataType, wFormat);
}

const auto attr = initPrimitiveAttr();
RNNKey key = { inDataDescs, outDataDescs, wDescs, cell_type, cell_act, direction, *attr };

Expand Down
3 changes: 0 additions & 3 deletions src/plugins/intel_cpu/src/nodes/rnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,6 @@ class RNN : public Node {
/** activation type for vanilla RNN cell */
dnnl::algorithm cell_act = dnnl::algorithm::undef;

/** Weights data and state memory format: ldigo or any */
dnnl::memory::format_tag wFormat = dnnl::memory::format_tag::any;

struct Interval {
Interval() = default;

Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "openvino/pass/constant_folding.hpp"
#include "openvino/op/fake_quantize.hpp"
#include "openvino/pass/manager.hpp"
#include "common/pass/reshape_fc_fusion.hpp"
#include "common/pass/align_matmul_input_ranks.hpp"
#include "transformations/common_optimizations/reshape_prelu.hpp"
#include "common/pass/convert_broadcast_to_tiles.hpp"
Expand Down Expand Up @@ -42,9 +41,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model> &nGraphFunc) {
CPU_REGISTER_PASS_COMMON(manager, ConvertToLeakyRelu);
CPU_REGISTER_PASS_COMMON(manager, ConvertToSwishCPU);
CPU_REGISTER_PASS_COMMON(manager, OptimizeSequenceTransposes);
if (!ov::op::util::has_op_with_type<ov::op::v0::FakeQuantize>(nGraphFunc)) {
CPU_REGISTER_PASS_COMMON(manager, ReshapeFullyConnectedFusion);
}
// after transformation "MoveEltwiseUpThroughDataMov" there can be reshaped sequences that should be eliminated or fused
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ReshapeSequenceFusion);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,16 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");
retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)");
}

// RNN/LSTM/GRU/AUGRU BF16 tests fail on AVX512-core ISA (without AMX BF16) when gemm_avx512 falls back to gemm_avx2
if (InferenceEngine::with_cpu_x86_avx512_core()&& !InferenceEngine::with_cpu_x86_avx512_core_amx_bf16()) {
retVector.emplace_back(R"(smoke.*(AUGRUCellCPUTest|GRUCellCPUTest|LSTMCellLayerCPUTest).CompareWithRefs.*ENFORCE_BF16=YES.*)");
retVector.emplace_back(R"(nightly.*bf16.*/(AUGRUSequenceCPUTest|GRUSequenceCPUTest|LSTMSequenceCPUTest|RNNSequenceCPUTest).CompareWithRefs.*ENFORCE_BF16=YES.*)");
}
if (InferenceEngine::with_cpu_x86_avx512_core_amx_bf16()) {
// GRUCell and GRUSequence BF16 tests fail on SPR (AMX BF16) when gemm_avx512 falls back to gemm_avx2
retVector.emplace_back(R"(.*/GRU.*ENFORCE_BF16=YES.*)");
// The GroupDeconv 3D BF16 test fails when gemm_avx512 falls back to gemm_avx2
retVector.emplace_back(R"(nightly_GroupDeconv_3D_Planar_BF16/GroupDeconvolutionLayerCPUTest\.CompareWithRefs/IS=\[\?.12.\?.\?.\?\]_TS=\(\(2.12.7.7.7\)\)_\(\(2.12.5.7.7\)\)_\(\(1.12.9.4.9\)\)_\(\(2.12.5.7.7\)\)_PRC=f32.*S=\(2.2.2\)_PB=\(0.0.0\)_PE=\(0.0.0\)_D=\(1.1.1\)_OP=\(\)_O=6_G.*primitive=jit_gemm.*_ENFORCE_BF16=YES)");
}
return retVector;
}
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/thirdparty/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ function(ov_add_onednn)
set(DNNL_CPU_RUNTIME "${THREADING}" CACHE STRING "" FORCE)
set(DNNL_GPU_RUNTIME "NONE" CACHE STRING "" FORCE)
set(DNNL_BLAS_VENDOR "NONE" CACHE STRING "" FORCE)
# Build the oneDNN GEMM driver kernels only for AVX2 and lower ISAs
set(ONEDNN_ENABLE_GEMM_KERNELS_ISA "AVX2" CACHE STRING "" FORCE)

# plugin does not use onednn graph
set(ONEDNN_BUILD_GRAPH OFF CACHE BOOL "" FORCE)
# select needed primitives
Expand Down