diff --git a/cmake/templates/OpenVINOConfig.cmake.in b/cmake/templates/OpenVINOConfig.cmake.in index 9eed376f8da6ae..70ab53b1923615 100644 --- a/cmake/templates/OpenVINOConfig.cmake.in +++ b/cmake/templates/OpenVINOConfig.cmake.in @@ -369,7 +369,7 @@ macro(_ov_find_itt) endmacro() macro(_ov_find_intel_cpu_dependencies) - set(_OV_ENABLE_CPU_ACL "@DNNL_USE_ACL@") + set(_OV_ENABLE_CPU_ACL "@DNNL_AARCH64_USE_ACL@") if(_OV_ENABLE_CPU_ACL) set(_ov_in_install_tree "@PACKAGE_OPENVINO_LIB_DIR@") if(_ov_in_install_tree) diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index 8bea7347e8e0e9..c0023c526baf0f 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -227,7 +227,7 @@ endif() # build tree -if(DNNL_USE_ACL) +if(DNNL_AARCH64_USE_ACL) list(APPEND BUILD_PATH_VARS "FIND_ACL_PATH;CMAKE_ARCHIVE_OUTPUT_DIRECTORY") set(FIND_ACL_PATH "${intel_cpu_thirdparty_SOURCE_DIR}") endif() diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 48401a7df31bc1..f65b3f9eb6a45b 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -87,12 +87,7 @@ set(OV_CPU_ARM_TARGET_GENERIC_ARCHS armv8a armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2 armv8r64 # the same as armv8.4-a ) -if(ARM) - set(OV_CPU_ARM_TARGET_ARCH_DEFAULT armv7a) - set(OV_CPU_ARM_TARGET_ARCHS armv7a armv7a-hf - # requires estate=32 - ${OV_CPU_ARM_TARGET_GENERIC_ARCHS}) -elseif(AARCH64) +if(AARCH64) if(APPLE) set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a) else() @@ -153,7 +148,7 @@ if(OV_CPU_WITH_DNNL) add_definitions(-DOV_CPU_WITH_DNNL) endif() -if(DNNL_USE_ACL) +if(DNNL_AARCH64_USE_ACL) add_definitions(-DOV_CPU_WITH_ACL) set(OV_CPU_WITH_ACL ON) endif() @@ -183,7 +178,7 @@ if(NOT X86_64) ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/x64/*) endif() -if(NOT (AARCH64 OR ARM)) +if(NOT AARCH64) list(APPEND EXCLUDE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/arm/* ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/aarch64/* @@ -239,7 +234,7 @@ ov_add_plugin(NAME ${TARGET_NAME} ADD_CLANG_TIDY) # give a different file name depending on target platform architecture -if(ARM OR AARCH64) +if(AARCH64) set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME "openvino_arm_cpu_plugin") elseif(RISCV64) set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME "openvino_riscv_cpu_plugin") diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 0c63b0d35464a4..d52ffcc5b4187c 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -496,18 +496,16 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { if (!inferencePrecisionSetExplicitly) { if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { inferencePrecision = ov::element::f32; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) if (hasHardwareSupport(ov::element::f16)) { inferencePrecision = ov::element::f16; } -# if defined(OPENVINO_ARCH_ARM64) // enforce fp32 inference precision for dynamic quantization // to preserve fp32 matmul output precision if (fcDynamicQuantizationGroupSizeSetExplicitly && fcDynamicQuantizationGroupSize == std::numeric_limits::max()) { inferencePrecision = ov::element::f32; } -# endif #endif if (mayiuse(avx512_core_bf16)) { inferencePrecision = ov::element::bf16; diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 574ff0c02632c7..a7f91fb64f044b 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -1039,39 +1039,7 @@ static void configure_arm64_linux_threads(Config& config, } #endif -#if defined(OPENVINO_ARCH_ARM) && defined(__linux__) -void configure_arm_linux_threads(Config& config, - const std::vector>& proc_type_table, - const ov::MemBandwidthPressure& tolerance, - bool int8_intensive, - bool is_LLM) { - using namespace ThreadPreferenceConstants; - config.modelPreferThreadsThroughput = ARM_THREADS_DEFAULT; - - if (tolerance.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { - if (tolerance.ratio_compute_convs == ov::MemBandwidthPressure::ALL) { - config.modelPreferThreadsThroughput = ARM_THREADS_HIGH; - } - } else if ((tolerance.max_mem_tolerance < ov::MemBandwidthPressure::LIMITED) && - ((tolerance.ratio_mem_limited_deconvs > ov::MemBandwidthPressure::LIMITED) || - (tolerance.ratio_mem_limited_gemms > ov::MemBandwidthPressure::LIMITED))) { - config.modelPreferThreadsThroughput = ARM_THREADS_HIGH; - } - - const int main_cores = proc_type_table[0][MAIN_CORE_PROC]; - const int efficient_cores = proc_type_table[0][EFFICIENT_CORE_PROC]; - - bool use_all_cores = should_use_all_cores_for_latency(main_cores, efficient_cores, int8_intensive); - - if (use_all_cores && (!is_LLM || should_use_ecores_for_llm(efficient_cores, main_cores))) { - config.modelPreferThreadsLatency = main_cores + efficient_cores; - } else { - config.modelPreferThreadsLatency = main_cores; - } -} -#endif - -#if (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) +#if (defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) void configure_apple_threads(Config& config, const std::vector>& proc_type_table, const ov::MemBandwidthPressure& tolerance, @@ -1244,10 +1212,7 @@ int get_model_prefer_threads(const int num_streams, memThresholdAssumeLimitedForISA, config.inferencePrecision); -# if defined(OPENVINO_ARCH_ARM) && defined(__linux__) - configure_arm_linux_threads(config, proc_type_table, networkToleranceForLowCache, int8_intensive, is_LLM); - -# elif (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) +# if (defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) configure_apple_threads(config, proc_type_table, networkToleranceForLowCache, diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp index 2f9e45c16698a0..8df4939847b50c 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp @@ -124,15 +124,7 @@ void get_num_streams(int streams, const std::shared_ptr& model, Confi void sort_table_by_numa_node_id(int current_numa_node, std::vector>& proc_type_table); // Internal configure_* helpers are declared below and are publicly callable. -#if defined(OPENVINO_ARCH_ARM) && defined(__linux__) -void configure_arm_linux_threads(Config& config, - const std::vector>& proc_type_table, - const ov::MemBandwidthPressure& tolerance, - bool int8_intensive, - bool is_LLM); -#endif - -#if (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) +#if (defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) void configure_apple_threads(Config& config, const std::vector>& proc_type_table, const ov::MemBandwidthPressure& tolerance, diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index ad884147605c4f..f4d4431f129554 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -487,7 +487,7 @@ void DnnlPostOpsComposer::appendBinary(const dnnl::algorithm alg, const std::vec DEBUG_LOG("Append binary post op with algorithm: ", convert_to_c(alg), " Shape: ", Shape(*pdims)); ov::element::Type binaryType = ov::element::f32; -#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#if defined(OPENVINO_ARCH_ARM64) if (outDataType == dnnl::memory::data_type::f16) { // ACL executor is not able to handle different precisions between convolution output and post op input // in this case original post op tensor is f32 even the model runs in f16 precision diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 0853daf9b3afd3..eee49661f34248 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -92,7 +92,7 @@ # include "openvino/runtime/properties.hpp" #endif -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) # include # include "onednn/iml_type_mapper.h" @@ -606,7 +606,7 @@ static bool isReorderAvailable(const MemoryDescPtr& parentDesc, dstMemDesc.get(), eng.get(), attr.get()); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) // temporary WA for slow FP32->FP16 conversion reorder in oneDNN on ARM // pretend the reorder is not available to use Convert node instead if (hasHardwareSupport(ov::element::f16) && (result != nullptr) && diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 79897cb6e774a5..f65157b2090d70 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -101,7 +101,7 @@ void GraphOptimizer::ApplyCommonGraphOptimizations(Graph& graph) { // The order of applying scales and shifts is different for ARM to get specific postops order: // postops order on ARM: bias, scale, fq // postops order on x86: scale, bias, fq -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndBias"); FuseConvolutionMatMulDeconvAndBias(graph); graph.RemoveDroppedNodes(); @@ -280,7 +280,7 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph& graph) { return false; } // The order of applying scales and shifts is different for ARM, so bias could be already fused here for ARM -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) return any_of(parentNode->getParentEdges().size(), 2U, 3U); #else return (parentNode->getParentEdges().size() == 2); @@ -932,7 +932,7 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) { void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) { const auto& graphNodes = graph.GetNodes(); // zero points fusing is skipped on ARM platforms because oneDNN is not involved into int8 convolution inference -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) return; #endif @@ -3414,7 +3414,7 @@ void GraphOptimizer::TailNodesPrecisionOptimize(Graph& graph) { if (inferPrec != ov::element::f16) { return; } -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) return; // precision of configured by ov::pass::ConvertPrecision #endif const std::vector outputNodes = [&] { diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp index 383e5c5482176a..6c1fc8bb5e430d 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp @@ -5,19 +5,23 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include +#include #include #include #include @@ -29,7 +33,6 @@ #include "acl_utils.hpp" #include "common/primitive_desc_iface.hpp" -#include "cpu/acl/acl_utils.hpp" #include "cpu_memory.h" #include "cpu_shape.h" #include "cpu_types.h" @@ -53,6 +56,71 @@ #include "utils/cpu_utils.hpp" #include "utils/debug_capabilities.h" +namespace { + +dnnl::impl::dim_t roundUpToBlock(dnnl::impl::dim_t value, int block) { + OPENVINO_ASSERT(block > 0, "Unsupported ACL weight format block size: ", block); + return ((value + block - 1) / block) * block; +} + +void setAclStride(arm_compute::Strides& strides, size_t dim, size_t value) { + if (value > std::numeric_limits::max()) { + OPENVINO_THROW("ACL weight format stride is too large: ", value); + } + strides.set(dim, value); +} + +void reorderToAclFcWeightFormat(arm_compute::TensorInfo& info, + dnnl::impl::memory_desc_t& md, + arm_compute::WeightFormat weightFormat, + dnnl::impl::dim_t inputDim, + dnnl::impl::dim_t outputDim) { + md.format_kind = dnnl::impl::format_kind::blocked; + md.format_desc.blocking = dnnl::impl::blocking_desc_t{}; + + const int interleavedBy = arm_compute::interleave_by(weightFormat); + const int blockBy = arm_compute::block_by(weightFormat); + + md.format_desc.blocking.strides[inputDim] = interleavedBy * blockBy; + md.padded_dims[inputDim] = roundUpToBlock(md.dims[inputDim], blockBy); + + const dnnl::impl::dim_t ldb = interleavedBy * md.padded_dims[inputDim]; + md.format_desc.blocking.strides[outputDim] = ldb; + md.padded_dims[outputDim] = roundUpToBlock(md.dims[outputDim], interleavedBy); + + const dnnl::impl::dim_t innermostBatchStride = md.padded_dims[inputDim] * md.padded_dims[outputDim]; + + if (interleavedBy > 1) { + md.format_desc.blocking.inner_nblks = 1 + static_cast(blockBy > 1); + md.format_desc.blocking.inner_idxs[0] = outputDim; + md.format_desc.blocking.inner_blks[0] = interleavedBy; + if (blockBy > 1) { + md.format_desc.blocking.inner_idxs[1] = inputDim; + md.format_desc.blocking.inner_blks[1] = blockBy; + } + } + + if (arm_compute::is_fixed_format_fast_math(weightFormat)) { + md.data_type = dnnl_bf16; + info.set_data_type(arm_compute::DataType::BFLOAT16); + } + + info.set_data_layout(arm_compute::DataLayout::UNKNOWN); + + arm_compute::Strides newStridesInBytes = info.strides_in_bytes(); + setAclStride(newStridesInBytes, 1, static_cast(ldb) * info.element_size()); + setAclStride(newStridesInBytes, 2, static_cast(innermostBatchStride) * info.element_size()); + + info.init(info.tensor_shape(), + info.num_channels(), + info.data_type(), + newStridesInBytes, + info.offset_first_element_in_bytes(), + dnnl::impl::memory_desc_wrapper(md).size()); +} + +} // namespace + namespace ov::intel_cpu { VectorDims acl_fc_executor::makeDummyInputDims(const Shape& inShape, const Shape& wShape) { @@ -284,15 +352,8 @@ MemoryPtr acl_fc_executor::prepareWeightMemory(const MemoryArgs& memory, if (isNeededReorder) { dnnl::impl::dim_t o_dim = 0; dnnl::impl::dim_t inner_dim = 1; - std::vector remaining_dims = {}; auto* weights_md_ = dnnlDstDesc->getDnnlDesc().get(); - dnnl::impl::cpu::acl::acl_utils::reorder_to_weight_format(weiTensorInfo, - *weights_md_, - expectedWeightFormat, - inner_dim, - o_dim, - remaining_dims, - {}); + reorderToAclFcWeightFormat(weiTensorInfo, *weights_md_, expectedWeightFormat, inner_dim, o_dim); if (aclfcAttrs.weightsNonTransposed) { dnnlSrcDesc = makeTransposedWeightDescriptor(dnnlSrcDesc, dnnlDstDesc); } diff --git a/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp index 8c219f782114c8..b7b77196559819 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp @@ -61,7 +61,7 @@ static const TypeMapping dnnlConvTypeMapping { {{_f16, _bf16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}}, // quantization configuration is not applicable for ARM // because there is the dedicated low-precision implementation for ARM -#if !defined(OPENVINO_ARCH_ARM64) && !defined(OPENVINO_ARCH_ARM) +#if !defined(OPENVINO_ARCH_ARM64) // int8 conv does not support f16 output and bias {{_u8 | _i8, _i8, _quant |_bf16 | _f32 | _i32 | _dynamic, _quant | _bf16 | _f32 | _i32 | _dynamic}, {bypass(), bypass(), bypass(), bypass()}}, {{_u8 | _i8, _i8, _f16, _u8 | _i8 | _i32 | _bf16 | _f32}, {bypass(), bypass(), just(), bypass()}}, diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp index f45604af0971c8..de3ee329fff52d 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp @@ -245,7 +245,7 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const MatMulAttrs& attrs, } // by default fp16 matmul ACL kernels accumulate into fp32 // the default behaviour is changed by using f16 accumulator to improve performance -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) if (srcDesc->getPrecision() == ov::element::f16 && weiDesc->getPrecision() == ov::element::f16 && dstDesc->getPrecision() == ov::element::f16) { primAttrs.attr.set_accumulation_mode(dnnl::accumulation_mode::f16); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index de4d702fea7e11..1fa93b40c034d6 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -23,9 +23,6 @@ #include "cpu_memory.h" #include "cpu_types.h" #include "graph_context.h" -#if defined(OPENVINO_ARCH_ARM) -# include "memory_desc/cpu_blocked_memory_desc.h" -#endif #if defined(OV_CPU_WITH_ACL) # include "memory_desc/blocked_memory_desc.h" #endif @@ -35,10 +32,6 @@ #include "nodes/common/cpu_convert.h" #include "nodes/common/cpu_memcpy.h" #include "nodes/common/reorder_prim.h" -#if defined(OPENVINO_ARCH_ARM) -# include "nodes/executors/transpose.hpp" -# include "nodes/executors/transpose_list.hpp" -#endif #if defined(OV_CPU_WITH_ACL) # include # include @@ -170,57 +163,6 @@ void Reorder::executeDynamicImpl(const dnnl::stream& strm) { execute(strm); } -#if defined(OPENVINO_ARCH_ARM) -void Reorder::prepareReorderAsTranspose(const MemoryDescPtr& parentDesc, const MemoryDescPtr& childDesc) { - auto getOrderAndBlockedDims = [](const MemoryDesc& lhs, - const MemoryDesc& rhs) -> std::pair, std::vector> { - const auto& in = lhs.as()->getBlockDims(); - const auto rank = lhs.getShape().getRank(); - - if (lhs.hasLayoutType(LayoutType::ncsp) && rhs.hasLayoutType(LayoutType::nspc)) { - if (rank == 4) { - return {{0, 2, 3, 1}, {in[0], in[2], in[3], in[1]}}; - } - return {{0, 2, 1}, {in[0], in[2], in[1]}}; - } - if (lhs.hasLayoutType(LayoutType::nspc) && rhs.hasLayoutType(LayoutType::ncsp)) { - if (rank == 4) { - return {{0, 3, 1, 2}, {in[0], in[3], in[1], in[2]}}; - } - return {{0, 2, 1}, {in[0], in[2], in[1]}}; - } - if (rank == 4) { - return {{0, 1, 2, 3}, in}; - } - return {{0, 1, 2}, in}; - }; - - auto order = getOrderAndBlockedDims(*parentDesc, *childDesc); - const auto& transposeOrder = order.first; - const auto& transposedBlockDims = order.second; - - auto transposedDesc = - std::make_shared(parentDesc->getPrecision(), Shape{transposedBlockDims}); - - TransposeParams transposeParams; - transposeParams.permuteParams.src_block_dims = parentDesc->as()->getBlockDims(); - transposeParams.permuteParams.src_block_order = parentDesc->as()->getOrder(); - transposeParams.permuteParams.dst_block_dims = transposedBlockDims; - transposeParams.permuteParams.dst_block_order = transposeParams.permuteParams.src_block_order; - transposeParams.permuteParams.order = transposeOrder; - transposeParams.permuteParams.data_size = parentDesc->getPrecision().size(); - - auto transpose_context = std::make_shared(context, getImplPriority()); - auto factory = std::make_shared(transposeParams, - std::vector{parentDesc}, - std::vector{transposedDesc}, - transpose_context); - dnnl::primitive_attr attr; - transposeExecutor = factory->makeExecutor(transposeParams, {parentDesc}, {transposedDesc}, attr); - getSelectedPrimitiveDescriptor()->setImplementationType(transposeExecutor->implType()); -} -#endif - #if defined(OV_CPU_WITH_ACL) bool Reorder::prepareAclCopy(const MemoryDescPtr& parentDesc, const MemoryDescPtr& childDesc) { if (!parentDesc->isCompatible(*childDesc)) { @@ -285,16 +227,6 @@ void Reorder::prepareParams() { const auto& parentDesc = srcMemPtr->getDescPtr(); const auto& childDesc = dstMemPtr->getDescPtr(); -#if defined(OPENVINO_ARCH_ARM) - if (all_of(ov::element::f16, parentDesc->getPrecision(), childDesc->getPrecision()) && - ((parentDesc->hasLayoutType(LayoutType::ncsp) && childDesc->hasLayoutType(LayoutType::nspc)) || - (parentDesc->hasLayoutType(LayoutType::nspc) && childDesc->hasLayoutType(LayoutType::ncsp))) && - any_of(parentDesc->getShape().getRank(), 3U, 4U)) { - prepareReorderAsTranspose(parentDesc, childDesc); - return; - } -#endif - #if defined(OV_CPU_WITH_ACL) if (prepareAclCopy(parentDesc, childDesc)) { return; @@ -479,15 +411,6 @@ void Reorder::optimizedNspc2Ncsp() { } void Reorder::execute(const dnnl::stream& strm) { -#if defined(OPENVINO_ARCH_ARM) - if (transposeExecutor) { - auto dstMemPtr = getDstMemoryAtPort(0); - auto srcMemPtr = getSrcMemoryAtPort(0); - transposeExecutor->exec({srcMemPtr}, {dstMemPtr}); - return; - } -#endif - #if defined(OV_CPU_WITH_ACL) if (useAclCopy) { auto dstMemPtr = getDstMemoryAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index ddfc6f83404e19..d383c7856f8406 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -21,9 +21,6 @@ #include "openvino/core/node.hpp" #include "thread_pool_imp.hpp" -#if defined(OPENVINO_ARCH_ARM) -# include "nodes/executors/transpose.hpp" -#endif #if defined(OV_CPU_WITH_ACL) # include # include @@ -101,11 +98,6 @@ class Reorder : public Node { void optimizedNcsp2Nspc(); void createReorderPrimitive(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc); -#if defined(OPENVINO_ARCH_ARM) - void prepareReorderAsTranspose(const MemoryDescPtr& parentDesc, const MemoryDescPtr& childDesc); - TransposeExecutorPtr transposeExecutor; -#endif - #if defined(OV_CPU_WITH_ACL) bool prepareAclCopy(const MemoryDescPtr& parentDesc, const MemoryDescPtr& childDesc); std::unique_ptr aclCopy; diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index 7e4be0f6b11f3e..682a92b6a88ead 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -44,7 +44,7 @@ #if defined(OPENVINO_ARCH_X86_64) || defined(OPENVINO_ARCH_X86) # include "openvino/core/type/bfloat16.hpp" # include "openvino/core/type/float16.hpp" -#elif defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#elif defined(OPENVINO_ARCH_ARM64) # include "openvino/core/type/float16.hpp" #endif @@ -1571,7 +1571,7 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt // This unifies quantized (u8/u4) and raw paths through a single codec-generic pipeline. // temporary OV_TURBOQ_LEGACY_ATTN=1 reverts to the old per-type dispatch. // ARM SIMD abstraction not yet wired through mha_kv_cache — fall back to legacy. -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) const bool use_new_pipeline = false; #else static const bool force_legacy = std::getenv("OV_CPU_LEGACY_ATTN") != nullptr; diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 66cd9912452ee5..e3742c94625ff9 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -247,7 +247,7 @@ void Transpose::createPrimitive() { performAsReorder = true; } -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) // Avoid using reference implementation of non-fp32 reorders on arm platforms if (prec != ov::element::f32) { performAsReorder = false; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 8d8df9c1f0e4de..c16cc85e58c281 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -75,7 +75,7 @@ static std::string getDeviceFullName() { #elif defined(OPENVINO_ARCH_RISCV64) // TODO: extract actual device name brand_string = "RISCV-64 CPU"; -#elif defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#elif defined(OPENVINO_ARCH_ARM64) # if defined(__APPLE__) || defined(__MACOSX) { auto read_sysctl_str = [](const char* name) -> std::string { @@ -607,7 +607,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, [[maybe_unused]] const if (ov::internal::supported_properties == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, -#if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX)) +#if !(defined(__APPLE__) || defined(__MACOSX)) ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, #endif ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, @@ -677,8 +677,6 @@ ov::Any Plugin::get_ro_property(const std::string& name, [[maybe_unused]] const return decltype(ov::device::architecture)::value_type{"intel64"}; #elif defined(OPENVINO_ARCH_X86) return decltype(ov::device::architecture)::value_type{"ia32"}; -#elif defined(OPENVINO_ARCH_ARM) - return decltype(ov::device::architecture)::value_type{"armhf"}; #elif defined(OPENVINO_ARCH_ARM64) return decltype(ov::device::architecture)::value_type{"arm64"}; #elif defined(OPENVINO_ARCH_RISCV64) @@ -810,7 +808,7 @@ std::shared_ptr Plugin::deserialize_model(ModelDeserializer& using namespace ov::intel_cpu; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) static const ov::Version version = {CI_BUILD_NUMBER, "openvino_arm_cpu_plugin"}; #elif defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) static const ov::Version version = {CI_BUILD_NUMBER, "openvino_intel_cpu_plugin"}; diff --git a/src/plugins/intel_cpu/src/transformations/defs.hpp b/src/plugins/intel_cpu/src/transformations/defs.hpp index e502c24baeba9b..c2bfb85e5ded4b 100644 --- a/src/plugins/intel_cpu/src/transformations/defs.hpp +++ b/src/plugins/intel_cpu/src/transformations/defs.hpp @@ -46,19 +46,10 @@ namespace ov::intel_cpu { #endif // OPENVINO_ARCH_X86 -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) -# if defined(OPENVINO_ARCH_ARM) -# define CPU_REGISTER_PASS_ARM64(MANAGER, PASS, ...) -# define CPU_REGISTER_PASS_ARM32(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__) -# define CPU_DISABLE_PASS_ARM64(MANAGER, PASS) -# endif - -# if defined(OPENVINO_ARCH_ARM64) -# define CPU_REGISTER_PASS_ARM64(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__) -# define CPU_REGISTER_PASS_ARM32(MANAGER, PASS, ...) -# define CPU_DISABLE_PASS_ARM64(MANAGER, PASS) CPU_DISABLE_PASS_COMMON(MANAGER, PASS) -# endif +# define CPU_REGISTER_PASS_ARM64(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__) +# define CPU_DISABLE_PASS_ARM64(MANAGER, PASS) CPU_DISABLE_PASS_COMMON(MANAGER, PASS) # define CPU_REGISTER_PASS_ARM(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__) # define CPU_DISABLE_PASS_ARM(MANAGER, PASS) CPU_DISABLE_PASS_COMMON(MANAGER, PASS) @@ -68,13 +59,12 @@ namespace ov::intel_cpu { #else # define CPU_REGISTER_PASS_ARM64(MANAGER, PASS, ...) -# define CPU_REGISTER_PASS_ARM32(MANAGER, PASS, ...) # define CPU_REGISTER_PASS_ARM(MANAGER, PASS, ...) # define CPU_DISABLE_PASS_ARM64(MANAGER, PASS) # define CPU_DISABLE_PASS_ARM(MANAGER, PASS) # define CPU_ENABLE_PASS_ARM(MANAGER, PASS) # define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...) -#endif // OPENVINO_ARCH_ARM || OPENVINO_ARCH_ARM64 +#endif // OPENVINO_ARCH_ARM64 } // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 301d29f65a4a99..732980bcaedc54 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -220,7 +220,7 @@ # include "transformations/utils/utils.hpp" #endif -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) # include "low_precision/avg_pool.hpp" # include "low_precision/convolution.hpp" # include "low_precision/convolution_backprop_data.hpp" @@ -265,10 +265,6 @@ # include "transformations/op_conversions/hsigmoid_decomposition.hpp" #endif -#if defined(OPENVINO_ARCH_ARM) -# include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp" -#endif - #if defined(OPENVINO_ARCH_RISCV64) # include "nodes/kernels/riscv64/cpu_isa_traits.hpp" # include "openvino/op/power.hpp" @@ -554,7 +550,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis const auto precisions = get_convert_precisions(); if (config.inferencePrecision == ov::element::f16) { precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}}; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) type_to_fuse_map fuse_map = {}; #else type_to_fuse_map fuse_map = {{ov::op::PagedAttentionExtension::get_type_info_static(), fuse_type_to_pa}}; @@ -677,7 +673,6 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_REGISTER_PASS_X64(manager, ConvertInteractionInt8); CPU_REGISTER_PASS_ARM(manager, ConvertReduceNoKeepDims); CPU_REGISTER_PASS_ARM(manager, ConvertReduceMultiAxis); - CPU_REGISTER_PASS_ARM32(manager, MishDecomposition); CPU_REGISTER_PASS_ARM(manager, ConvertConv1D); CPU_REGISTER_PASS_ARM(manager, ConvertGroupConv1D); CPU_REGISTER_PASS_ARM(manager, ConvertGroupConvolution); @@ -914,7 +909,7 @@ void Transformations::runLptPasses(const std::vector& default using namespace ov::pass::low_precision; ov::pass::Manager lptManager("CPU:LPT"); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) auto quantizationRestrictions = std::vector( {QuantizationGranularityRestriction::create({0})}); auto supportedPrecisions = std::vector({ @@ -964,7 +959,7 @@ void Transformations::runLptPasses(const std::vector& default supportedPrecisions, quantizationRestrictions, LayerTransformation::Params(true, ov::element::f32, defaultPrecisions)); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) lowPrecPass->add_markup(); #endif CPU_REGISTER_PASS_ARM(lptManager, ConvertConvolutionBias); diff --git a/src/plugins/intel_cpu/src/utils/arch_macros.h b/src/plugins/intel_cpu/src/utils/arch_macros.h index b5dd003c1324e3..ea459e95bbfc4e 100644 --- a/src/plugins/intel_cpu/src/utils/arch_macros.h +++ b/src/plugins/intel_cpu/src/utils/arch_macros.h @@ -5,13 +5,6 @@ #pragma once #if defined(OV_CPU_WITH_ACL) -# if defined(OPENVINO_ARCH_ARM) -# define OV_CPU_INSTANCE_ACL32(...) {__VA_ARGS__}, -# define OV_CPU_ACL32(...) __VA_ARGS__ -# else -# define OV_CPU_INSTANCE_ACL32(...) -# define OV_CPU_ACL32(...) -# endif # if defined(OPENVINO_ARCH_ARM64) # define OV_CPU_INSTANCE_ACL64(...) {__VA_ARGS__}, # define OV_CPU_ACL64(...) __VA_ARGS__ @@ -19,7 +12,7 @@ # define OV_CPU_INSTANCE_ACL64(...) # define OV_CPU_ACL64(...) # endif -# if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +# if defined(OPENVINO_ARCH_ARM64) # define OV_CPU_INSTANCE_ACL(...) {__VA_ARGS__}, # define OV_CPU_ACL(...) __VA_ARGS__ # else @@ -27,8 +20,6 @@ # define OV_CPU_ACL(...) # endif #else -# define OV_CPU_INSTANCE_ACL32(...) -# define OV_CPU_ACL32(...) # define OV_CPU_INSTANCE_ACL64(...) # define OV_CPU_ACL64(...) # define OV_CPU_INSTANCE_ACL(...) diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index def19880073f8a..b321468971a990 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -134,7 +134,7 @@ void DebugLogEnabled::break_at(const std::string& log) { std::cout << "[ DEBUG ] Debug log breakpoint hit\n"; #if defined(_MSC_VER) __debugbreak(); -#elif defined(__APPLE__) || defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_RISCV64) +#elif defined(__APPLE__) || defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_RISCV64) __builtin_trap(); #else asm("int3"); diff --git a/src/plugins/intel_cpu/src/utils/precision_support.cpp b/src/plugins/intel_cpu/src/utils/precision_support.cpp index e69f43d9445abd..5f9c222fb13510 100644 --- a/src/plugins/intel_cpu/src/utils/precision_support.cpp +++ b/src/plugins/intel_cpu/src/utils/precision_support.cpp @@ -17,7 +17,7 @@ static bool hasFP16HardwareSupport() { #if defined(OPENVINO_ARCH_X86_64) return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2); -#elif defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#elif defined(OPENVINO_ARCH_ARM64) return with_cpu_neon_fp16(); #else return false; diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index a5798d03f657d4..cdd64a298a990c 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -65,14 +65,14 @@ if(NOT (X86 OR X86_64)) ${CMAKE_CURRENT_SOURCE_DIR}/utils/x64) endif() -if(NOT (ARM OR AARCH64)) +if(NOT AARCH64) list(APPEND EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/arm ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/arm ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/aarch64 ${CMAKE_CURRENT_SOURCE_DIR}/utils/arm) else() - # temporary disable all custom tests for ARM + # temporary disable all custom tests for AArch64 list(APPEND EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests) diff --git a/src/plugins/intel_cpu/tests/functional/cmake/specific_tests.cmake b/src/plugins/intel_cpu/tests/functional/cmake/specific_tests.cmake index ca1d25790989bf..399cd91b7f1658 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/specific_tests.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/specific_tests.cmake @@ -39,7 +39,7 @@ if(DEFINED ENABLE_CPU_SUBSET_TESTS_PATH) ${CMAKE_CURRENT_SOURCE_DIR}/utils/transformations/insert_requantize.cpp ${CPU_SUBSET_TEST_ABS_PATH}) -if(NOT (ARM OR AARCH64)) +if(NOT AARCH64) list(APPEND EXCLUDED_SOURCE_PATHS_FOR_SUBSET_TEST ${CMAKE_CURRENT_SOURCE_DIR}/utils/arm) endif() if(NOT RISCV64) diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake index 94f8cc7469cad3..474f24d20adb49 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake @@ -22,7 +22,7 @@ function(create_target_per_test_for_directory TEST_DIR TARGET_PREFIX) if(X86_64) list(APPEND REQUIRED_OBJECT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utils/x64/filter_cpu_info.cpp) -elseif(ARM OR AARCH64) +elseif(AARCH64) list(APPEND REQUIRED_OBJECT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utils/arm/filter_cpu_info.cpp) elseif(RISCV64) @@ -66,7 +66,7 @@ endif() # find all the source files with the name of a class file if(X86_64) file(GLOB_RECURSE LIST_OF_TEST_ARCH_INSTANCES ${TEST_DIR}/instances/x64/${TEST_CLASS_FILE_NAME}) - elseif(ARM OR AARCH64) + elseif(AARCH64) file(GLOB_RECURSE LIST_OF_TEST_ARCH_INSTANCES ${TEST_DIR}/instances/arm/${TEST_CLASS_FILE_NAME}) elseif(RISCV64) file(GLOB_RECURSE LIST_OF_TEST_ARCH_INSTANCES ${TEST_DIR}/instances/riscv64/${TEST_CLASS_FILE_NAME}) diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index c7f95f1d08bc33..0f10105a7347d5 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -162,7 +162,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) { ASSERT_GT(value, 0); // value has been configured automatically } -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) const auto expected_precision_for_performance_mode = ov::intel_cpu::hasHardwareSupport(ov::element::f16) ? ov::element::f16 : ov::element::f32; #else const auto expected_precision_for_performance_mode = ov::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32; @@ -327,8 +327,6 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginCheckCPUDeviceArchitecture) { ASSERT_EQ(value.as(), "intel64"); #elif defined(OPENVINO_ARCH_X86) ASSERT_EQ(value.as(), "ia32"); -#elif defined(OPENVINO_ARCH_ARM) - ASSERT_EQ(value.as(), "armhf"); #elif defined(OPENVINO_ARCH_ARM64) ASSERT_EQ(value.as(), "arm64"); #elif defined(OPENVINO_ARCH_RISCV64) diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp index 821c06bb0b88b3..a858f34c6e56e7 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp @@ -145,16 +145,9 @@ void ActivationLayerCPUTest::SetUp() { const auto primitiveType = getPrimitiveType(activationType, inType, inputShapes); selectedType = primitiveType.empty() ? "" : primitiveType + "_" + netPrecision.to_string(); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) -# if defined(OPENVINO_ARCH_ARM) - if (activationType == utils::ActivationTypes::GeluErf) // @todo tmp fallback to ref, gelu erf is disabled for 32bit ARM - selectedType = std::string("ref_") + netPrecision.to_string(); -# endif +#if defined(OPENVINO_ARCH_ARM64) if ((primitiveType != "jit") && (activationType == utils::ActivationTypes::SoftSign || // @todo not supported by ACL, can be decomposed with transformation -#if defined(OPENVINO_ARCH_ARM) - activationType == utils::ActivationTypes::GeluTanh || // @todo not supported by ACL, can be decomposed with transformation -#endif inputShapes.front().first.rank().get_length() > 5)) // @todo tmp fallback to ref, remove after 6D+ ranks are properly supported selectedType = std::string("ref_") + netPrecision.to_string(); #else @@ -174,7 +167,7 @@ void ActivationLayerCPUTest::SetUp() { auto activation = utils::make_activation(params, netPrecision, activationType, activationShapes, constantsValue); activation->get_rt_info() = getCPUInfo(); function = std::make_shared(ov::OutputVector{activation}, ov::ParameterVector{params}, "Activation"); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) if (netPrecision == ov::element::f32 && outPrecision == ov::element::f32) { abs_threshold = 8e-4; } @@ -300,10 +293,7 @@ const std::map>>& activat {Ceiling, {{}}}, {Negative, {{}}}, {Swish, {{0.1f}}}, -// On arm32 Mish is decomposed -#if !defined(OPENVINO_ARCH_ARM) {Mish, {{}}}, -#endif // On other platforms HSigmoid is decomposed #if defined(OPENVINO_ARCH_X86_64) || defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_RISCV64) {HSigmoid, {{}}}, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/comparison.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/comparison.cpp index 4cfebcb68f8a9e..e30b9455ccd256 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/comparison.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/comparison.cpp @@ -83,11 +83,6 @@ std::string ComparisonLayerCPUTest::getPrimitiveType(const utils::ComparisonType #if defined(OPENVINO_ARCH_ARM64) return "jit"; #endif -#if defined(OPENVINO_ARCH_ARM) && defined(OV_CPU_WITH_ACL) - // TODO [171225] : On ARM there is ACL executor support which requires U8 on output. - // This requirement is not met in Eltwise CPU node - ref impl is used. - return modelType == ov::element::i32 ? "acl" : "ref"; -#endif #if defined(OPENVINO_ARCH_RISCV64) if (ov::intel_cpu::riscv64::mayiuse(ov::intel_cpu::riscv64::gv)) { if (ov::intel_cpu::any_of(type, utils::ComparisonTypes::EQUAL, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp index c5a7ec770e459a..98cc49b17aa461 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp @@ -74,7 +74,7 @@ bool ConvertCPULayerTest::isInOutPrecisionSupported(ov::element::Type inPrc, ov: return false; #endif // ACL does not support specific in-out precision pairs -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) if ((inPrc == ov::element::i8 && outPrc == ov::element::u8) || (inPrc == ov::element::u8 && outPrc == ov::element::i8) || (inPrc == ov::element::f32 && (outPrc == ov::element::u8 || outPrc == ov::element::i8)) || diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp index 2a3908d6febbf4..d43f2542e8f350 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp @@ -231,7 +231,7 @@ TEST_P(ConvolutionLayerCPUTest, CompareWithRefs) { // FIXME: ACL output shape check fails if kernel, stride and padding equal to 1 // CpuGemm::validate checks that 2nd and 3rd dimention of the input and output shapes are equal and fails (ticket 114201) -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) if (std::all_of(kernel.begin(), kernel.end(), [](size_t i){return i == 1;}) && std::all_of(stride.begin(), stride.end(), [](size_t i){return i == 1;}) && std::all_of(padBegin.begin(), padBegin.end(), [](ptrdiff_t i){return i == 1;})) { diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution_backprop_data.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution_backprop_data.hpp index 26f8b2bc7a9a3c..f4ed542e1a3f5e 100755 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution_backprop_data.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution_backprop_data.hpp @@ -57,7 +57,7 @@ void SetUp() override; /* COMMON PARAMS */ const std::vector fusingParamsSet{ emptyFusingSpec, -#if !defined(OPENVINO_ARCH_ARM64) && !defined(OPENVINO_ARCH_ARM) +#if !defined(OPENVINO_ARCH_ARM64) fusingAddPerChannel #endif }; diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp index 9e0e0696fbdd3f..b7182afb0d0385 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp @@ -167,19 +167,6 @@ void EltwiseLayerCPUTest::SetUp() { rel_threshold = 0.05f; } -#if defined(OPENVINO_ARCH_ARM) - // ARM32-only: Sub/Div may be decomposed or routed to different implementations - // causing variability (binary->unary or ACL/ref). Keep tests stable but localized here. - if (ov::intel_cpu::any_of(eltwiseType, utils::EltwiseTypes::SUBTRACT, utils::EltwiseTypes::DIVIDE)) { - // If format expectations specify two inputs, but transforms reduce arity, limit to single input. - if (inFmts.size() > 1) { - inFmts.resize(1); - } - // Do not enforce specific primType (ACL vs REF) for Sub/Div on ARM32. - selectedType = CPUTestsBase::any_type; - } -#endif - shapes.resize(2); switch (opType) { case ov::test::utils::OpType::SCALAR: { @@ -203,7 +190,7 @@ void EltwiseLayerCPUTest::SetUp() { netType, configuration); // selectedType = makeSelectedTypeStr(getPrimitiveType(), netType); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) if (eltwiseType == utils::POWER) { selectedType = std::regex_replace(selectedType, std::regex("acl"), "ref"); } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/logical.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/logical.cpp index c5ad5b889593a3..936bcca67abf15 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/logical.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/logical.cpp @@ -85,9 +85,6 @@ std::string LogicalLayerCPUTest::getPrimitiveType(const utils::LogicalTypes& typ #if defined(OPENVINO_ARCH_ARM64) return "jit"; #endif -#if defined(OPENVINO_ARCH_ARM) - return "ref"; -#endif #if defined(OPENVINO_ARCH_RISCV64) if (ov::intel_cpu::riscv64::mayiuse(ov::intel_cpu::riscv64::gv)) { return "jit"; diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/matmul.cpp index dbfd44bcea60b7..f318b2189bd6a0 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/matmul.cpp @@ -105,7 +105,7 @@ void MatMulLayerCPUTest::SetUp() { rel_threshold = abs_threshold = 1e-2f; } else if (inference_precision == ov::element::f16) { inType = outType = netType = ElementType::f16; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) // rel_threshold = abs_threshold = 1e-2f; // Temporarily created the following rel_threshold because of this bug CVS-144523 and // https://github.com/ARM-software/ComputeLibrary/issues/1112 diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/pooling.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/pooling.cpp index f7d0a29c73a9f3..d118a5ba15aa2c 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/pooling.cpp @@ -9,7 +9,7 @@ #include "openvino/op/avg_pool.hpp" #include "openvino/op/max_pool.hpp" -#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#if defined(OPENVINO_ARCH_ARM64) # include "openvino/op/convert.hpp" # include "openvino/op/fake_quantize.hpp" # include "openvino/op/matmul.hpp" @@ -21,7 +21,7 @@ using namespace CPUTestUtils; namespace ov { namespace test { -#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#if defined(OPENVINO_ARCH_ARM64) namespace { std::shared_ptr addInt8FQAndMatMul(const ov::ParameterVector& params, @@ -99,12 +99,7 @@ void PoolingLayerCPUTest::SetUp() { selectedType = getPrimitiveType(); } if (isInt8) -#if defined(OPENVINO_ARCH_ARM) - // int8 pooling on arm32 is executed with fp32 - selectedType = selectedType + "_f32"; -#else selectedType = selectedType + "_I8"; -#endif else selectedType = makeSelectedTypeStr(selectedType, deduce_expected_precision(inPrc, configuration)); @@ -130,7 +125,7 @@ void PoolingLayerCPUTest::SetUp() { pooling->get_rt_info() = getCPUInfo(); // On ARM architectures, attach FQ->MatMul after int8 AvgPool. -#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#if defined(OPENVINO_ARCH_ARM64) if (isInt8) { function = addInt8FQAndMatMul(params, pooling, inPrc, targetStaticShapes); return; @@ -205,7 +200,7 @@ void AvgPoolingV14LayerCPUTest::SetUp() { pooling->get_rt_info() = getCPUInfo(); // On ARM architectures, attach FQ->MatMul after int8 Pooling -#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#if defined(OPENVINO_ARCH_ARM64) if (isInt8) { function = addInt8FQAndMatMul(params, pooling, inPrc, targetStaticShapes); return; @@ -369,7 +364,7 @@ namespace Pooling { // The combination of parameters: NCHW + CEIL gives an accuracy problem in ACL AvgPool ov::op::RoundingType expectedAvgRoundingType(const ov::op::RoundingType ceil_type) { -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) return ov::op::RoundingType::FLOOR; #else return ceil_type; @@ -755,7 +750,7 @@ const std::vector& inputShapes4D_int8() { } const CPUSpecificParams& expectedCpuConfigAnyLayout() { -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) static const CPUSpecificParams acl = CPUSpecificParams{{}, {}, {"acl"}, "acl"}; return acl; #else diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp index bc942c7d2492f7..ecb55a52de532a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp @@ -15,7 +15,7 @@ static std::string expectedPrimitiveType() { #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) return "ref"; #endif -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) return "acl"; #endif return {}; diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp index 82002728450d2b..8403b4920773e4 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp @@ -1318,7 +1318,7 @@ class MatMul3DWeightLayerCPUTest : public MatMulLayerCPUTest { rel_threshold = abs_threshold = 1e-2f; } else if (inference_precision == ov::element::f16) { inType = outType = netType = ElementType::f16; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) // rel_threshold = abs_threshold = 1e-2f; // Temporarily created the following rel_threshold because of this bug CVS-144523 and // https://github.com/ARM-software/ComputeLibrary/issues/1112 diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/conv_maxpool_activ.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/conv_maxpool_activ.cpp index b986dc46819870..c95304ffe15382 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/conv_maxpool_activ.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/conv_maxpool_activ.cpp @@ -61,9 +61,7 @@ void ConvPoolActivTest::SetUp() { paddingType); } -#if defined(OPENVINO_ARCH_ARM) - selectedType = makeSelectedTypeStr("ref_any", element::f32); -#elif defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) selectedType = makeSelectedTypeStr("gemm_acl", element::f32); #else selectedType = makeSelectedTypeStr(getPrimitiveType(), element::f32); @@ -72,9 +70,7 @@ void ConvPoolActivTest::SetUp() { } bool ConvPoolActivTest::primTypeCheck(std::string primType) const { -#if defined(OPENVINO_ARCH_ARM) - return primType == makeSelectedTypeStr(std::string("ref_any"), element::f32); -#elif defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) return primType == makeSelectedTypeStr(std::string("gemm_acl"), element::f32); #else auto isaType = getISA(true); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/fuse_transpose_reorder.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/fuse_transpose_reorder.cpp index 5331b640ed79b6..2c2d7d94410673 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/fuse_transpose_reorder.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/fuse_transpose_reorder.cpp @@ -248,7 +248,7 @@ void FuseTransposeAndReorderTest3::create_model() { TEST_P(FuseTransposeAndReorderTest3, CompareWithRefs) { run(); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) //on ARM there is reorder instead of transpose check_transpose_count(0); #else diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp index 09aa78e391fac0..18252c45668142 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp @@ -267,7 +267,7 @@ std::vector filter_additional_config_bf16() { std::vector filter_specific_params(bool trySetMlas) { std::vector specificParams; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) specificParams.push_back(CPUSpecificParams{{}, {}, {"acl"}, "acl"}); #else if (trySetMlas) { diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/merge_transpose_reorder.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/merge_transpose_reorder.cpp index 420bf8d5e69bf8..7a5ae2de17dac5 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/merge_transpose_reorder.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/merge_transpose_reorder.cpp @@ -157,13 +157,9 @@ std::vector static_shapes = { InputShape{{}, {{1, 32, 16, 16}}}, }; -#if defined(OPENVINO_ARCH_ARM) -const ExpectedResult successfull_fuse_result{1, 1, 3}; -#else const ExpectedResult successfull_fuse_result{1, 1, 2}; -#endif -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) const ExpectedResult unsuccessfull_fuse_result{3, 3, 3}; #else const ExpectedResult unsuccessfull_fuse_result{3, 3, 2}; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/reshape_fc.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/reshape_fc.cpp index 17eab3f9381171..929aa2e5fd74b3 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/reshape_fc.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/reshape_fc.cpp @@ -96,7 +96,7 @@ static std::vector filterFusingParams(const std::vector fusingParamsSet{emptyFusingSpec, fusingBias, fusingMultiplyPerChannel}; -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) const auto gemmParam = CPUSpecificParams{{}, {}, {"acl"}, "acl"}; #elif defined(OV_CPU_WITH_MLAS) const auto gemmParam = CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"}; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/cpu_reservation_test.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/cpu_reservation_test.cpp index d96c0e8e51286f..1a8382792cce82 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/cpu_reservation_test.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/cpu_reservation_test.cpp @@ -24,7 +24,7 @@ using Device = std::string; using Config = ov::AnyMap; using CpuReservationTest = ::testing::Test; -#if !(defined(OPENVINO_ARCH_ARM) || defined(__APPLE__) || defined(__EMSCRIPTEN__)) +#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__)) TEST_F(CpuReservationTest, smoke_Mutiple_CompiledModel_Reservation) { std::vector> models; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/plugin_name.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/plugin_name.cpp index a69f1b08b6ae67..4fafbe63848639 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/plugin_name.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/plugin_name.cpp @@ -4,7 +4,7 @@ #include "openvino/core/visibility.hpp" -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) +#if defined(OPENVINO_ARCH_ARM64) const char * cpu_plugin_file_name = "openvino_arm_cpu_plugin"; #elif defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) const char * cpu_plugin_file_name = "openvino_intel_cpu_plugin"; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 2ddafca99fe501..da02a3ed9e0351 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -279,14 +279,14 @@ const std::vector& disabled_test_patterns() { std::regex(R"(.*smoke_ConcatSDPTransposeTestWrongBeamIdx.*)"), // Disabled due to dependency on tests execution order, issue: 178036 -#if !(defined(__APPLE__) && defined(__MACH__)) && (defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)) +#if !(defined(__APPLE__) && defined(__MACH__)) && (defined(OPENVINO_ARCH_ARM64)) std::regex(R"(.*smoke_ScaledAttn_CPU\/ScaledAttnLayerCPUTest.CompareWithRefs\/netPRC=f32_IS=\[\?.8.\?.64]_\[\?.8.\?.64]_\[\?.1.\?.\?\]_TS=\(1.8.100.64\)_\(1.8.1.64\)_\(2.8.10.64\)_\(2.8.10.64\)_\(1.8.100.64\)_\(1.8.1.64\)_\(2.8.10.64\)_\(2.8.10.64\)_\(1.1.1.100\)_\(1.1.1.1\)_\(2.1.1.10\)_\(2.1.10.10\)_is_causal=0_has_attn=0_has_scale=0_trgDev=CPU_primitive=ref_any.*)"), #endif #if defined(OPENVINO_ARCH_X86) std::regex(R"(.*DetectionOutputLayerTest.*)"), // WIP: plugin cannot be loaded for some reason std::regex(R"(.*IEClassBasicTestP.*)"), -#elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#elif defined(OPENVINO_ARCH_ARM64) std::regex(R"(smoke_CompareWithRefs_static_check_collapsing/EltwiseLayerTest.Inference/IS.*_eltwise_op_type=Div_secondary_input_type=PARAMETER_opType=VECTOR_model_type=i32_InType=dynamic_OutType=dynamic_trgDev=CPU.*)"), // Issue: 123321 std::regex(R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=1.*relu.*direction=reverse.*)"), @@ -313,6 +313,8 @@ const std::vector& disabled_test_patterns() { std::regex(R"(.*smoke_AvgPoolV14_CPU_4D/AvgPoolingV14LayerCPUTest.CompareWithRefs.*)"), // Ticket: 168931 std::regex(R"(.*smoke_Reduce_OneAxis_dynamic_CPU/ReduceCPULayerTest.CompareWithRefs.*)"), + // ACL f16 ReduceProd accuracy on ARM64 + std::regex(R"(.*smoke_Reduce_OneAxis_CPU/ReduceCPULayerTest.CompareWithRefs.*type=Prod.*INFERENCE_PRECISION_HINT=f16.*)"), // invalid test: checks u8 precision for runtime graph, while it should be f32 std::regex(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)"), // int8 specific @@ -333,70 +335,6 @@ const std::vector& disabled_test_patterns() { std::regex(R"(.*CPU/CoreThreadingTest.smoke_QueryModel.*)"), std::regex(R"(.*WeightlessCacheAccuracy.*)"), #endif -#if defined(OPENVINO_ARCH_ARM) - // Issue: 144998 - std::regex(R"(.*smoke_CachingSupportCase_CPU.*_(i8|u8).*)"), - std::regex(R"(.*smoke_Hetero_CachingSupportCase.*_(i8|u8).*)"), - // TODO: rounding errors - std::regex(R"(.*iv_secondaryInputType=PARAMETER_opType=VECTOR_NetType=i32.*)"), - // not supported - std::regex(R"(.*fma.*EltwiseLayerCPUTest.*)"), - std::regex(R"(.*int_jit.*EltwiseLayerCPUTest.*)"), - std::regex(R"(.*dyn.*EltwiseChainTest.*)"), - std::regex(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=i8.*Conversion=i8.*)"), - std::regex(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=u8.*Conversion=i8.*)"), - std::regex(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=i16.*Conversion=i8.*)"), - std::regex(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=u16.*Conversion=i8.*)"), - std::regex(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=i32.*Conversion=i8.*)"), - // by calc abs_threshold with expected value - std::regex(R"(.*smoke_CompareWithRefs_static/EltwiseLayerTest.*_eltwise_op_type=Div_.*_model_type=i32_.*)"), - // int8 / code-generation specific - std::regex(R"(smoke_LPT.*)"), - std::regex(R"(.*smoke_RoPETest.*)"), - std::regex(R"(.*ActivationLayerTest.*Inference.*)"), - std::regex(R"(.*AddConvertToReorderTest.*smoke_TestAddReorder_CPU.*)"), - std::regex(R"(.*AddOutputsTest.*smoke_CheckOutputExist.*)"), - std::regex(R"(.*CompileModelCacheRuntimePropertiesTestBase.*CanLoadFromFileWithoutException.*)"), - std::regex(R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*2InputSubtract_f.*)"), - std::regex(R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*ConvPoolRelu_f.*)"), - std::regex(R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*MatMulBias_f.*)"), - std::regex(R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*SimpleFunctionRelu_f.*)"), - std::regex(R"(.*CompileModelCacheTestBase.*CompareWithRefImpl/MatMulBias_f32_batch1_CPU)"), - std::regex(R"(.*CompileModelLoadFromCacheTest.*CanGetCorrectLoadedFromCacheProperty.*)"), - std::regex(R"(.*CompileModelLoadFromFileTestBase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)"), - std::regex(R"(.*CompileModelLoadFromFileTestBase.*CanLoadFromFileWithoutException.*)"), - std::regex(R"(.*CompileModelLoadFromMemoryTestBase.*CanLoadFromMemoryWithoutExecption.*)"), - std::regex(R"(.*CompileModelLoadFromMemoryTestBase.*CanLoadFromMemoryWithoutWeightsANdExecption.*)"), - std::regex(R"(.*CompileModelWithCacheEncryptionTest.*CanImportModelWithoutException.*)"), - std::regex(R"(.*ConcatMultiQuerySDPTest.*f16.*)"), - std::regex(R"(.*ConcatSDPTest.*f16.*)"), - std::regex(R"(.*FakeConvertLayerTest.*f16.*)"), - std::regex(R"(.*CoreThreadingTestsWithCacheEnabled.*smoke_compiled_model_cache_enabled.*)"), - std::regex(R"(.*CoreThreadingTestsWithIter.*smoke_CompileModel.*)"), - std::regex(R"(.*CustomOpConvertI64CPUTest.*CompareWithRefs.*)"), - std::regex(R"(.*EltwiseLayerCPUTest.*CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*)"), - std::regex(R"(.*EltwiseLayerTest.*Inference.*)"), - std::regex(R"(.*ExecGraphDuplicateInputsOutputsNames.*CheckOutputsMatch.*)"), - std::regex(R"(.*ExecGraphKeepAssignNode.*KeepAssignNode.*)"), - std::regex(R"(.*ExecGraphRemoveParameterNode.*RemoveParameterNode.*)"), - std::regex(R"(.*IndexAddTest.*CompareWithRefs.*)"), - std::regex(R"(.*InterpolateLayerCPUTest.*CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*)"), - std::regex(R"(.*MatMulLayerCPUTest.*CompareWithRefs.*)"), - std::regex(R"(.*MatmulWeightsDecompression.*CompareWithRefs.*)"), - std::regex(R"(.*MvnLayerCPUTest.*CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*)"), - std::regex(R"(.*NonInputInPlaceTest.*CompareWithRefs.*)"), - std::regex(R"(.*OVClassCompiledModelGetPropertyTest_EXEC_DEVICES.*CanGetExecutionDeviceInfo.*)"), - std::regex(R"(.*OVClassConfigTestCPU.*smoke_.*)"), - std::regex(R"(.*OVClassConfigTestCPU.*smoke_CpuExecNetwork.*)"), - std::regex(R"(.*OVInferenceChaining.*StaticOutputToDynamicInput.*)"), - std::regex(R"(.*OVInferenceChaining.*StaticOutputToStaticInput.*)"), - std::regex(R"(.*OVInferenceChainingStatic.*StaticOutputToStaticInput.*)"), - std::regex(R"(.*ReduceCPULayerTest.*CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*)"), - // Issue: 164799 - std::regex(R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*)"), - // Issue 167685 - std::regex(R"(.*importExportModelWithTypeRelaxedExt.*)"), -#endif #if defined(OPENVINO_ARCH_RISCV64) // object is not initialized std::regex(R"(.*StaticLoopDynamicSubgraphCPUTest.smoke_StaticLoopWithDynSubgraph.*)"), @@ -668,7 +606,7 @@ const std::vector& disabled_test_patterns() { patterns.emplace_back(std::regex(R"(.*ConcatSDPTest.*f16.*)")); patterns.emplace_back(std::regex(R"(.*ConvertCPULayerTest.*f16.*)")); } -#elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) +#elif defined(OPENVINO_ARCH_ARM64) if (!ov::intel_cpu::hasIntDotProductSupport()) { patterns.emplace_back(std::regex(R"(.*smoke_MatMulCompressedWeights_Kleidiai.*)")); } diff --git a/src/plugins/intel_cpu/tests/functional/utils/arm/filter_cpu_info.cpp b/src/plugins/intel_cpu/tests/functional/utils/arm/filter_cpu_info.cpp index dffd9250116d0a..63280bf8ca6d83 100644 --- a/src/plugins/intel_cpu/tests/functional/utils/arm/filter_cpu_info.cpp +++ b/src/plugins/intel_cpu/tests/functional/utils/arm/filter_cpu_info.cpp @@ -26,11 +26,6 @@ std::vector filterCPUInfoForArch(const std::vector> proc_type_table = {{8, 4, 4, 0, 0, 0, 0}}; - auto model = make_dummy_model(); - Config config; - config.modelType = Config::ModelType::CNN; - config.modelPreferThreads = -1; - - int result = get_model_prefer_threads(0, proc_type_table, model, config, 1, 2.0f); - EXPECT_GE(config.modelPreferThreadsThroughput, 1); - EXPECT_EQ(result, config.modelPreferThreadsThroughput); -} - -TEST_F(ModelPreferThreadsIntegrationTest, direct_arm_linux_throughput_branches) { - Config config; - std::vector> proc_type_table = {{8, 4, 4, 0, 0, 0, 0}}; - ov::MemBandwidthPressure tolerance; - // UNKNOWN should allow high throughput when compute-limited - tolerance.max_mem_tolerance = ov::MemBandwidthPressure::UNKNOWN; - configure_arm_linux_threads(config, proc_type_table, tolerance, false, false); - EXPECT_GE(config.modelPreferThreadsThroughput, 1); -} -#endif - -#if (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) +#if (defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__) TEST_F(ModelPreferThreadsIntegrationTest, apple_size_one_special_latency_choice) { std::vector> proc_type_table = {{10, 6, 4, 0, 0, 0, 0}}; auto model = make_dummy_model(); @@ -416,4 +389,4 @@ TEST_F(ModelPreferThreadsIntegrationTest, direct_apple_special_latency_and_throu #endif -} // namespace \ No newline at end of file +} // namespace diff --git a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake index 6a29f07fc5a6e9..07f0832ebee790 100644 --- a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake +++ b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake @@ -93,7 +93,7 @@ elseif(ENABLE_ARM_COMPUTE_CMAKE) endif() # Multi-ISA support with SME - if(NOT ARM AND OV_CPU_AARCH64_USE_MULTI_ISA) + if(OV_CPU_AARCH64_USE_MULTI_ISA) add_compile_definitions(ENABLE_SME ARM_COMPUTE_ENABLE_SME ARM_COMPUTE_ENABLE_SME2) endif() @@ -182,8 +182,6 @@ elseif(NOT TARGET arm_compute::arm_compute) if(ANDROID_ABI STREQUAL "arm64-v8a") set(android_triple_prefix "aarch64-linux-android") - elseif(ANDROID_ABI STREQUAL "armeabi-v7a") - set(android_triple_prefix "armv7a-linux-androideabi") elseif(ANDROID_ABI STREQUAL "x86") set(android_triple_prefix "i686-linux-android") elseif(ANDROID_ABI STREQUAL "x86_64") @@ -318,7 +316,7 @@ elseif(NOT TARGET arm_compute::arm_compute) endif() # Multi-ISA support - if(NOT ARM AND OV_CPU_AARCH64_USE_MULTI_ISA) + if(OV_CPU_AARCH64_USE_MULTI_ISA) set(local_extra_cxx_flags "${local_extra_cxx_flags} -DENABLE_SME -DARM_COMPUTE_ENABLE_SME -DARM_COMPUTE_ENABLE_SME2") endif() @@ -357,13 +355,9 @@ elseif(NOT TARGET arm_compute::arm_compute) endif() # Architecture configuration - if(ARM) - ov_arm_compute_add_option("estate" "32") - else() - ov_arm_compute_add_option("estate" "64") - if(OV_CPU_AARCH64_USE_MULTI_ISA) - ov_arm_compute_add_option("multi_isa" "1") - endif() + ov_arm_compute_add_option("estate" "64") + if(OV_CPU_AARCH64_USE_MULTI_ISA) + ov_arm_compute_add_option("multi_isa" "1") endif() # Install directory diff --git a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt index 0c82f03025b567..48e79edf2d4fc6 100644 --- a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt @@ -93,16 +93,14 @@ function(ov_add_onednn) set(DNNL_TARGET_ARCH "X86" CACHE STRING "" FORCE) elseif(RISCV64) set(DNNL_TARGET_ARCH "RV64" CACHE STRING "" FORCE) - elseif(ARM) - set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE) elseif(AARCH64) set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE) else() message(FATAL_ERROR "Unsupported system processor ${CMAKE_SYSTEM_PROCESSOR}") endif() - if(AARCH64 OR ARM) - set(DNNL_USE_ACL ON CACHE BOOL "Use ARM Compute Library kernels in oneDNN" FORCE) + if(AARCH64) + set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "Use AArch64 Compute Library kernels in oneDNN" FORCE) endif() set(SDL_cmake_included ON) ## to skip internal SDL flags. SDL flags are already set on OV level @@ -125,7 +123,7 @@ function(ov_add_onednn) ov_add_compiler_flags(-Wno-error) ov_add_compiler_flags(-Wno-undef) ov_add_compiler_flags(-Wno-missing-declarations) - if(NOT CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64)) + if(NOT CMAKE_COMPILER_IS_GNUCXX AND AARCH64) ov_add_compiler_flags(-Wno-macro-redefined) endif() if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11 AND CMAKE_COMPILER_IS_GNUCXX) @@ -148,7 +146,7 @@ function(ov_add_onednn) endif() # to find our FindACL.cmake - if(DNNL_USE_ACL) + if(DNNL_AARCH64_USE_ACL) list(APPEND CMAKE_MODULE_PATH "${intel_cpu_thirdparty_SOURCE_DIR}") # oneDNN needs arm_compute_version.embed file to detect ACL version # since the file has not been generated yet, it is created manually @@ -158,7 +156,7 @@ function(ov_add_onednn) # Workaround for ARM compiler flag conflicts: oneDNN sets -mcpu=generic which conflicts # with OpenVINO's -march=armv8.2-a+fp16 flags. Override oneDNN's arch optimization flags # to prevent the conflicting -mcpu=generic flag on ARM architectures. - if(AARCH64 OR ARM) + if(AARCH64) set(DNNL_ARCH_OPT_FLAGS "" CACHE STRING "Disable oneDNN's automatic -mcpu=generic to avoid conflicts with OpenVINO ARM flags" FORCE) endif() @@ -167,7 +165,7 @@ function(ov_add_onednn) # install static libraries ov_install_static_lib(dnnl ${OV_CPACK_COMP_CORE}) - if(DNNL_USE_ACL AND NOT BUILD_SHARED_LIBS) + if(DNNL_AARCH64_USE_ACL AND NOT BUILD_SHARED_LIBS) # use ACLConfig.cmake in OpenVINOConfig.cmake in case of static build # we cannot use 'ov_install_static_lib' for imported targets, # but for this we need to install library files diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index f82d833de6f13f..5f7996845f8b7b 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit f82d833de6f13fac4bb1926d521ca8fec4f4ae01 +Subproject commit 5f7996845f8b7b3c230eee7ba7569b9b6158a2d7