From 5f7996845f8b7b3c230eee7ba7569b9b6158a2d7 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Mon, 1 Jun 2026 02:33:08 +0200 Subject: [PATCH] cpu: aarch64: align ACL integration with oneDNN v3.10.x --- .github/automation/aarch64/build.sh | 4 +- .github/workflows/aarch64-acl.yml | 129 +++++++++ .github/workflows/ci-aarch64.yml | 240 ++++++++++++++++ .github/workflows/nightly-aarch64.yml | 199 ++++++++++++++ .github/workflows/performance-aarch64.yml | 258 ++++++++++++++++++ CMakeLists.txt | 2 - cmake/ACL.cmake | 4 +- cmake/options.cmake | 2 +- cmake/platform.cmake | 4 +- src/common/utils.cpp | 2 +- src/cpu/CMakeLists.txt | 3 - src/cpu/README.md | 1 - src/cpu/aarch64/CMakeLists.txt | 9 +- .../acl_batch_normalization.cpp | 6 +- .../acl_batch_normalization.hpp | 12 +- .../acl_benchmark_scheduler.cpp | 6 +- .../acl_benchmark_scheduler.hpp | 10 +- src/cpu/{acl => aarch64}/acl_binary.cpp | 4 +- src/cpu/{acl => aarch64}/acl_binary.hpp | 8 +- .../acl_convolution_utils.cpp | 24 +- .../acl_convolution_utils.hpp | 10 +- .../{acl => aarch64}/acl_deconvolution.cpp | 6 +- .../{acl => aarch64}/acl_deconvolution.hpp | 10 +- .../acl_depthwise_convolution.cpp | 6 +- .../acl_depthwise_convolution.hpp | 10 +- src/cpu/{acl => aarch64}/acl_eltwise.cpp | 4 +- src/cpu/{acl => aarch64}/acl_eltwise.hpp | 10 +- .../{acl => aarch64}/acl_gemm_convolution.cpp | 4 +- .../{acl => aarch64}/acl_gemm_convolution.hpp | 8 +- .../acl_indirect_gemm_convolution.cpp | 4 +- .../acl_indirect_gemm_convolution.hpp | 10 +- .../{acl => aarch64}/acl_inner_product.cpp | 6 +- .../{acl => aarch64}/acl_inner_product.hpp | 14 +- .../acl_layer_normalization.cpp | 6 +- .../acl_layer_normalization.hpp | 12 +- src/cpu/{acl => aarch64}/acl_pooling.cpp | 6 +- src/cpu/{acl => aarch64}/acl_pooling.hpp | 12 +- src/cpu/{acl => aarch64}/acl_post_ops.cpp | 6 +- src/cpu/{acl => aarch64}/acl_post_ops.hpp | 14 +- src/cpu/{acl => aarch64}/acl_prelu.cpp | 6 +- src/cpu/{acl => aarch64}/acl_prelu.hpp | 12 +- 
src/cpu/aarch64/acl_reorder.cpp | 4 +- src/cpu/aarch64/acl_reorder.hpp | 12 +- src/cpu/{acl => aarch64}/acl_softmax.cpp | 6 +- src/cpu/{acl => aarch64}/acl_softmax.hpp | 10 +- src/cpu/{acl => aarch64}/acl_thread.cpp | 10 +- src/cpu/{acl => aarch64}/acl_thread.hpp | 10 +- .../acl_threadpool_scheduler.cpp | 8 +- .../acl_threadpool_scheduler.hpp | 10 +- src/cpu/{acl => aarch64}/acl_utils.cpp | 6 +- src/cpu/{acl => aarch64}/acl_utils.hpp | 10 +- .../acl_winograd_convolution.cpp | 6 +- .../acl_winograd_convolution.hpp | 14 +- .../matmul/acl_lowp_matmul.cpp | 26 +- .../matmul/acl_lowp_matmul.hpp | 14 +- .../matmul/acl_lowp_matmul_sq.cpp | 13 +- .../matmul/acl_lowp_matmul_sq.hpp | 12 +- .../{acl => aarch64}/matmul/acl_matmul.cpp | 13 +- .../{acl => aarch64}/matmul/acl_matmul.hpp | 8 +- .../matmul/acl_matmul_utils.cpp | 8 +- .../matmul/acl_matmul_utils.hpp | 10 +- src/cpu/acl/CMakeLists.txt | 33 --- src/cpu/cpu_batch_normalization_list.cpp | 6 +- src/cpu/cpu_binary_list.cpp | 6 +- src/cpu/cpu_convolution_list.cpp | 30 +- src/cpu/cpu_deconvolution_list.cpp | 6 +- src/cpu/cpu_eltwise_list.cpp | 6 +- src/cpu/cpu_engine.hpp | 8 +- src/cpu/cpu_inner_product_list.cpp | 12 +- src/cpu/cpu_layer_normalization_list.cpp | 6 +- src/cpu/cpu_pooling_list.cpp | 6 +- src/cpu/cpu_prelu_list.cpp | 6 +- src/cpu/cpu_softmax_list.cpp | 6 +- src/cpu/matmul/cpu_matmul_list.cpp | 16 +- src/cpu/platform.hpp | 21 +- .../reorder/cpu_reorder_regular_f32_bf16.cpp | 2 +- .../reorder/cpu_reorder_regular_f32_f32.cpp | 4 +- tests/benchdnn/rnn/rnn.cpp | 2 +- tests/benchdnn/softmax/softmax.cpp | 4 +- 79 files changed, 1139 insertions(+), 354 deletions(-) create mode 100644 .github/workflows/aarch64-acl.yml create mode 100644 .github/workflows/ci-aarch64.yml create mode 100644 .github/workflows/nightly-aarch64.yml create mode 100644 .github/workflows/performance-aarch64.yml rename src/cpu/{acl => aarch64}/acl_batch_normalization.cpp (96%) rename src/cpu/{acl => aarch64}/acl_batch_normalization.hpp (98%) 
rename src/cpu/{acl => aarch64}/acl_benchmark_scheduler.cpp (96%) rename src/cpu/{acl => aarch64}/acl_benchmark_scheduler.hpp (92%) rename src/cpu/{acl => aarch64}/acl_binary.cpp (99%) rename src/cpu/{acl => aarch64}/acl_binary.hpp (95%) rename src/cpu/{acl => aarch64}/acl_convolution_utils.cpp (96%) rename src/cpu/{acl => aarch64}/acl_convolution_utils.hpp (98%) rename src/cpu/{acl => aarch64}/acl_deconvolution.cpp (96%) rename src/cpu/{acl => aarch64}/acl_deconvolution.hpp (98%) rename src/cpu/{acl => aarch64}/acl_depthwise_convolution.cpp (97%) rename src/cpu/{acl => aarch64}/acl_depthwise_convolution.hpp (91%) rename src/cpu/{acl => aarch64}/acl_eltwise.cpp (98%) rename src/cpu/{acl => aarch64}/acl_eltwise.hpp (93%) rename src/cpu/{acl => aarch64}/acl_gemm_convolution.cpp (99%) rename src/cpu/{acl => aarch64}/acl_gemm_convolution.hpp (94%) rename src/cpu/{acl => aarch64}/acl_indirect_gemm_convolution.cpp (99%) rename src/cpu/{acl => aarch64}/acl_indirect_gemm_convolution.hpp (90%) rename src/cpu/{acl => aarch64}/acl_inner_product.cpp (99%) rename src/cpu/{acl => aarch64}/acl_inner_product.hpp (91%) rename src/cpu/{acl => aarch64}/acl_layer_normalization.cpp (98%) rename src/cpu/{acl => aarch64}/acl_layer_normalization.hpp (89%) rename src/cpu/{acl => aarch64}/acl_pooling.cpp (99%) rename src/cpu/{acl => aarch64}/acl_pooling.hpp (93%) rename src/cpu/{acl => aarch64}/acl_post_ops.cpp (97%) rename src/cpu/{acl => aarch64}/acl_post_ops.hpp (96%) rename src/cpu/{acl => aarch64}/acl_prelu.cpp (96%) rename src/cpu/{acl => aarch64}/acl_prelu.hpp (96%) rename src/cpu/{acl => aarch64}/acl_softmax.cpp (98%) rename src/cpu/{acl => aarch64}/acl_softmax.hpp (92%) rename src/cpu/{acl => aarch64}/acl_thread.cpp (95%) rename src/cpu/{acl => aarch64}/acl_thread.hpp (92%) rename src/cpu/{acl => aarch64}/acl_threadpool_scheduler.cpp (97%) rename src/cpu/{acl => aarch64}/acl_threadpool_scheduler.hpp (91%) rename src/cpu/{acl => aarch64}/acl_utils.cpp (99%) rename src/cpu/{acl => 
aarch64}/acl_utils.hpp (97%) rename src/cpu/{acl => aarch64}/acl_winograd_convolution.cpp (94%) rename src/cpu/{acl => aarch64}/acl_winograd_convolution.hpp (94%) rename src/cpu/{acl => aarch64}/matmul/acl_lowp_matmul.cpp (96%) rename src/cpu/{acl => aarch64}/matmul/acl_lowp_matmul.hpp (91%) rename src/cpu/{acl => aarch64}/matmul/acl_lowp_matmul_sq.cpp (97%) rename src/cpu/{acl => aarch64}/matmul/acl_lowp_matmul_sq.hpp (92%) rename src/cpu/{acl => aarch64}/matmul/acl_matmul.cpp (98%) rename src/cpu/{acl => aarch64}/matmul/acl_matmul.hpp (94%) rename src/cpu/{acl => aarch64}/matmul/acl_matmul_utils.cpp (98%) rename src/cpu/{acl => aarch64}/matmul/acl_matmul_utils.hpp (94%) delete mode 100644 src/cpu/acl/CMakeLists.txt diff --git a/.github/automation/aarch64/build.sh b/.github/automation/aarch64/build.sh index 2a244a41090..1be9c55be03 100755 --- a/.github/automation/aarch64/build.sh +++ b/.github/automation/aarch64/build.sh @@ -45,7 +45,7 @@ if [[ "$ONEDNN_ACTION" == "configure" ]]; then cmake \ "${GENERATOR_ARGS[@]}" \ -Bbuild -S. \ - -DDNNL_USE_ACL=ON \ + -DDNNL_AARCH64_USE_ACL=ON \ -DONEDNN_BUILD_GRAPH=OFF \ -DDNNL_CPU_RUNTIME=OMP \ -DDNNL_WERROR=ON \ @@ -61,7 +61,7 @@ if [[ "$ONEDNN_ACTION" == "configure" ]]; then cmake \ "${GENERATOR_ARGS[@]}" \ -Bbuild -S. \ - -DDNNL_USE_ACL=ON \ + -DDNNL_AARCH64_USE_ACL=ON \ -DONEDNN_BUILD_GRAPH=$ONEDNN_BUILD_GRAPH \ -DDNNL_CPU_RUNTIME=$ONEDNN_THREADING \ -DDNNL_WERROR=ON \ diff --git a/.github/workflows/aarch64-acl.yml b/.github/workflows/aarch64-acl.yml new file mode 100644 index 00000000000..3e9bb6c90d8 --- /dev/null +++ b/.github/workflows/aarch64-acl.yml @@ -0,0 +1,129 @@ +# ******************************************************************************* +# Copyright 2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +name: "Build ACL cache" + +#* To avoid duplicate jobs running when both push and PR is satisfied, we use this: +#* https://github.com/orgs/community/discussions/26940#discussioncomment-5686753 +on: + workflow_call: + inputs: + acl_hash: + required: false + type: string + + workflow_dispatch: + +# Declare default permissions as read only. +permissions: read-all + +jobs: + # Cache is built sequentially to avoid cache-hit race conditions + build-cache: + strategy: + max-parallel: 1 + matrix: + config: [ + { name: MacOS, label: macos-14, threading: SEQ, toolset: clang, build: RelWithAssert }, + { name: cb100, label: ubuntu-24.04-arm, threading: OMP, toolset: gcc, build: RelWithAssert }, + { name: cb100, label: ubuntu-24.04-arm, threading: OMP, toolset: gcc, build: Release } + ] + + name: ${{ matrix.config.name }}, ${{ matrix.config.toolset }}, ${{ matrix.config.threading }}, ${{ matrix.config.build }} + runs-on: ${{ matrix.config.label }} + steps: + - name: Checkout oneDNN + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: oneDNN + + - name: Read version file + id: get-versions + run: | + content=$(cat "${{ github.workspace }}/oneDNN/.github/automation/aarch64/ci.json") + content="${content//[$'\t\r\n$ ']}" + echo "output=$content" >> "$GITHUB_OUTPUT" + + - name: Clone ACL + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh + env: + ACL_ACTION: clone + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + 
ACL_VERSION: ${{ inputs.acl_hash || fromJson(steps.get-versions.outputs.output).dependencies.acl }} + + - name: Get ACL commit hash for cache key + id: get_acl_commit_hash + run: (cd "${{ github.workspace }}/ComputeLibrary" && echo "ACLCommitHash=$(git rev-parse --short HEAD)") >> "$GITHUB_OUTPUT" + + - name: Get system name + id: get_system_name + run: (echo "SystemName=$(uname)") >> "$GITHUB_OUTPUT" + + - name: Restore cached ACL + id: cache-acl-restore + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash.outputs.ACLCommitHash }} + path: ${{ github.workspace }}/ComputeLibrary/build + lookup-only: true + + - name: Install Scons (MacOS) + if: ${{ matrix.config.name == 'MacOS' && (steps.cache-acl-restore.outputs.cache-hit != 'true') }} + run: brew install scons + + - name: Install scons (Linux) + if: ${{ matrix.config.name != 'MacOS' && (steps.cache-acl-restore.outputs.cache-hit != 'true') }} + run: | + sudo apt update -y + sudo apt install -y scons + + - if: ${{ contains(matrix.config.label,'ubuntu') && (matrix.config.threading == 'OMP') && (steps.cache-acl-restore.outputs.cache-hit != 'true') }} + name: Install openmp + run: | + sudo apt install -y libomp-dev + + - if: ${{ contains(matrix.config.label,'ubuntu') && (matrix.config.toolset == 'gcc') && (steps.cache-acl-restore.outputs.cache-hit != 'true') }} + name: Install gcc + run: | + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt update -y + sudo apt install -y g++-${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + + - if: ${{ contains(matrix.config.label,'ubuntu') && (matrix.config.toolset == 'clang') && (steps.cache-acl-restore.outputs.cache-hit != 'true') }} + name: Install clang + uses: KyleMayes/install-llvm-action@a7a1a882e2d06ebe05d5bb97c3e1f8c984ae96fc + with: + version: ${{ 
fromJson(steps.get-versions.outputs.output).dependencies.clang }} + + - name: Build ACL + if: ${{ steps.cache-acl-restore.outputs.cache-hit != 'true' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh + env: + ACL_ACTION: build + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + ACL_THREADING: ${{ matrix.config.threading }} + BUILD_TOOLSET: ${{ matrix.config.toolset }} + ACL_BUILD_TYPE: ${{ matrix.config.build }} + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + + - name: Save ACL in cache + id: cache-acl_build-save + if: ${{ steps.cache-acl-restore.outputs.cache-hit != 'true' }} + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash.outputs.ACLCommitHash }} + path: ${{ github.workspace }}/ComputeLibrary/build diff --git a/.github/workflows/ci-aarch64.yml b/.github/workflows/ci-aarch64.yml new file mode 100644 index 00000000000..47cb807037a --- /dev/null +++ b/.github/workflows/ci-aarch64.yml @@ -0,0 +1,240 @@ +# ******************************************************************************* +# Copyright 2024-2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ******************************************************************************* + +name: "CI AArch64" + +#* To avoid duplicate jobs running when both push and PR is satisfied, we use this: +#* https://github.com/orgs/community/discussions/26940#discussioncomment-5686753 +on: + push: + branches: [main, "rls-*"] + paths: + - ".github/automation/performance/**" + - ".github/automation/*" + - ".github/automation/aarch64/**" + - ".github/workflows/aarch64-acl.yml" + - ".github/workflows/ci-aarch64.yml" + - "cmake/**" + - "examples/**" + - "include/**" + - "src/common/**" + - "src/cpu/*" + - "src/cpu/aarch64/**" + - "tests/**" + - "CMakeLists.txt" + pull_request: + types: [opened, synchronize, reopened] + paths: + - ".github/automation/performance/**" + - ".github/automation/*" + - ".github/automation/aarch64/**" + - ".github/workflows/aarch64-acl.yml" + - ".github/workflows/ci-aarch64.yml" + - "cmake/**" + - "examples/**" + - "include/**" + - "src/common/**" + - "src/cpu/*" + - "src/cpu/aarch64/**" + - "tests/**" + - "CMakeLists.txt" + #* allow manual trigger of workflow when needed. + workflow_dispatch: + +#* Stop stale workflows when pull requests are updated: https://stackoverflow.com/a/70972844 +#* Does not apply to the main branch. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +# Declare default permissions as read only. 
+permissions: read-all + +jobs: + build-acl-cache: + uses: ./.github/workflows/aarch64-acl.yml + + build-and-test: + needs: build-acl-cache + strategy: + matrix: + config: [ + { name: MacOS, label: macos-14, threading: SEQ, toolset: clang, build: RelWithAssert, testset: SMOKE }, + { name: cb100, label: ubuntu-24.04-arm, threading: OMP, toolset: gcc, build: RelWithAssert, testset: SMOKE }, + { name: cb100, label: ubuntu-24.04-arm, threading: OMP, toolset: gcc, build: Release, testset: CI } + ] + + name: ${{ matrix.config.name }}, ${{ matrix.config.toolset }}, ${{ matrix.config.threading }}, ${{ matrix.config.build }} + runs-on: ${{ matrix.config.label }} + steps: + - name: Checkout oneDNN + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: oneDNN + + - name: Read version file + id: get-versions + run: | + content=$(cat "${{ github.workspace }}/oneDNN/.github/automation/aarch64/ci.json") + content="${content//[$'\t\r\n$ ']}" + echo "output=$content" >> "$GITHUB_OUTPUT" + + # Note: This will create a github actions cache + - name: Get latest CMake and Ninja + uses: lukka/get-cmake@56d043d188c3612951d8755da8f4b709ec951ad6 # v3.31.6 + with: + cmakeVersion: 3.31.0 + ninjaVersion: 1.12.0 + + - if: ${{ (contains(matrix.config.label,'ubuntu') && (matrix.config.threading == 'OMP')) }} + name: Install openmp + run: | + sudo apt install -y libomp-dev + + - if: ${{ (contains(matrix.config.label,'ubuntu') && (matrix.config.toolset == 'gcc')) }} + name: Install gcc + run: | + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt update -y + sudo apt install -y g++-${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + + - if: ${{ (contains(matrix.config.label,'ubuntu') && (matrix.config.toolset == 'clang')) }} + name: Install clang + uses: KyleMayes/install-llvm-action@a7a1a882e2d06ebe05d5bb97c3e1f8c984ae96fc + with: + version: ${{ fromJson(steps.get-versions.outputs.output).dependencies.clang }} + + - name: setup 
python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: '3.10' + + - name: Install dependencies + if: ${{ matrix.config.build == 'Release' }} + run: pip install scipy statistics gitpython + + - name: Clone ACL + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh + env: + ACL_ACTION: clone + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + ACL_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.acl }} + + - name: Get ACL commit hash for cache key + id: get_acl_commit_hash + run: (cd "${{ github.workspace }}/ComputeLibrary" && echo "ACLCommitHash=$(git rev-parse --short HEAD)") >> "$GITHUB_OUTPUT" + + - name: Get system name + id: get_system_name + run: (echo "SystemName=$(uname)") >> "$GITHUB_OUTPUT" + + - name: Restore cached ACL + id: cache-acl-restore + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash.outputs.ACLCommitHash }} + path: ${{ github.workspace }}/ComputeLibrary/build + fail-on-cache-miss: true + + - name: Configure oneDNN + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CMAKE_GENERATOR: Ninja + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + ONEDNN_ACTION: configure + ONEDNN_TEST_SET: ${{ matrix.config.testset }} + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Build oneDNN + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN + env: + ONEDNN_ACTION: build + + - name: Run oneDNN tests + run: ${{ github.workspace 
}}/oneDNN/.github/automation/aarch64/test.sh ${{ github.workspace }}/test_results.xml + working-directory: ${{ github.workspace }}/oneDNN/build + env: + CTEST_PARALLEL_LEVEL: 6 + DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build + ONEDNN_THREADING: ${{ matrix.config.threading }} + + ## Performance test steps ## + - name: Checkout oneDNN base + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && matrix.config.name != 'cb100' && matrix.config.name != 'MacOS' }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + ref: ${{ github.base_ref }} + path: oneDNN_base + + - name: Configure oneDNN base + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && matrix.config.name != 'cb100' && matrix.config.name != 'MacOS' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN_base + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CMAKE_GENERATOR: Ninja + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + ONEDNN_ACTION: configure + ONEDNN_TEST_SET: ${{ matrix.config.testset }} + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Build oneDNN base + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && matrix.config.name != 'cb100' && matrix.config.name != 'MacOS' }} + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN_base + env: + ONEDNN_ACTION: build + + - name: Run performance tests + shell: bash + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && matrix.config.name != 'cb100' && matrix.config.name != 'MacOS' }} + run: | + OMP_NUM_THREADS=4 bash ${{ github.workspace 
}}/oneDNN/.github/automation/performance/bench_pr_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base_4.txt new_4.txt + OMP_NUM_THREADS=16 bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_pr_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN/build/tests/benchdnn/benchdnn base_16.txt new_16.txt + env: + DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build + + - name: Compare performance test results + if: ${{ github.event_name == 'pull_request' && matrix.config.build == 'Release' && matrix.config.name != 'cb100' && matrix.config.name != 'MacOS' }} + id: performance-test + continue-on-error: true + run: | + echo "4 threads:" + python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base_4.txt new_4.txt + echo "16 threads:" + python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base_16.txt new_16.txt + + - name: Check performance test failure + if: ${{ steps.performance-test.outputs.pass != 'True' && github.event_name == 'pull_request' && matrix.config.build == 'Release' && matrix.config.name != 'cb100' && matrix.config.name != 'MacOS' }} + run: echo "::warning file=.github/workflows/ci-aarch64.yml,line=1,col=1::${{ steps.performance-test.outputs.message }}" + + # This job adds a check named "CI AArch64" that represents overall + # workflow status and can be used in branch rulesets + status: + needs: build-and-test + runs-on: ubuntu-latest + name: "CI AArch64" + steps: + - name: Print success + run: echo Success diff --git a/.github/workflows/nightly-aarch64.yml b/.github/workflows/nightly-aarch64.yml new file mode 100644 index 00000000000..ab667214864 --- /dev/null +++ b/.github/workflows/nightly-aarch64.yml @@ -0,0 +1,199 @@ +# 
******************************************************************************* +# Copyright 2024-2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +name: "Nightly AArch64" + +on: + #* allow manual trigger of workflow when needed. Useful for a nightly. + workflow_dispatch: + schedule: + #* minute (0-59) hour (0-23) day (1-31) month (1-12) day of the week (0 - 6) + #* cron jobs run on the default (main) branch. + #* set to run at 5am UTC + - cron: "0 5 * * *" + +#* Stop stale workflows, though we should never hit this unless it hangs for a whole day. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +# Default permissions are write-all (NOT read-only); narrow them if write access is not required. 
+permissions: write-all + +jobs: + build-acl-cache: + uses: ./.github/workflows/aarch64-acl.yml + + test-performance: + uses: ./.github/workflows/performance-aarch64.yml + + build-and-test: + needs: build-acl-cache + strategy: + matrix: + config: [ + { name: cb100, label: ubuntu-24.04-arm, threading: OMP, toolset: gcc, build: RelWithAssert, testset: NIGHTLY } + ] + + name: ${{ matrix.config.name }}, ${{ matrix.config.toolset }}, ${{ matrix.config.threading }}, ${{ matrix.config.build }} + runs-on: ${{ matrix.config.label }} + steps: + + - name: Checkout oneDNN + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: oneDNN + fetch-tags: true + fetch-depth: 0 + + # Note: This will create a github actions cache + - name: Get latest CMake and Ninja + uses: lukka/get-cmake@56d043d188c3612951d8755da8f4b709ec951ad6 # v3.31.6 + with: + cmakeVersion: 3.31.0 + ninjaVersion: 1.12.0 + + - if: ${{ matrix.config.threading == 'OMP' }} + name: Install openmp + run: | + sudo apt install -y libomp-dev + + - name: Read version file + id: get-versions + run: | + content=$(cat "${{ github.workspace }}/oneDNN/.github/automation/aarch64/ci.json") + content="${content//[$'\t\r\n$ ']}" + echo "output=$content" >> "$GITHUB_OUTPUT" + + - name: Install gcc + run: | + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt update -y + sudo apt install -y g++-${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + + - name: setup python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: '3.10' + + - name: Clone ACL + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh + env: + ACL_ACTION: clone + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + ACL_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.acl }} + + - name: Get ACL commit hash for cache key + id: get_acl_commit_hash + run: (cd "${{ github.workspace }}/ComputeLibrary" && echo 
"ACLCommitHash=$(git rev-parse --short HEAD)") >> "$GITHUB_OUTPUT" + + - name: Get system name + id: get_system_name + run: (echo "SystemName=$(uname)") >> "$GITHUB_OUTPUT" + + - name: Restore cached ACL + id: cache-acl-restore + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash.outputs.ACLCommitHash }} + path: ${{ github.workspace }}/ComputeLibrary/build + + - name: Configure oneDNN + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CMAKE_GENERATOR: Ninja + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + ONEDNN_ACTION: configure + ONEDNN_TEST_SET: ${{ matrix.config.testset }} + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Build oneDNN + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN + env: + ONEDNN_ACTION: build + + - name: Run oneDNN tests + run: | + set -o pipefail + ${{ github.workspace }}/oneDNN/.github/automation/aarch64/test.sh ${{ github.workspace }}/test_results.xml + working-directory: ${{ github.workspace }}/oneDNN/build + env: + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CTEST_PARALLEL_LEVEL: 8 + DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Create hash file + working-directory: ${{ github.workspace }}/oneDNN + run: git rev-parse --short HEAD > .github/automation/aarch64/stable.sha + + - name: Save hash + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: 
latest-nightly-success-sha + path: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/stable.sha + + - name: Find last successful run + if: failure() + id: get-stable-cache + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: latest-nightly-success-sha + path: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/stable.sha + + - name: Use backup stable hash + if: failure() && steps.get-stable-cache.outputs.cache-hit != 'true' + run: echo ${{ fromJson(steps.get-versions.outputs.output).dependencies.onednn-base }} > ${{ github.workspace }}/oneDNN/.github/automation/aarch64/stable.sha + + - name: Get stable hash + if: failure() + id: get-stable + run: | + stable_hash=$(cat "${{ github.workspace }}/oneDNN/.github/automation/aarch64/stable.sha") + echo "stable-hash=$stable_hash" >> "$GITHUB_OUTPUT" + + - name: Run git bisect + if: failure() + shell: bash + working-directory: ${{ github.workspace }}/oneDNN + run: python .github/automation/aarch64/bisect_ctest.py --unique ${{ steps.get-stable.outputs.stable-hash }} ${{ github.workspace }}/test_results.xml + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + + - name: Update wiki + if: ${{ (success() || failure()) && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/ryo-not-rio/wiki') }} + uses: ./oneDNN/.github/actions/update-wiki + with: + command: add-unit + title: ${{ matrix.config.name }} + in-file: ${{ github.workspace }}/test_results.xml + + #* This job adds a check named "Nightly AArch64" that represents overall + #* workflow status and can be used in branch rulesets + status: + needs: build-and-test + runs-on: ubuntu-latest + name: "Nightly AArch64" + steps: + - name: Print success + run: echo Success diff --git a/.github/workflows/performance-aarch64.yml b/.github/workflows/performance-aarch64.yml new file mode 100644 index 00000000000..482ce4dd56b --- /dev/null +++ b/.github/workflows/performance-aarch64.yml @@ -0,0 +1,258 @@ +# 
******************************************************************************* +# Copyright 2024-2025 Arm Limited and affiliates. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ******************************************************************************* + +name: "Performance AArch64" + +on: + workflow_dispatch: + inputs: + onednn_base_hash: + required: false + type: string + description: 'Baseline oneDNN commit' + onednn_new_hash: + required: false + type: string + description: 'New oneDNN commit' + acl_base_hash: + required: false + type: string + description: 'Baseline ACL commit' + acl_new_hash: + required: false + type: string + description: 'New ACL commit' + num_threads: + required: false + type: string + description: 'Number of threads to use' + benchdnn_command: + required: false + type: string + description: 'benchdnn command to run' + + workflow_call: + +#* Stop stale workflows +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-performance + cancel-in-progress: true + +# Default permissions are write-all (NOT read-only); narrow them if write access is not required. 
+permissions: write-all + +jobs: + build-acl-base: + uses: ./.github/workflows/aarch64-acl.yml + with: + acl_hash: ${{ inputs.acl_base_hash }} + + build-acl-new: + uses: ./.github/workflows/aarch64-acl.yml + with: + acl_hash: ${{ inputs.acl_new_hash }} + + build-and-test-performance: + needs: [build-acl-base, build-acl-new] + strategy: + matrix: + config: [ + { name: cb100, label: ubuntu-24.04-arm, threading: OMP, toolset: gcc, build: Release, testset: NIGHTLY } + ] + + name: ${{ matrix.config.name }}, ${{ matrix.config.toolset }}, ${{ matrix.config.threading }}, ${{ matrix.config.build }} + runs-on: ${{ matrix.config.label }} + steps: + + - name: Checkout oneDNN + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: oneDNN + + # Note: This will create a github actions cache + - name: Get latest CMake and Ninja + uses: lukka/get-cmake@56d043d188c3612951d8755da8f4b709ec951ad6 # v3.31.6 + with: + cmakeVersion: 3.31.0 + ninjaVersion: 1.12.0 + + - if: ${{ matrix.config.threading == 'OMP' }} + name: Install openmp + run: | + sudo apt install -y libomp-dev + + - name: Read version file + id: get-versions + run: | + content=$(cat "${{ github.workspace }}/oneDNN/.github/automation/aarch64/ci.json") + content="${content//[$'\t\r\n$ ']}" + echo "output=$content" >> "$GITHUB_OUTPUT" + + - name: Install gcc + run: | + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt update -y + sudo apt install -y g++-${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + + - name: setup python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: '3.10' + + - name: Install scipy + if: ${{ matrix.config.build == 'Release' }} + run: pip install scipy statistics GitPython + + - name: Clone base ACL + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh + env: + ACL_ACTION: clone + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + ACL_VERSION: ${{ 
inputs.acl_base_hash || fromJson(steps.get-versions.outputs.output).dependencies.acl }} + + - name: Get system name + id: get_system_name + run: (echo "SystemName=$(uname)") >> "$GITHUB_OUTPUT" + + - name: Get ACL commit hash for cache key + id: get_acl_commit_hash_base + run: | + cd "${{ github.workspace }}/ComputeLibrary" && echo "ACLCommitHash=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT" + + - name: Restore base cached ACL + id: cache-acl-restore-base + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash_base.outputs.ACLCommitHash }} + path: ${{ github.workspace }}/ComputeLibrary/build + fail-on-cache-miss: true + + - name: Rename to ComputeLibrary_base + run: mv ${{ github.workspace }}/ComputeLibrary ${{ github.workspace }}/ComputeLibrary_base + + - name: Clone new ACL + run: ${{ github.workspace }}/oneDNN/.github/automation/aarch64/build_acl.sh + env: + ACL_ACTION: clone + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary + ACL_VERSION: ${{ inputs.acl_new_hash || fromJson(steps.get-versions.outputs.output).dependencies.acl }} + + - name: Get ACL commit hash for cache key + id: get_acl_commit_hash_new + run: | + cd "${{ github.workspace }}/ComputeLibrary" && echo "ACLCommitHash=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT" + + - name: Restore new cached ACL + id: cache-acl-restore-new + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash_new.outputs.ACLCommitHash }} + path: ${{ github.workspace }}/ComputeLibrary/build + fail-on-cache-miss: true + + - name: Move to ComputeLibrary_new + run: mv ${{ github.workspace }}/ComputeLibrary ${{ github.workspace }}/ComputeLibrary_new + + - name: Checkout 
oneDNN base + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + ref: ${{inputs.onednn_base_hash || fromJson(steps.get-versions.outputs.output).dependencies.onednn-base }} + path: oneDNN_base + + - name: Checkout oneDNN new + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + # when the input is non empty use it otherwise falls back to github.sha + ref: ${{ inputs.onednn_new_hash != '' && inputs.onednn_new_hash || github.sha }} + path: oneDNN_new + + - name: Configure oneDNN base + run: .github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN_base + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary_base + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CMAKE_GENERATOR: Ninja + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + ONEDNN_ACTION: configure + ONEDNN_TEST_SET: ${{ matrix.config.testset }} + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Build oneDNN base + run: .github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN_base + env: + ONEDNN_ACTION: build + + - name: Configure oneDNN new + run: .github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN_new + env: + ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary_new + BUILD_TOOLSET: ${{ matrix.config.toolset }} + CMAKE_BUILD_TYPE: ${{ matrix.config.build }} + CMAKE_GENERATOR: Ninja + GCC_VERSION: ${{ fromJson(steps.get-versions.outputs.output).dependencies.gcc }} + ONEDNN_ACTION: configure + ONEDNN_TEST_SET: ${{ matrix.config.testset }} + ONEDNN_THREADING: ${{ matrix.config.threading }} + + - name: Build oneDNN new + run: .github/automation/aarch64/build.sh + working-directory: ${{ github.workspace }}/oneDNN_new + env: + ONEDNN_ACTION: build + + - name: Run nightly performance tests + if: ${{ inputs.benchdnn_command == '' }} + shell: bash + run: | + 
OMP_NUM_THREADS=${{ inputs.num_threads || 16 }} bash ${{ github.workspace }}/oneDNN/.github/automation/performance/bench_nightly_performance.sh ${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn ${{ github.workspace }}/oneDNN_new/build/tests/benchdnn/benchdnn base.txt new.txt + python ${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py base.txt new.txt --out-file perf_table.md + + - name: Update wiki + if: ${{ (success() || failure()) && inputs.benchdnn_command == '' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/ryo-not-rio/wiki') }} + uses: ./oneDNN/.github/actions/update-wiki + with: + command: add-perf + title: ${{ matrix.config.name }} + in-file: perf_table.md + + - name: Run custom performance tests + if: ${{ inputs.benchdnn_command != '' }} + shell: bash + run: | + OMP_NUM_THREADS=${{ inputs.num_threads || 16 }} \ + bash ${{ github.workspace }}/oneDNN/.github/automation/performance/run_benchdnn_compare.sh \ + "${{ github.workspace }}/oneDNN_base/build/tests/benchdnn/benchdnn" \ + "${{ github.workspace }}/oneDNN_new/build/tests/benchdnn/benchdnn" \ + base.txt new.txt ${{ inputs.benchdnn_command }} + + - name: Print speed comparisons + if: ${{ (success() || failure()) && inputs.benchdnn_command != '' }} + run: python3 "${{ github.workspace }}/oneDNN/.github/automation/performance/benchdnn_comparison.py" base.txt new.txt --out-file perf_results.md; cat perf_results.md >> "$GITHUB_STEP_SUMMARY" + + #* This job adds a check named "Nightly Performance AArch64" that represents overall + #* workflow status and can be used in branch rulesets + status: + needs: build-and-test-performance + runs-on: ubuntu-latest + name: "Nightly Performance AArch64" + steps: + - name: Print success + run: echo Success diff --git a/CMakeLists.txt b/CMakeLists.txt index 4af933ec000..9d745709639 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,8 +64,6 @@ endif() if(NOT DNNL_TARGET_ARCH) if(CMAKE_SYSTEM_PROCESSOR 
MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") set(DNNL_TARGET_ARCH "AARCH64") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") - set(DNNL_TARGET_ARCH "ARM") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64.*|PPC64.*|powerpc64.*)") set(DNNL_TARGET_ARCH "PPC64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x.*|S390X.*)") diff --git a/cmake/ACL.cmake b/cmake/ACL.cmake index 22b203ebabb..9622db5e65f 100644 --- a/cmake/ACL.cmake +++ b/cmake/ACL.cmake @@ -21,11 +21,11 @@ endif() set(acl_cmake_included true) include("cmake/options.cmake") -if(NOT DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$") +if(NOT DNNL_TARGET_ARCH STREQUAL "AARCH64") return() endif() -if(NOT DNNL_USE_ACL) +if(NOT DNNL_AARCH64_USE_ACL) return() endif() diff --git a/cmake/options.cmake b/cmake/options.cmake index 22441104296..720612c8066 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -416,7 +416,7 @@ set(DNNL_BLAS_VENDOR "NONE" CACHE STRING # AArch64 optimizations with Arm Compute Library # ============================================== -option(DNNL_USE_ACL "Enables use of ARM optimised functions +option(DNNL_AARCH64_USE_ACL "Enables use of AArch64 optimised functions from Arm Compute Library. 
This is only supported on AArch64 builds and assumes there is a functioning Compute Library build available at the location specified by the diff --git a/cmake/platform.cmake b/cmake/platform.cmake index 52944c18198..e6b69db8f9b 100644 --- a/cmake/platform.cmake +++ b/cmake/platform.cmake @@ -340,7 +340,7 @@ elseif(UNIX OR MINGW) platform_unix_and_mingw_noexcept_ccxx_flags(CMAKE_CMAKE_CCXX_NOEXCEPT_FLAGS) # compiler specific settings if(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") - if(DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$") + if(DNNL_TARGET_ARCH STREQUAL "AARCH64") if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") set(DEF_ARCH_OPT_FLAGS "-O3") endif() @@ -449,7 +449,7 @@ elseif(UNIX OR MINGW) append(CMAKE_CCXX_FLAGS "-Wno-ignored-attributes") endif() - if(DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$") + if(DNNL_TARGET_ARCH STREQUAL "AARCH64") if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") set(DEF_ARCH_OPT_FLAGS "-O3") endif() diff --git a/src/common/utils.cpp b/src/common/utils.cpp index 90a8bf9e904..c7219be59e3 100644 --- a/src/common/utils.cpp +++ b/src/common/utils.cpp @@ -230,7 +230,7 @@ bool get_jit_dump() { return jit_dump.get(); } -#if defined(DNNL_AARCH64) && (DNNL_AARCH64 == 1) || defined(DNNL_ARM) && (DNNL_ARM == 1) +#if defined(DNNL_AARCH64) && (DNNL_AARCH64 == 1) static setting_t jit_profiling_flags {DNNL_JIT_PROFILE_LINUX_PERFMAP}; #else static setting_t jit_profiling_flags {DNNL_JIT_PROFILE_VTUNE}; diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt index ab791ee7b2c..49e73c7676a 100644 --- a/src/cpu/CMakeLists.txt +++ b/src/cpu/CMakeLists.txt @@ -139,9 +139,6 @@ endif() if (DNNL_TARGET_ARCH STREQUAL "AARCH64") add_subdirectory(aarch64) endif() -if (DNNL_USE_ACL) - add_subdirectory(acl) -endif() if (DNNL_TARGET_ARCH STREQUAL "PPC64") add_subdirectory(ppc64) endif() diff --git a/src/cpu/README.md b/src/cpu/README.md index 7641f9e825b..81718eae221 100644 --- a/src/cpu/README.md +++ b/src/cpu/README.md @@ -46,7 +46,6 @@ enable or disable parts of 
code. There the following macros defined: - `DNNL_X64` is 1 on x64 architecture; - `DNNL_X86` is 1 on x86 architecture; - `DNNL_AARCH64` is 1 on Arm AArch64 architecture; -- `DNNL_ARM` is 1 on Arm 32 architecture; - `DNNL_PPC64` is 1 on OpenPOWER / IBM Power architecture; - `DNNL_S390X` is 1 on IBMz / s390x architecture; - `DNNL_RV64` is 1 on RISC-V architecture; diff --git a/src/cpu/aarch64/CMakeLists.txt b/src/cpu/aarch64/CMakeLists.txt index 32eec64988c..75ba8e2705c 100644 --- a/src/cpu/aarch64/CMakeLists.txt +++ b/src/cpu/aarch64/CMakeLists.txt @@ -20,6 +20,14 @@ file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.[ch]pp ) +if(NOT DNNL_AARCH64_USE_ACL) + file(GLOB_RECURSE ACL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/acl_*.[ch] + ${CMAKE_CURRENT_SOURCE_DIR}/acl_*.[ch]pp + ) + list(REMOVE_ITEM SOURCES ${ACL_FILES}) +endif() + # If the runtime is not THREADPOOL remove threadpool_scheduler sources. if(NOT DNNL_CPU_RUNTIME STREQUAL "THREADPOOL") list(APPEND ACL_THREADPOOL_FILES @@ -33,6 +41,5 @@ set(OBJ_LIB ${LIB_PACKAGE_NAME}_cpu_aarch64) add_library(${OBJ_LIB} OBJECT ${SOURCES}) set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS $) -enable_conditional_compilation4(${OBJ_LIB}) add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/xbyak_aarch64 xbyak_aarch64) diff --git a/src/cpu/acl/acl_batch_normalization.cpp b/src/cpu/aarch64/acl_batch_normalization.cpp similarity index 96% rename from src/cpu/acl/acl_batch_normalization.cpp rename to src/cpu/aarch64/acl_batch_normalization.cpp index 83f4c5061a0..77a723207fc 100644 --- a/src/cpu/acl/acl_batch_normalization.cpp +++ b/src/cpu/aarch64/acl_batch_normalization.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_batch_normalization.hpp" +#include "cpu/aarch64/acl_batch_normalization.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_batch_normalization_fwd_t::execute_forward( const exec_ctx_t &ctx) const { @@ -72,7 +72,7 @@ status_t acl_batch_normalization_fwd_t::execute_forward( return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_batch_normalization.hpp b/src/cpu/aarch64/acl_batch_normalization.hpp similarity index 98% rename from src/cpu/acl/acl_batch_normalization.hpp rename to src/cpu/aarch64/acl_batch_normalization.hpp index ef7e4c22cbd..df8cd1223f7 100644 --- a/src/cpu/acl/acl_batch_normalization.hpp +++ b/src/cpu/aarch64/acl_batch_normalization.hpp @@ -14,18 +14,18 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_BATCH_NORMALIZATION_HPP -#define CPU_ACL_BATCH_NORMALIZATION_HPP +#ifndef CPU_AARCH64_ACL_BATCH_NORMALIZATION_HPP +#define CPU_AARCH64_ACL_BATCH_NORMALIZATION_HPP #include "cpu/cpu_batch_normalization_pd.hpp" -#include "cpu/acl/acl_post_ops.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_post_ops.hpp" +#include "cpu/aarch64/acl_utils.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_batch_normalization_obj_t { arm_compute::NEBatchNormalizationLayer bnorm; @@ -266,7 +266,7 @@ struct acl_batch_normalization_fwd_t : public primitive_t { const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } }; // acl_batch_normalization_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_benchmark_scheduler.cpp b/src/cpu/aarch64/acl_benchmark_scheduler.cpp 
similarity index 96% rename from src/cpu/acl/acl_benchmark_scheduler.cpp rename to src/cpu/aarch64/acl_benchmark_scheduler.cpp index 9c41e0a488f..4f453f7eabf 100644 --- a/src/cpu/acl/acl_benchmark_scheduler.cpp +++ b/src/cpu/aarch64/acl_benchmark_scheduler.cpp @@ -14,13 +14,13 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/acl_benchmark_scheduler.hpp" +#include "cpu/aarch64/acl_benchmark_scheduler.hpp" #include "common/verbose.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { using namespace arm_compute; benchmark_scheduler_t::benchmark_scheduler_t(IScheduler &real_scheduler) @@ -72,7 +72,7 @@ void benchmark_scheduler_t::run_workloads(std::vector &workloads) { ARM_COMPUTE_ERROR("Can't be reached"); } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_benchmark_scheduler.hpp b/src/cpu/aarch64/acl_benchmark_scheduler.hpp similarity index 92% rename from src/cpu/acl/acl_benchmark_scheduler.hpp rename to src/cpu/aarch64/acl_benchmark_scheduler.hpp index 59de1d26ea4..0b5d0b929e5 100644 --- a/src/cpu/acl/acl_benchmark_scheduler.hpp +++ b/src/cpu/aarch64/acl_benchmark_scheduler.hpp @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_BENCHMARK_SCHEDULER_HPP -#define CPU_ACL_BENCHMARK_SCHEDULER_HPP +#ifndef CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP +#define CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP #include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/runtime/IScheduler.h" @@ -22,7 +22,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { // BenchmarkScheduler implement's ACL IScheduler interface and acts as an interceptor scheduler // when DNNL_VERBOSE=profile,profile_externals. It intercepts calls made by the actual scheduler used by ACL and adds // timers to benchmark execution time of ACL kernels and store kernel information. @@ -52,9 +52,9 @@ class benchmark_scheduler_t final : public arm_compute::IScheduler { IScheduler &_real_scheduler; }; -#endif // CPU_ACL_BENCHMARK_SCHEDULER_HPP +#endif // CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_binary.cpp b/src/cpu/aarch64/acl_binary.cpp similarity index 99% rename from src/cpu/acl/acl_binary.cpp rename to src/cpu/aarch64/acl_binary.cpp index 703ddac3f0a..8795d9d1ec7 100644 --- a/src/cpu/acl/acl_binary.cpp +++ b/src/cpu/aarch64/acl_binary.cpp @@ -27,7 +27,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_binary_t::pd_t::init(engine_t *engine) { using namespace acl_utils; @@ -232,7 +232,7 @@ const acl_binary_t::pd_t *acl_binary_t::pd() const { return static_cast(primitive_t::pd().get()); } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_binary.hpp b/src/cpu/aarch64/acl_binary.hpp similarity index 95% rename from src/cpu/acl/acl_binary.hpp rename to src/cpu/aarch64/acl_binary.hpp index 7040fe8aa42..41ecdded523 100644 --- 
a/src/cpu/acl/acl_binary.hpp +++ b/src/cpu/aarch64/acl_binary.hpp @@ -14,8 +14,8 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_BINARY_HPP -#define CPU_ACL_BINARY_HPP +#ifndef CPU_AARCH64_ACL_BINARY_HPP +#define CPU_AARCH64_ACL_BINARY_HPP #include "acl_utils.hpp" #include "cpu/cpu_binary_pd.hpp" @@ -28,7 +28,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_binary_conf_t { arm_compute::TensorInfo src0_info; @@ -73,7 +73,7 @@ struct acl_binary_t : public primitive_t { }; // acl_binary_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp similarity index 96% rename from src/cpu/acl/acl_convolution_utils.cpp rename to src/cpu/aarch64/acl_convolution_utils.cpp index 9cb9146bc4b..eb71f230908 100644 --- a/src/cpu/acl/acl_convolution_utils.cpp +++ b/src/cpu/aarch64/acl_convolution_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_convolution_utils.hpp" +#include "cpu/aarch64/acl_convolution_utils.hpp" #include "common/convolution_pd.hpp" #include "common/utils.hpp" #include "oneapi/dnnl/dnnl.h" @@ -22,7 +22,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace acl_convolution_utils { @@ -289,21 +289,19 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, memory_desc_t &bias_md, const convolution_desc_t &cd, const primitive_attr_t &attr) { - // Under these conditions, fallback to faster GEMM-based convolution - // unless the user explicitly specifies Winograd algorithm - // clang-format off - // Heuristic only for servers - if (dnnl_get_max_threads() > 28 && cd.alg_kind == alg_kind::convolution_auto) { - return status::unimplemented; + // clang-format off + if (dnnl_get_max_threads() > 28 + && cd.alg_kind == alg_kind::convolution_auto) { + return status::unimplemented; } + // Heuristic for other devices if (one_of(true, src_md.dims[1] < 64, // ic - dst_md.dims[1] < 64) // oc - && cd.alg_kind == alg_kind::convolution_auto) { - return status::unimplemented; + dst_md.dims[1] < 64) // oc + && cd.alg_kind == alg_kind::convolution_auto) { + return status::unimplemented; } - // clang-format on // General Compute Library checks, memory tags are also set there @@ -339,7 +337,7 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, } // namespace acl_convolution_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_convolution_utils.hpp b/src/cpu/aarch64/acl_convolution_utils.hpp similarity index 98% rename from src/cpu/acl/acl_convolution_utils.hpp rename to src/cpu/aarch64/acl_convolution_utils.hpp index fb616e71a7c..c438cf9574b 100644 --- a/src/cpu/acl/acl_convolution_utils.hpp +++ b/src/cpu/aarch64/acl_convolution_utils.hpp @@ 
-14,8 +14,8 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_CONVOLUTION_UTILS_HPP -#define CPU_ACL_CONVOLUTION_UTILS_HPP +#ifndef CPU_AARCH64_ACL_CONVOLUTION_UTILS_HPP +#define CPU_AARCH64_ACL_CONVOLUTION_UTILS_HPP #include #include "acl_post_ops.hpp" @@ -26,7 +26,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { template struct acl_obj_t { @@ -231,9 +231,9 @@ status_t execute_forward_conv_acl( return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_CONVOLUTION_UTILS_HPP +#endif // CPU_AARCH64_ACL_CONVOLUTION_UTILS_HPP diff --git a/src/cpu/acl/acl_deconvolution.cpp b/src/cpu/aarch64/acl_deconvolution.cpp similarity index 96% rename from src/cpu/acl/acl_deconvolution.cpp rename to src/cpu/aarch64/acl_deconvolution.cpp index 0eef20dbabc..cdeca9cb8bb 100644 --- a/src/cpu/acl/acl_deconvolution.cpp +++ b/src/cpu/aarch64/acl_deconvolution.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_deconvolution.hpp" +#include "cpu/aarch64/acl_deconvolution.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_deconvolution_fwd_t::execute_forward(const exec_ctx_t &ctx) const { @@ -64,7 +64,7 @@ status_t acl_deconvolution_fwd_t::execute_forward(const exec_ctx_t &ctx) const { return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_deconvolution.hpp b/src/cpu/aarch64/acl_deconvolution.hpp similarity index 98% rename from src/cpu/acl/acl_deconvolution.hpp rename to src/cpu/aarch64/acl_deconvolution.hpp index 18c8c1f1a67..e4379cb0d99 100644 --- a/src/cpu/acl/acl_deconvolution.hpp +++ b/src/cpu/aarch64/acl_deconvolution.hpp @@ -14,16 +14,16 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_DECONVOLUTION_HPP -#define CPU_ACL_DECONVOLUTION_HPP +#ifndef CPU_AARCH64_ACL_DECONVOLUTION_HPP +#define CPU_AARCH64_ACL_DECONVOLUTION_HPP -#include "cpu/acl/acl_post_ops.hpp" +#include "cpu/aarch64/acl_post_ops.hpp" #include "cpu/cpu_deconvolution_pd.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_deconv_obj_t { arm_compute::NEDeconvolutionLayer deconv; @@ -330,7 +330,7 @@ struct acl_deconvolution_fwd_t : public primitive_t { const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } }; // acl_deconvolution_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_depthwise_convolution.cpp b/src/cpu/aarch64/acl_depthwise_convolution.cpp similarity index 97% rename from src/cpu/acl/acl_depthwise_convolution.cpp rename to src/cpu/aarch64/acl_depthwise_convolution.cpp index 2f6d5756cc0..976cf096408 
100644 --- a/src/cpu/acl/acl_depthwise_convolution.cpp +++ b/src/cpu/aarch64/acl_depthwise_convolution.cpp @@ -14,12 +14,12 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/acl_depthwise_convolution.hpp" +#include "cpu/aarch64/acl_depthwise_convolution.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace { using data_t = prec_traits_t::type; @@ -88,7 +88,7 @@ status_t acl_depthwise_convolution_fwd_t::init(engine_t *engine) { acl_obj_->aux_mem_req = acl_obj_->conv.workspace(); return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_depthwise_convolution.hpp b/src/cpu/aarch64/acl_depthwise_convolution.hpp similarity index 91% rename from src/cpu/acl/acl_depthwise_convolution.hpp rename to src/cpu/aarch64/acl_depthwise_convolution.hpp index 61c39332a67..6fa37e98e4f 100644 --- a/src/cpu/acl/acl_depthwise_convolution.hpp +++ b/src/cpu/aarch64/acl_depthwise_convolution.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_DEPTHWISE_CONVOLUTION_HPP -#define CPU_ACL_DEPTHWISE_CONVOLUTION_HPP +#ifndef CPU_AARCH64_ACL_DEPTHWISE_CONVOLUTION_HPP +#define CPU_AARCH64_ACL_DEPTHWISE_CONVOLUTION_HPP #include "acl_convolution_utils.hpp" #include "arm_compute/runtime/experimental/operators/CpuDepthwiseConv2d.h" @@ -24,7 +24,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_depthwise_convolution_fwd_t : public primitive_t { @@ -57,9 +57,9 @@ struct acl_depthwise_convolution_fwd_t : public primitive_t { std::unique_ptr> acl_obj_; }; -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_DEPTHWISE_CONVOLUTION_HPP +#endif // CPU_AARCH64_ACL_DEPTHWISE_CONVOLUTION_HPP diff --git a/src/cpu/acl/acl_eltwise.cpp b/src/cpu/aarch64/acl_eltwise.cpp similarity index 98% rename from src/cpu/acl/acl_eltwise.cpp rename to src/cpu/aarch64/acl_eltwise.cpp index b0cf7181cd0..64d0ce6b3f6 100644 --- a/src/cpu/acl/acl_eltwise.cpp +++ b/src/cpu/aarch64/acl_eltwise.cpp @@ -19,7 +19,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_eltwise_fwd_t::execute(const exec_ctx_t &ctx) const { return execute_forward(ctx); @@ -109,7 +109,7 @@ status_t acl_eltwise_fwd_t::pd_t::init(engine_t *engine) { return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_eltwise.hpp b/src/cpu/aarch64/acl_eltwise.hpp similarity index 93% rename from src/cpu/acl/acl_eltwise.hpp rename to src/cpu/aarch64/acl_eltwise.hpp index 45869414bec..bd64eac1936 100644 --- a/src/cpu/acl/acl_eltwise.hpp +++ b/src/cpu/aarch64/acl_eltwise.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_ELTWISE_HPP -#define CPU_ACL_ELTWISE_HPP +#ifndef CPU_AARCH64_ACL_ELTWISE_HPP +#define CPU_AARCH64_ACL_ELTWISE_HPP #include #include "cpu/cpu_eltwise_pd.hpp" @@ -27,7 +27,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_eltwise_conf_t { arm_compute::ActivationLayerInfo act_info; @@ -71,9 +71,9 @@ struct acl_eltwise_fwd_t : public primitive_t { friend struct acl_post_ops_t; }; // acl_eltwise_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_ELTWISE_HPP +#endif // CPU_AARCH64_ACL_ELTWISE_HPP diff --git a/src/cpu/acl/acl_gemm_convolution.cpp b/src/cpu/aarch64/acl_gemm_convolution.cpp similarity index 99% rename from src/cpu/acl/acl_gemm_convolution.cpp rename to src/cpu/aarch64/acl_gemm_convolution.cpp index 8392bd29676..e299644eee2 100644 --- a/src/cpu/acl/acl_gemm_convolution.cpp +++ b/src/cpu/aarch64/acl_gemm_convolution.cpp @@ -21,7 +21,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace { // Keys are anonymous. So deduce the type automagically. @@ -113,7 +113,7 @@ template struct acl_gemm_convolution_fwd_t; template struct acl_gemm_convolution_fwd_t; template struct acl_gemm_convolution_fwd_t; -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_gemm_convolution.hpp b/src/cpu/aarch64/acl_gemm_convolution.hpp similarity index 94% rename from src/cpu/acl/acl_gemm_convolution.hpp rename to src/cpu/aarch64/acl_gemm_convolution.hpp index 14d0050c7ab..58ff3e5fd9a 100644 --- a/src/cpu/acl/acl_gemm_convolution.hpp +++ b/src/cpu/aarch64/acl_gemm_convolution.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_GEMM_CONVOLUTION_HPP -#define CPU_ACL_GEMM_CONVOLUTION_HPP +#ifndef CPU_AARCH64_ACL_GEMM_CONVOLUTION_HPP +#define CPU_AARCH64_ACL_GEMM_CONVOLUTION_HPP #include "common/memory_tracking.hpp" #include "cpu/cpu_convolution_pd.hpp" @@ -27,7 +27,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { template @@ -67,7 +67,7 @@ struct acl_gemm_convolution_fwd_t : public primitive_t { std::unique_ptr> acl_obj_; }; // acl_gemm_convolution_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_indirect_gemm_convolution.cpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.cpp similarity index 99% rename from src/cpu/acl/acl_indirect_gemm_convolution.cpp rename to src/cpu/aarch64/acl_indirect_gemm_convolution.cpp index f53304b7c35..0e080c6e6ca 100644 --- a/src/cpu/acl/acl_indirect_gemm_convolution.cpp +++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.cpp @@ -22,7 +22,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace { using data_t = typename prec_traits_t::type; @@ -123,7 +123,7 @@ status_t acl_indirect_gemm_convolution_fwd_t::pd_t::init(engine_t *engine) { dst_md_); } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_indirect_gemm_convolution.hpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp similarity index 90% rename from src/cpu/acl/acl_indirect_gemm_convolution.hpp rename to src/cpu/aarch64/acl_indirect_gemm_convolution.hpp index 7286cc3ced6..6c91ac5e35c 100644 --- a/src/cpu/acl/acl_indirect_gemm_convolution.hpp +++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_INDIRECT_GEMM_CONVOLUTION_HPP -#define CPU_ACL_INDIRECT_GEMM_CONVOLUTION_HPP +#ifndef CPU_AARCH64_ACL_INDIRECT_GEMM_CONVOLUTION_HPP +#define CPU_AARCH64_ACL_INDIRECT_GEMM_CONVOLUTION_HPP #include "cpu/cpu_convolution_pd.hpp" @@ -25,7 +25,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { @@ -61,9 +61,9 @@ struct acl_indirect_gemm_convolution_fwd_t : public primitive_t { std::unique_ptr> acl_obj_; }; -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_INDIRECT_GEMM_CONVOLUTION_HPP +#endif // CPU_AARCH64_ACL_INDIRECT_GEMM_CONVOLUTION_HPP diff --git a/src/cpu/acl/acl_inner_product.cpp b/src/cpu/aarch64/acl_inner_product.cpp similarity index 99% rename from src/cpu/acl/acl_inner_product.cpp rename to src/cpu/aarch64/acl_inner_product.cpp index d51030c4381..16bd5318f28 100644 --- a/src/cpu/acl/acl_inner_product.cpp +++ b/src/cpu/aarch64/acl_inner_product.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_inner_product.hpp" +#include "cpu/aarch64/acl_inner_product.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_inner_product_fwd_t::init(engine_t *engine) { auto aip = pd()->aip_; @@ -283,7 +283,7 @@ status_t acl_inner_product_fwd_t::pd_t::init_conf_ip( return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_inner_product.hpp b/src/cpu/aarch64/acl_inner_product.hpp similarity index 91% rename from src/cpu/acl/acl_inner_product.hpp rename to src/cpu/aarch64/acl_inner_product.hpp index 46e9acb8313..8ed5f858b9a 100644 --- a/src/cpu/acl/acl_inner_product.hpp +++ b/src/cpu/aarch64/acl_inner_product.hpp @@ -14,11 +14,11 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_INNER_PRODUCT_HPP -#define CPU_ACL_INNER_PRODUCT_HPP +#ifndef CPU_AARCH64_ACL_INNER_PRODUCT_HPP +#define CPU_AARCH64_ACL_INNER_PRODUCT_HPP -#include "cpu/acl/acl_post_ops.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_post_ops.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "cpu/cpu_inner_product_pd.hpp" #include "arm_compute/runtime/experimental/operators/CpuFullyConnected.h" @@ -26,7 +26,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_ip_conf_t { bool with_bias; @@ -75,9 +75,9 @@ struct acl_inner_product_fwd_t : public primitive_t { inner_product_op_; }; // acl_inner_product_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_INNER_PRODUCT_HPP +#endif // CPU_AARCH64_ACL_INNER_PRODUCT_HPP diff --git a/src/cpu/acl/acl_layer_normalization.cpp b/src/cpu/aarch64/acl_layer_normalization.cpp similarity index 98% rename 
from src/cpu/acl/acl_layer_normalization.cpp rename to src/cpu/aarch64/acl_layer_normalization.cpp index 1f244585571..18324de0c80 100644 --- a/src/cpu/acl/acl_layer_normalization.cpp +++ b/src/cpu/aarch64/acl_layer_normalization.cpp @@ -14,12 +14,12 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/acl_layer_normalization.hpp" +#include "cpu/aarch64/acl_layer_normalization.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { acl_layer_normalization_fwd_t::acl_layer_normalization_fwd_t(const pd_t *apd) : primitive_t(apd) @@ -200,7 +200,7 @@ status_t acl_layer_normalization_fwd_t::execute_forward( return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_layer_normalization.hpp b/src/cpu/aarch64/acl_layer_normalization.hpp similarity index 89% rename from src/cpu/acl/acl_layer_normalization.hpp rename to src/cpu/aarch64/acl_layer_normalization.hpp index 133ea2be4a4..c3dfaf3b214 100644 --- a/src/cpu/acl/acl_layer_normalization.hpp +++ b/src/cpu/aarch64/acl_layer_normalization.hpp @@ -14,18 +14,18 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_LAYER_NORMALIZATION_HPP -#define CPU_ACL_LAYER_NORMALIZATION_HPP +#ifndef CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP +#define CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP #include "arm_compute/runtime/experimental/operators/CpuMeanStdDevNormalization.h" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "cpu/cpu_layer_normalization_pd.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_layer_normalization_fwd_t : public primitive_t { struct pd_t : public cpu_layer_normalization_fwd_pd_t { using cpu_layer_normalization_fwd_pd_t:: @@ -55,9 +55,9 @@ struct acl_layer_normalization_fwd_t : public primitive_t { acl_obj_; }; -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_LAYER_NORMALIZATION_HPP +#endif // CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP diff --git a/src/cpu/acl/acl_pooling.cpp b/src/cpu/aarch64/acl_pooling.cpp similarity index 99% rename from src/cpu/acl/acl_pooling.cpp rename to src/cpu/aarch64/acl_pooling.cpp index 4c14193ce1c..6728f0a2395 100644 --- a/src/cpu/acl/acl_pooling.cpp +++ b/src/cpu/aarch64/acl_pooling.cpp @@ -14,13 +14,13 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_pooling.hpp" +#include "cpu/aarch64/acl_pooling.hpp" #include "common/memory_tracking.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_pooling_fwd_t::pd_t::init(engine_t *engine) { @@ -297,7 +297,7 @@ status_t acl_pooling_fwd_t::execute_forward(const exec_ctx_t &ctx) const { return status; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_pooling.hpp b/src/cpu/aarch64/acl_pooling.hpp similarity index 93% rename from src/cpu/acl/acl_pooling.hpp rename to src/cpu/aarch64/acl_pooling.hpp index b47fdfa175b..f8b540cd023 100644 --- a/src/cpu/acl/acl_pooling.hpp +++ b/src/cpu/aarch64/acl_pooling.hpp @@ -14,12 +14,12 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_POOLING_HPP -#define CPU_ACL_POOLING_HPP +#ifndef CPU_AARCH64_ACL_POOLING_HPP +#define CPU_AARCH64_ACL_POOLING_HPP #include "cpu/cpu_pooling_pd.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/IOperator.h" @@ -28,7 +28,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_pooling_conf_t { arm_compute::TensorInfo src_info; @@ -74,9 +74,9 @@ struct acl_pooling_fwd_t : public primitive_t { std::unique_ptr pooling_op_; }; // acl_pooling_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_POOLING_HPP +#endif // CPU_AARCH64_ACL_POOLING_HPP diff --git a/src/cpu/acl/acl_post_ops.cpp b/src/cpu/aarch64/acl_post_ops.cpp similarity index 97% rename from src/cpu/acl/acl_post_ops.cpp rename to src/cpu/aarch64/acl_post_ops.cpp index 816d195a920..dbb1bf2d53c 100644 --- 
a/src/cpu/acl/acl_post_ops.cpp +++ b/src/cpu/aarch64/acl_post_ops.cpp @@ -15,12 +15,12 @@ *******************************************************************************/ #include "common/float16.hpp" -#include "cpu/acl/acl_gemm_convolution.hpp" +#include "cpu/aarch64/acl_gemm_convolution.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_post_ops_t::execute( const exec_ctx_t &ctx, void *src, void *dst) const { @@ -97,7 +97,7 @@ status_t acl_post_ops_t::execute( return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_post_ops.hpp b/src/cpu/aarch64/acl_post_ops.hpp similarity index 96% rename from src/cpu/acl/acl_post_ops.hpp rename to src/cpu/aarch64/acl_post_ops.hpp index d5e470e4578..9fd4650456a 100644 --- a/src/cpu/acl/acl_post_ops.hpp +++ b/src/cpu/aarch64/acl_post_ops.hpp @@ -14,16 +14,16 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_POST_OPS_HPP -#define CPU_ACL_POST_OPS_HPP +#ifndef CPU_AARCH64_ACL_POST_OPS_HPP +#define CPU_AARCH64_ACL_POST_OPS_HPP -#include "cpu/acl/acl_binary.hpp" -#include "cpu/acl/acl_eltwise.hpp" +#include "cpu/aarch64/acl_binary.hpp" +#include "cpu/aarch64/acl_eltwise.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_post_ops_t { @@ -142,7 +142,7 @@ struct acl_post_ops_t { CHECK(base_post_ops.set_default_formats(&dst_md)); dst_data_type = dst_md.data_type; - // If the first entry is eltwise, we fuse it + // If the first entry is eltwise, we fuse it. 
if (base_post_ops.len() >= 1 && base_post_ops.entry_[0].is_eltwise()) { const auto &first_po = base_post_ops.entry_[0].eltwise; @@ -176,7 +176,7 @@ struct acl_post_ops_t { std::vector> post_op_primitives; }; -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_prelu.cpp b/src/cpu/aarch64/acl_prelu.cpp similarity index 96% rename from src/cpu/acl/acl_prelu.cpp rename to src/cpu/aarch64/acl_prelu.cpp index b118fe20811..e2aae9392c0 100644 --- a/src/cpu/acl/acl_prelu.cpp +++ b/src/cpu/aarch64/acl_prelu.cpp @@ -14,12 +14,12 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/acl_prelu.hpp" +#include "cpu/aarch64/acl_prelu.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_prelu_fwd_t::execute_forward(const exec_ctx_t &ctx) const { @@ -51,7 +51,7 @@ status_t acl_prelu_fwd_t::execute_forward(const exec_ctx_t &ctx) const { return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_prelu.hpp b/src/cpu/aarch64/acl_prelu.hpp similarity index 96% rename from src/cpu/acl/acl_prelu.hpp rename to src/cpu/aarch64/acl_prelu.hpp index a7b70402687..8517d1bb3ee 100644 --- a/src/cpu/acl/acl_prelu.hpp +++ b/src/cpu/aarch64/acl_prelu.hpp @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_PRELU_HPP -#define CPU_ACL_PRELU_HPP +#ifndef CPU_AARCH64_ACL_PRELU_HPP +#define CPU_AARCH64_ACL_PRELU_HPP -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "cpu/cpu_prelu_pd.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_prelu_obj_t { arm_compute::NEPReluLayer prelu; @@ -151,9 +151,9 @@ struct acl_prelu_fwd_t : public primitive_t { const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } }; // acl_prelu_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_PRELU_HPP +#endif // CPU_AARCH64_ACL_PRELU_HPP diff --git a/src/cpu/aarch64/acl_reorder.cpp b/src/cpu/aarch64/acl_reorder.cpp index 9f3c062cb82..7158c5fb19d 100644 --- a/src/cpu/aarch64/acl_reorder.cpp +++ b/src/cpu/aarch64/acl_reorder.cpp @@ -39,7 +39,7 @@ int find_innermost_dense_idx(const dnnl::impl::memory_desc_t *md) { namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { status_t acl_reorder_resource_t::configure(const acl_reorder_conf_t &app) { if (!acl_obj_) return status::out_of_memory; @@ -255,7 +255,7 @@ status_t acl_reorder_fwd_t::execute_forward(const exec_ctx_t &ctx) const { return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/aarch64/acl_reorder.hpp b/src/cpu/aarch64/acl_reorder.hpp index e6bcad62f3c..50c058ed99b 100644 --- a/src/cpu/aarch64/acl_reorder.hpp +++ b/src/cpu/aarch64/acl_reorder.hpp @@ -13,19 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_REORDER_HPP -#define CPU_ACL_REORDER_HPP +#ifndef CPU_AARCH64_ACL_REORDER_HPP +#define CPU_AARCH64_ACL_REORDER_HPP #include "arm_compute/core/Types.h" #include "common/utils.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "cpu/aarch64/cpu_isa_traits.hpp" #include "cpu/reorder/cpu_reorder_pd.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_reorder_obj_t { arm_compute::NEReorderLayer reorder; @@ -91,9 +91,9 @@ struct acl_reorder_fwd_t : public primitive_t { }; // acl_reorder_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_REORDER_HPP +#endif // CPU_AARCH64_ACL_REORDER_HPP diff --git a/src/cpu/acl/acl_softmax.cpp b/src/cpu/aarch64/acl_softmax.cpp similarity index 98% rename from src/cpu/acl/acl_softmax.cpp rename to src/cpu/aarch64/acl_softmax.cpp index 50966d33e4e..47517f1de8a 100644 --- a/src/cpu/acl/acl_softmax.cpp +++ b/src/cpu/aarch64/acl_softmax.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_softmax.hpp" +#include "cpu/aarch64/acl_softmax.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { const acl_softmax_fwd_t::pd_t *acl_softmax_fwd_t::pd() const { return static_cast(primitive_t::pd().get()); @@ -168,7 +168,7 @@ status_t acl_softmax_fwd_t::execute_forward(const exec_ctx_t &ctx) const { return status::success; } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_softmax.hpp b/src/cpu/aarch64/acl_softmax.hpp similarity index 92% rename from src/cpu/acl/acl_softmax.hpp rename to src/cpu/aarch64/acl_softmax.hpp index 470eea9a1a3..59a16f23b43 100644 --- a/src/cpu/acl/acl_softmax.hpp +++ b/src/cpu/aarch64/acl_softmax.hpp @@ -14,12 +14,12 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_SOFTMAX_HPP -#define CPU_ACL_SOFTMAX_HPP +#ifndef CPU_AARCH64_ACL_SOFTMAX_HPP +#define CPU_AARCH64_ACL_SOFTMAX_HPP #include "cpu/cpu_softmax_pd.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/IOperator.h" @@ -28,7 +28,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_softmax_conf_t { arm_compute::TensorInfo src_info; @@ -63,7 +63,7 @@ struct acl_softmax_fwd_t : public primitive_t { std::unique_ptr softmax_op_; }; // acl_softmax_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_thread.cpp b/src/cpu/aarch64/acl_thread.cpp similarity index 95% rename from src/cpu/acl/acl_thread.cpp rename to src/cpu/aarch64/acl_thread.cpp index 383fd283176..040e338c08d 100644 --- a/src/cpu/acl/acl_thread.cpp +++ b/src/cpu/aarch64/acl_thread.cpp @@ 
-14,16 +14,16 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/acl_thread.hpp" +#include "cpu/aarch64/acl_thread.hpp" #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL -#include "cpu/acl/acl_threadpool_scheduler.hpp" +#include "cpu/aarch64/acl_threadpool_scheduler.hpp" #endif -#include "cpu/acl/acl_benchmark_scheduler.hpp" +#include "cpu/aarch64/acl_benchmark_scheduler.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace acl_thread_utils { @@ -119,7 +119,7 @@ void set_acl_threading() { } // namespace acl_thread_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_thread.hpp b/src/cpu/aarch64/acl_thread.hpp similarity index 92% rename from src/cpu/acl/acl_thread.hpp rename to src/cpu/aarch64/acl_thread.hpp index 26b65564d79..f073376e63a 100644 --- a/src/cpu/acl/acl_thread.hpp +++ b/src/cpu/aarch64/acl_thread.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_THREAD_HPP -#define CPU_ACL_THREAD_HPP +#ifndef CPU_AARCH64_ACL_THREAD_HPP +#define CPU_AARCH64_ACL_THREAD_HPP #include "common/dnnl_thread.hpp" #include "common/verbose.hpp" @@ -25,7 +25,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace acl_thread_utils { @@ -49,9 +49,9 @@ void acl_set_tp_benchmark_scheduler(); void set_acl_threading(); } // namespace acl_thread_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_THREAD_HPP +#endif // CPU_AARCH64_ACL_THREAD_HPP diff --git a/src/cpu/acl/acl_threadpool_scheduler.cpp b/src/cpu/aarch64/acl_threadpool_scheduler.cpp similarity index 97% rename from src/cpu/acl/acl_threadpool_scheduler.cpp rename to src/cpu/aarch64/acl_threadpool_scheduler.cpp index ae559c5ead9..34cf44b7e25 100644 --- a/src/cpu/acl/acl_threadpool_scheduler.cpp +++ b/src/cpu/aarch64/acl_threadpool_scheduler.cpp @@ -14,13 +14,13 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_threadpool_scheduler.hpp" +#include "cpu/aarch64/acl_threadpool_scheduler.hpp" #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL #include "common/counting_barrier.hpp" #include "common/dnnl_thread.hpp" -#include "cpu/acl/acl_thread.hpp" +#include "cpu/aarch64/acl_thread.hpp" #include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Error.h" @@ -33,7 +33,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { using namespace arm_compute; @@ -135,7 +135,7 @@ void ThreadpoolScheduler::run_workloads( if (is_async) b.wait(); } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_threadpool_scheduler.hpp b/src/cpu/aarch64/acl_threadpool_scheduler.hpp similarity index 91% rename from src/cpu/acl/acl_threadpool_scheduler.hpp rename to src/cpu/aarch64/acl_threadpool_scheduler.hpp index 0bdd068bde5..1c7d054c08d 100644 --- a/src/cpu/acl/acl_threadpool_scheduler.hpp +++ b/src/cpu/aarch64/acl_threadpool_scheduler.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_THREADPOOL_SCHEDULER_HPP -#define CPU_ACL_THREADPOOL_SCHEDULER_HPP +#ifndef CPU_AARCH64_ACL_THREADPOOL_SCHEDULER_HPP +#define CPU_AARCH64_ACL_THREADPOOL_SCHEDULER_HPP #include "oneapi/dnnl/dnnl_config.h" @@ -28,7 +28,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { class ThreadpoolScheduler final : public arm_compute::IScheduler { public: @@ -59,11 +59,11 @@ class ThreadpoolScheduler final : public arm_compute::IScheduler { std::mutex _mtx; }; -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl #endif // DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL -#endif // CPU_ACL_THREADPOOL_SCHEDULER_HPP +#endif // CPU_AARCH64_ACL_THREADPOOL_SCHEDULER_HPP diff --git a/src/cpu/acl/acl_utils.cpp b/src/cpu/aarch64/acl_utils.cpp similarity index 99% rename from src/cpu/acl/acl_utils.cpp rename to src/cpu/aarch64/acl_utils.cpp index 66494672aaf..ec7f162891f 100644 --- a/src/cpu/acl/acl_utils.cpp +++ b/src/cpu/aarch64/acl_utils.cpp @@ -14,13 +14,13 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace acl_utils { @@ -362,7 +362,7 @@ status_t reorder_to_weight_format(arm_compute::TensorInfo &info, } // namespace acl_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_utils.hpp b/src/cpu/aarch64/acl_utils.hpp similarity index 97% rename from src/cpu/acl/acl_utils.hpp rename to src/cpu/aarch64/acl_utils.hpp index 1aba0d4644b..b1ec3f345da 100644 --- a/src/cpu/acl/acl_utils.hpp +++ b/src/cpu/aarch64/acl_utils.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_UTILS_HPP -#define CPU_ACL_UTILS_HPP +#ifndef CPU_AARCH64_ACL_UTILS_HPP +#define CPU_AARCH64_ACL_UTILS_HPP #include @@ -33,7 +33,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace acl_utils { @@ -125,9 +125,9 @@ status_t reorder_to_weight_format(arm_compute::TensorInfo &info, } // namespace acl_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_UTILS_HPP +#endif // CPU_AARCH64_ACL_UTILS_HPP diff --git a/src/cpu/acl/acl_winograd_convolution.cpp b/src/cpu/aarch64/acl_winograd_convolution.cpp similarity index 94% rename from src/cpu/acl/acl_winograd_convolution.cpp rename to src/cpu/aarch64/acl_winograd_convolution.cpp index eb2e0bd9883..b801fa28aa1 100644 --- a/src/cpu/acl/acl_winograd_convolution.cpp +++ b/src/cpu/aarch64/acl_winograd_convolution.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/acl_winograd_convolution.hpp" +#include "cpu/aarch64/acl_winograd_convolution.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { using data_t = prec_traits_t::type; status_t acl_wino_convolution_fwd_t::execute_forward( @@ -38,7 +38,7 @@ status_t acl_wino_convolution_fwd_t::execute_forward( ctx, acl_wino_obj, pd()); } -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/acl_winograd_convolution.hpp b/src/cpu/aarch64/acl_winograd_convolution.hpp similarity index 94% rename from src/cpu/acl/acl_winograd_convolution.hpp rename to src/cpu/aarch64/acl_winograd_convolution.hpp index 9c29ea376a3..69aee76e4b7 100644 --- a/src/cpu/acl/acl_winograd_convolution.hpp +++ b/src/cpu/aarch64/acl_winograd_convolution.hpp @@ -14,22 +14,22 @@ * limitations under the License. *******************************************************************************/ -#ifndef CPU_ACL_WINOGRAD_CONVOLUTION_HPP -#define CPU_ACL_WINOGRAD_CONVOLUTION_HPP +#ifndef CPU_AARCH64_ACL_WINOGRAD_CONVOLUTION_HPP +#define CPU_AARCH64_ACL_WINOGRAD_CONVOLUTION_HPP #include "cpu/cpu_convolution_pd.hpp" -#include "cpu/acl/acl_convolution_utils.hpp" +#include "cpu/aarch64/acl_convolution_utils.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { struct acl_wino_resource_t : public resource_t { acl_wino_resource_t() : acl_wino_obj_(utils::make_unique< - acl_obj_t>()) {} + acl_obj_t>()) {} status_t configure(const acl_conv_conf_t &acp) { if (!acl_wino_obj_) return status::out_of_memory; @@ -144,9 +144,9 @@ struct acl_wino_convolution_fwd_t : public primitive_t { const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } }; // acl_wino_convolution_fwd_t -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl 
-#endif // CPU_ACL_WINOGRAD_CONVOLUTION_HPP +#endif // CPU_AARCH64_ACL_WINOGRAD_CONVOLUTION_HPP diff --git a/src/cpu/acl/matmul/acl_lowp_matmul.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp similarity index 96% rename from src/cpu/acl/matmul/acl_lowp_matmul.cpp rename to src/cpu/aarch64/matmul/acl_lowp_matmul.cpp index 8b4dca04b76..9f6209df55e 100644 --- a/src/cpu/acl/matmul/acl_lowp_matmul.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp @@ -14,15 +14,13 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/matmul/acl_lowp_matmul.hpp" +#include "cpu/aarch64/matmul/acl_lowp_matmul.hpp" #include "cpu/cpu_primitive.hpp" -#include "src/cpu/CpuTypes.h" - namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace matmul { namespace { @@ -253,11 +251,11 @@ status_t acl_lowp_matmul_t::pd_t::init_scratchpad( const memory_desc_wrapper dst_d(&dst_md_); if (almc_.use_dst_acc) { scratchpad.book(memory_tracking::names::key_matmul_dst_in_acc_dt, - dst_d.nelems(), sizeof(arm_compute::float32_t)); + dst_d.nelems(), sizeof(float32_t)); } if (almc_.use_cast_acc) { scratchpad.book(memory_tracking::names::key_matmul_dst_cast_acc, - dst_d.nelems(), sizeof(arm_compute::float32_t)); + dst_d.nelems(), sizeof(float32_t)); } return status::success; } @@ -335,15 +333,17 @@ status_t acl_lowp_matmul_t::execute(const exec_ctx_t &ctx) const { bia_tensor.allocator()->import_memory(const_cast(bias)); } - auto dst = pd()->almc_.use_dst_acc ? scratchpad.get( - memory_tracking::names::key_matmul_dst_in_acc_dt) - : CTX_OUT_MEM(float *, DNNL_ARG_DST); + auto dst = pd()->almc_.use_dst_acc + ? scratchpad.get( + memory_tracking::names::key_matmul_dst_in_acc_dt) + : CTX_OUT_MEM(float *, DNNL_ARG_DST); dst_tensor.allocator()->init(alcm.dst_tensor_info); dst_tensor.allocator()->import_memory(dst); - auto dst_cast = pd()->almc_.use_cast_acc ? 
scratchpad.get( - memory_tracking::names::key_matmul_dst_cast_acc) - : nullptr; + auto dst_cast = pd()->almc_.use_cast_acc + ? scratchpad.get( + memory_tracking::names::key_matmul_dst_cast_acc) + : nullptr; if (dst_cast) { dst_cast_tensor.allocator()->init(alcm.dst_cast_tensor_info); dst_cast_tensor.allocator()->import_memory(dst_cast); @@ -438,7 +438,7 @@ status_t acl_lowp_matmul_t::execute(const exec_ctx_t &ctx) const { }; } // namespace matmul -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/matmul/acl_lowp_matmul.hpp b/src/cpu/aarch64/matmul/acl_lowp_matmul.hpp similarity index 91% rename from src/cpu/acl/matmul/acl_lowp_matmul.hpp rename to src/cpu/aarch64/matmul/acl_lowp_matmul.hpp index bde5fade7fd..46d005515a9 100644 --- a/src/cpu/acl/matmul/acl_lowp_matmul.hpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul.hpp @@ -14,21 +14,21 @@ * limitations under the License. *******************************************************************************/ -#ifndef ACL_LOWP_MATMUL_HPP -#define ACL_LOWP_MATMUL_HPP +#ifndef CPU_AARCH64_MATMUL_ACL_LOWP_MATMUL_HPP +#define CPU_AARCH64_MATMUL_ACL_LOWP_MATMUL_HPP #include "arm_compute/runtime/experimental/operators/CpuDequantize.h" #include "arm_compute/runtime/experimental/operators/CpuGEMMLowp.h" #include "arm_compute/runtime/experimental/operators/CpuQuantize.h" -#include "cpu/acl/acl_post_ops.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/acl_post_ops.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "cpu/matmul/cpu_matmul_pd.hpp" #include "cpu/matmul/matmul_utils.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace matmul { using arm_compute::experimental::MemoryLifetime; @@ -81,9 +81,9 @@ struct acl_lowp_matmul_t : public primitive_t { }; } // namespace matmul -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // 
ACL_LOWP_MATMUL_HPP +#endif // CPU_AARCH64_MATMUL_ACL_LOWP_MATMUL_HPP diff --git a/src/cpu/acl/matmul/acl_lowp_matmul_sq.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp similarity index 97% rename from src/cpu/acl/matmul/acl_lowp_matmul_sq.cpp rename to src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp index 0884a123ba9..e58b0823360 100644 --- a/src/cpu/acl/matmul/acl_lowp_matmul_sq.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp @@ -14,17 +14,16 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/matmul/acl_lowp_matmul_sq.hpp" +#include "cpu/aarch64/matmul/acl_lowp_matmul_sq.hpp" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "cpu/acl/acl_utils.hpp" -#include "src/cpu/CpuTypes.h" +#include "cpu/aarch64/acl_utils.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace matmul { namespace { @@ -279,10 +278,10 @@ status_t acl_lowp_matmul_sq_t::execute(const exec_ctx_t &ctx) const { if (with_bias) { auto bia_s32_base = scratchpad.get( memory_tracking::names::key_conv_bias_s32_convert); - auto bia_f32_base = CTX_IN_MEM(const arm_compute::float32_t *, DNNL_ARG_BIAS); + auto bia_f32_base = CTX_IN_MEM(const float32_t *, DNNL_ARG_BIAS); const float bias_scale = 1 / (*src_scale * (*wei_scale)); const int num_elements - = almc.bia_tensor_info.total_size() / sizeof(arm_compute::float32_t); + = almc.bia_tensor_info.total_size() / sizeof(float32_t); parallel_nd(num_elements, [&](dim_t e) { const auto b = int32_t(std::round(bia_f32_base[e] * bias_scale)); bia_s32_base[e] = b; @@ -340,7 +339,7 @@ status_t acl_lowp_matmul_sq_t::execute(const exec_ctx_t &ctx) const { return status::success; }; } // namespace matmul -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/matmul/acl_lowp_matmul_sq.hpp b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.hpp 
similarity index 92% rename from src/cpu/acl/matmul/acl_lowp_matmul_sq.hpp rename to src/cpu/aarch64/matmul/acl_lowp_matmul_sq.hpp index 24b60ab3450..52b83c6d964 100644 --- a/src/cpu/acl/matmul/acl_lowp_matmul_sq.hpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.hpp @@ -14,12 +14,12 @@ * limitations under the License. *******************************************************************************/ -#ifndef ACL_LOWP_MATMUL_SQ_HPP -#define ACL_LOWP_MATMUL_SQ_HPP +#ifndef CPU_AARCH64_MATMUL_ACL_LOWP_MATMUL_SQ_HPP +#define CPU_AARCH64_MATMUL_ACL_LOWP_MATMUL_SQ_HPP #include -#include "cpu/acl/acl_post_ops.hpp" +#include "cpu/aarch64/acl_post_ops.hpp" #include "cpu/cpu_primitive.hpp" #include "cpu/matmul/cpu_matmul_pd.hpp" #include "cpu/matmul/matmul_utils.hpp" @@ -30,7 +30,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace matmul { struct acl_lowp_matmul_sq_conf_t { @@ -84,9 +84,9 @@ struct acl_lowp_matmul_sq_t : public primitive_t { }; } // namespace matmul -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // ACL_LOWP_MATMUL_SQ_HPP +#endif // CPU_AARCH64_MATMUL_ACL_LOWP_MATMUL_SQ_HPP diff --git a/src/cpu/acl/matmul/acl_matmul.cpp b/src/cpu/aarch64/matmul/acl_matmul.cpp similarity index 98% rename from src/cpu/acl/matmul/acl_matmul.cpp rename to src/cpu/aarch64/matmul/acl_matmul.cpp index bbaa3f37832..8bf7bcd757b 100644 --- a/src/cpu/acl/matmul/acl_matmul.cpp +++ b/src/cpu/aarch64/matmul/acl_matmul.cpp @@ -14,14 +14,14 @@ * limitations under the License. 
*******************************************************************************/ -#include "cpu/acl/matmul/acl_matmul.hpp" +#include "cpu/aarch64/matmul/acl_matmul.hpp" #include namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace matmul { using namespace data_type; @@ -229,9 +229,10 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { // If we have an unfused sum post op, put the result in a scratchpad tensor. // Result will be summed to the dst during acl_post_ops.execute - auto dst_base = use_dst_acc_for_sum ? scratchpad.get( - memory_tracking::names::key_matmul_dst_in_acc_dt) - : CTX_OUT_MEM(data_t *, DNNL_ARG_DST); + auto dst_base = use_dst_acc_for_sum + ? scratchpad.get( + memory_tracking::names::key_matmul_dst_in_acc_dt) + : CTX_OUT_MEM(data_t *, DNNL_ARG_DST); dst_tensor.allocator()->import_memory(dst_base); // Run transpose kernel @@ -377,7 +378,7 @@ template status_t acl_matmul_t::execute_forward( const exec_ctx_t &ctx) const; } // namespace matmul -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/matmul/acl_matmul.hpp b/src/cpu/aarch64/matmul/acl_matmul.hpp similarity index 94% rename from src/cpu/acl/matmul/acl_matmul.hpp rename to src/cpu/aarch64/matmul/acl_matmul.hpp index d78351941c2..cbfc33c5ed8 100644 --- a/src/cpu/acl/matmul/acl_matmul.hpp +++ b/src/cpu/aarch64/matmul/acl_matmul.hpp @@ -17,8 +17,8 @@ #ifndef CPU_AARCH64_MATMUL_ACL_MATMUL_HPP #define CPU_AARCH64_MATMUL_ACL_MATMUL_HPP -#include "cpu/acl/acl_post_ops.hpp" -#include "cpu/acl/matmul/acl_matmul_utils.hpp" +#include "cpu/aarch64/acl_post_ops.hpp" +#include "cpu/aarch64/matmul/acl_matmul_utils.hpp" #include "cpu/matmul/cpu_matmul_pd.hpp" #include @@ -26,7 +26,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace matmul { struct acl_matmul_t : public primitive_t { @@ -66,7 +66,7 @@ struct acl_matmul_t 
: public primitive_t { }; // acl_matmul_t } // namespace matmul -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/matmul/acl_matmul_utils.cpp b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp similarity index 98% rename from src/cpu/acl/matmul/acl_matmul_utils.cpp rename to src/cpu/aarch64/matmul/acl_matmul_utils.cpp index 11e61715940..8b3becd3e1b 100644 --- a/src/cpu/acl/matmul/acl_matmul_utils.cpp +++ b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp @@ -14,15 +14,15 @@ * limitations under the License. *******************************************************************************/ -#include "cpu/acl/matmul/acl_matmul_utils.hpp" -#include "cpu/acl/acl_utils.hpp" +#include "cpu/aarch64/matmul/acl_matmul_utils.hpp" +#include "cpu/aarch64/acl_utils.hpp" #include "cpu/matmul/gemm_based_common.hpp" #include "cpu/matmul/matmul_utils.hpp" namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { bool batch_dims_have_default_order(const memory_desc_wrapper &mdw) { assert(mdw.is_blocking_desc()); @@ -267,7 +267,7 @@ template status_t init_conf_matmul(acl_matmul_conf_t &, } // namespace acl_matmul_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl diff --git a/src/cpu/acl/matmul/acl_matmul_utils.hpp b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp similarity index 94% rename from src/cpu/acl/matmul/acl_matmul_utils.hpp rename to src/cpu/aarch64/matmul/acl_matmul_utils.hpp index d55cf71263f..c154852a866 100644 --- a/src/cpu/acl/matmul/acl_matmul_utils.hpp +++ b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp @@ -14,8 +14,8 @@ * limitations under the License. 
*******************************************************************************/ -#ifndef CPU_ACL_MATMUL_UTILS_HPP -#define CPU_ACL_MATMUL_UTILS_HPP +#ifndef CPU_AARCH64_MATMUL_ACL_MATMUL_UTILS_HPP +#define CPU_AARCH64_MATMUL_ACL_MATMUL_UTILS_HPP #include "arm_compute/runtime/experimental/low_level/CpuGemmAssemblyDispatch.h" #include "arm_compute/runtime/experimental/operators/CpuActivation.h" @@ -26,7 +26,7 @@ namespace dnnl { namespace impl { namespace cpu { -namespace acl { +namespace aarch64 { namespace { // Keys are anonymous. So deduce the type automagically. @@ -80,9 +80,9 @@ status_t init_scratchpad(memory_tracking::registrar_t &scratchpad, } // namespace acl_matmul_utils -} // namespace acl +} // namespace aarch64 } // namespace cpu } // namespace impl } // namespace dnnl -#endif // CPU_ACL_MATMUL_UTILS_HPP +#endif // CPU_AARCH64_MATMUL_ACL_MATMUL_UTILS_HPP diff --git a/src/cpu/acl/CMakeLists.txt b/src/cpu/acl/CMakeLists.txt deleted file mode 100644 index abe0a5c49eb..00000000000 --- a/src/cpu/acl/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -#******************************************************************************* -# Copyright 2020-2022 Arm Ltd. and affiliates -# Copyright 2020-2021 FUJITSU LIMITED -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#******************************************************************************* -file(GLOB_RECURSE SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/*.[ch] - ${CMAKE_CURRENT_SOURCE_DIR}/*.[ch]pp - ) -# If the runtime is not THREADPOOL remove threadpool_scheduler sources. -if(NOT DNNL_CPU_RUNTIME STREQUAL "THREADPOOL") - list(APPEND ACL_THREADPOOL_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/acl_threadpool_scheduler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/acl_threadpool_scheduler.hpp - ) - list(REMOVE_ITEM SOURCES ${ACL_THREADPOOL_FILES}) -endif() -set(OBJ_LIB ${DNNL_LIBRARY_NAME}_cpu_acl) -add_library(${OBJ_LIB} OBJECT ${SOURCES}) -set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS - $) -enable_conditional_compilation4(${OBJ_LIB}) \ No newline at end of file diff --git a/src/cpu/cpu_batch_normalization_list.cpp b/src/cpu/cpu_batch_normalization_list.cpp index e56d39c134f..c41de337514 100644 --- a/src/cpu/cpu_batch_normalization_list.cpp +++ b/src/cpu/cpu_batch_normalization_list.cpp @@ -33,8 +33,8 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::cpu::aarch64; #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_batch_normalization.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_batch_normalization.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -59,7 +59,7 @@ const std::map> &impl_list_map() { CPU_INSTANCE_AARCH64(jit_uni_batch_normalization_fwd_t, sve_512) CPU_INSTANCE_AARCH64(jit_uni_batch_normalization_fwd_t, sve_256) CPU_INSTANCE_AARCH64(jit_uni_batch_normalization_fwd_t, asimd) - DNNL_AARCH64_ONLY(DNNL_ACL_ONLY(CPU_INSTANCE(acl::acl_batch_normalization_fwd_t))) + DNNL_AARCH64_ACL_ONLY(CPU_INSTANCE(aarch64::acl_batch_normalization_fwd_t)) CPU_INSTANCE(ncsp_batch_normalization_fwd_t, f32) CPU_INSTANCE(ncsp_batch_normalization_fwd_t, bf16) CPU_INSTANCE(ncsp_batch_normalization_fwd_t, f16) diff --git a/src/cpu/cpu_binary_list.cpp b/src/cpu/cpu_binary_list.cpp index 1cb4692f451..c83c39268a3 100644 --- 
a/src/cpu/cpu_binary_list.cpp +++ b/src/cpu/cpu_binary_list.cpp @@ -28,8 +28,8 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::cpu::aarch64; #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_binary.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_binary.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -43,7 +43,7 @@ using namespace dnnl::impl::data_type; const impl_list_item_t impl_list[] = REG_BINARY_P({ CPU_INSTANCE_X64(jit_uni_binary_t) CPU_INSTANCE_AARCH64(jit_uni_binary_t) - CPU_INSTANCE_ACL(acl_binary_t) + CPU_INSTANCE_AARCH64_ACL(acl_binary_t) CPU_INSTANCE(ref_binary_t) /* eol */ nullptr, diff --git a/src/cpu/cpu_convolution_list.cpp b/src/cpu/cpu_convolution_list.cpp index 431f6ba60e3..73076c62cb0 100644 --- a/src/cpu/cpu_convolution_list.cpp +++ b/src/cpu/cpu_convolution_list.cpp @@ -71,11 +71,11 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::cpu::aarch64; #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_depthwise_convolution.hpp" -#include "cpu/acl/acl_gemm_convolution.hpp" -#include "cpu/acl/acl_indirect_gemm_convolution.hpp" -#include "cpu/acl/acl_winograd_convolution.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_depthwise_convolution.hpp" +#include "cpu/aarch64/acl_gemm_convolution.hpp" +#include "cpu/aarch64/acl_indirect_gemm_convolution.hpp" +#include "cpu/aarch64/acl_winograd_convolution.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -150,7 +150,7 @@ const std::map> &impl_list_map() CPU_INSTANCE_SSE41(jit_sse41_1x1_convolution_fwd_t) CPU_INSTANCE_AVX2(jit_avx2_convolution_fwd_t) CPU_INSTANCE_SSE41(jit_sse41_convolution_fwd_t) - CPU_INSTANCE_ACL(acl_wino_convolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_wino_convolution_fwd_t) CPU_INSTANCE_AARCH64(brdgmm_dw_convolution_fwd_t, sve_512) CPU_INSTANCE_AARCH64(brgemm_1x1_convolution_fwd_t, sve_512) 
CPU_INSTANCE_AARCH64(brgemm_convolution_fwd_t, sve_512) @@ -168,9 +168,9 @@ const std::map> &impl_list_map() CPU_INSTANCE_AARCH64(brdgmm_dw_convolution_fwd_t, sve_128) CPU_INSTANCE_AARCH64(brdgmm_dw_convolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brdgmm_dw_convolution_fwd_t, sve_128) - CPU_INSTANCE_ACL(acl_depthwise_convolution_fwd_t) - CPU_INSTANCE_ACL(acl_indirect_gemm_convolution_fwd_t) - CPU_INSTANCE_ACL(acl_gemm_convolution_fwd_t, f32) + CPU_INSTANCE_AARCH64_ACL(acl_depthwise_convolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_indirect_gemm_convolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_gemm_convolution_fwd_t, f32) CPU_INSTANCE_AARCH64(brgemm_convolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brgemm_1x1_convolution_fwd_t, sve_128) CPU_INSTANCE_AARCH64(brgemm_convolution_fwd_t, sve_128) @@ -243,7 +243,7 @@ const std::map> &impl_list_map() CPU_INSTANCE_AARCH64(brdgmm_dw_convolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brdgmm_dw_convolution_fwd_t, sve_128) CPU_INSTANCE_AARCH64(jit_uni_dw_convolution_fwd_t, sve_128, bf16, bf16) - CPU_INSTANCE_ACL(acl_indirect_gemm_convolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_indirect_gemm_convolution_fwd_t) CPU_INSTANCE_AARCH64(brgemm_1x1_convolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brgemm_convolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brgemm_1x1_convolution_fwd_t, sve_128) @@ -279,10 +279,10 @@ const std::map> &impl_list_map() CPU_INSTANCE_AVX512(brgemm_convolution_fwd_t, avx512_core_fp16) CPU_INSTANCE_AVX2(brgemm_1x1_convolution_fwd_t, avx2_vnni_2) CPU_INSTANCE_AVX2(brgemm_convolution_fwd_t, avx2_vnni_2) - CPU_INSTANCE_ACL(acl_wino_convolution_fwd_t) - CPU_INSTANCE_ACL(acl_depthwise_convolution_fwd_t) - CPU_INSTANCE_ACL(acl_indirect_gemm_convolution_fwd_t) - CPU_INSTANCE_ACL(acl_gemm_convolution_fwd_t, f16) + CPU_INSTANCE_AARCH64_ACL(acl_wino_convolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_depthwise_convolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_indirect_gemm_convolution_fwd_t) + 
CPU_INSTANCE_AARCH64_ACL(acl_gemm_convolution_fwd_t, f16) CPU_INSTANCE(ref_convolution_fwd_t) CPU_INSTANCE(ref_fused_convolution_fwd_t) nullptr, @@ -646,7 +646,7 @@ const std::map> &impl_list_map() CPU_INSTANCE_SSE41(jit_uni_x8s8s32x_1x1_convolution_fwd_t, sse41) CPU_INSTANCE_SSE41(jit_uni_x8s8s32x_convolution_fwd_t, sse41) CPU_INSTANCE_AARCH64(jit_sve_512_x8s8s32x_convolution_fwd_t, s8, s8) - CPU_INSTANCE_ACL(acl_gemm_convolution_fwd_t, s8, s8, s8, s32) + CPU_INSTANCE_AARCH64_ACL(acl_gemm_convolution_fwd_t, s8, s8, s8, s32) CPU_INSTANCE_AARCH64(brgemm_1x1_convolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brgemm_convolution_fwd_t, sve_256) CPU_INSTANCE(gemm_x8s8s32x_convolution_fwd_t) diff --git a/src/cpu/cpu_deconvolution_list.cpp b/src/cpu/cpu_deconvolution_list.cpp index 917eaa28ace..e916c9249f3 100644 --- a/src/cpu/cpu_deconvolution_list.cpp +++ b/src/cpu/cpu_deconvolution_list.cpp @@ -33,8 +33,8 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::cpu::aarch64; #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_deconvolution.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_deconvolution.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -70,7 +70,7 @@ const std::map> &impl_list_map() { CPU_INSTANCE_AARCH64(brgemm_deconvolution_fwd_t, sve_256) CPU_INSTANCE_AARCH64(brgemm_deconvolution_fwd_t, sve_128) CPU_INSTANCE_AARCH64(jit_sve_512_core_x8s8s32x_deconvolution_fwd_t) - CPU_INSTANCE_ACL(acl_deconvolution_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_deconvolution_fwd_t) CPU_INSTANCE(ref_deconvolution_fwd_t) nullptr, }}, diff --git a/src/cpu/cpu_eltwise_list.cpp b/src/cpu/cpu_eltwise_list.cpp index eec7a054399..c4164953f9f 100644 --- a/src/cpu/cpu_eltwise_list.cpp +++ b/src/cpu/cpu_eltwise_list.cpp @@ -30,8 +30,8 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::cpu::aarch64; #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_eltwise.hpp" -using namespace 
dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_eltwise.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -76,7 +76,7 @@ const std::map> &impl_list_map() { CPU_INSTANCE_AARCH64(jit_uni_eltwise_int_fwd_t, sve_512, s32) CPU_INSTANCE_AARCH64(jit_uni_eltwise_int_fwd_t, sve_512, s8) CPU_INSTANCE_AARCH64(jit_uni_eltwise_int_fwd_t, sve_512, u8) - CPU_INSTANCE_ACL(acl_eltwise_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_eltwise_fwd_t) CPU_INSTANCE(ref_eltwise_fwd_t, f32) CPU_INSTANCE(ref_eltwise_fwd_t, bf16) // CPU_INSTANCE(ref_eltwise_fwd_t, f16) diff --git a/src/cpu/cpu_engine.hpp b/src/cpu/cpu_engine.hpp index cbaff592afa..8fc890d24c5 100644 --- a/src/cpu/cpu_engine.hpp +++ b/src/cpu/cpu_engine.hpp @@ -30,7 +30,7 @@ #include "cpu/platform.hpp" #if DNNL_AARCH64 && defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_thread.hpp" +#include "cpu/aarch64/acl_thread.hpp" #endif #define CPU_INSTANCE_IMPL(...) \ @@ -46,8 +46,8 @@ #define CPU_INSTANCE_AVX512(...) REG_AVX512_ISA(CPU_INSTANCE(__VA_ARGS__)) #define CPU_INSTANCE_AMX(...) REG_AMX_ISA(CPU_INSTANCE(__VA_ARGS__)) #define CPU_INSTANCE_AARCH64(...) DNNL_AARCH64_ONLY(CPU_INSTANCE(__VA_ARGS__)) -#define CPU_INSTANCE_ARM(...) DNNL_ARM_ONLY(CPU_INSTANCE(__VA_ARGS__)) -#define CPU_INSTANCE_ACL(...) DNNL_ACL_ONLY(CPU_INSTANCE(__VA_ARGS__)) +#define CPU_INSTANCE_AARCH64_ACL(...) \ + DNNL_AARCH64_ACL_ONLY(CPU_INSTANCE(__VA_ARGS__)) #define CPU_INSTANCE_RV64GCV(...) DNNL_RV64GCV_ONLY(CPU_INSTANCE(__VA_ARGS__)) #define CPU_INSTANCE_RV64GCV_ZVFH(...) 
\ DNNL_RV64GCV_ZVFH_ONLY(CPU_INSTANCE(__VA_ARGS__)) @@ -164,7 +164,7 @@ class cpu_engine_factory_t : public engine_factory_t { engine_kind::cpu, get_cpu_native_runtime(), 0)); #if DNNL_AARCH64 && defined(DNNL_AARCH64_USE_ACL) - dnnl::impl::cpu::acl::acl_thread_utils::set_acl_threading(); + dnnl::impl::cpu::aarch64::acl_thread_utils::set_acl_threading(); #endif return status::success; }; diff --git a/src/cpu/cpu_inner_product_list.cpp b/src/cpu/cpu_inner_product_list.cpp index 96047c232c4..d96c619d860 100644 --- a/src/cpu/cpu_inner_product_list.cpp +++ b/src/cpu/cpu_inner_product_list.cpp @@ -28,8 +28,8 @@ #include "cpu/x64/matmul_inner_product.hpp" using namespace dnnl::impl::cpu::x64; #elif defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_inner_product.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_inner_product.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -57,7 +57,7 @@ const std::map> &impl_list_map() CPU_INSTANCE_AMX(brgemm_inner_product_fwd_t, avx512_core_amx) // bf32 CPU_INSTANCE_AVX512(brgemm_inner_product_fwd_t, avx512_core) CPU_INSTANCE_AVX2(brgemm_inner_product_fwd_t, avx2) - CPU_INSTANCE_ACL(acl_inner_product_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_inner_product_fwd_t) CPU_INSTANCE(gemm_inner_product_fwd_t, f32) CPU_INSTANCE(ref_inner_product_fwd_t) nullptr, @@ -130,7 +130,7 @@ const std::map> &impl_list_map() CPU_INSTANCE_AVX512(brgemm_inner_product_fwd_t, avx512_core_bf16) CPU_INSTANCE_AVX512(gemm_bf16_inner_product_fwd_t, bf16) CPU_INSTANCE_AVX2(brgemm_inner_product_fwd_t, avx2_vnni_2) - CPU_INSTANCE_ACL(acl_inner_product_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_inner_product_fwd_t) CPU_INSTANCE(ref_inner_product_fwd_t) nullptr, }}, @@ -230,7 +230,7 @@ const std::map> &impl_list_map() CPU_INSTANCE_AVX512(brgemm_inner_product_fwd_t, avx10_2_512) CPU_INSTANCE_AVX512(brgemm_inner_product_fwd_t, avx512_core_fp16) CPU_INSTANCE_AVX2(brgemm_inner_product_fwd_t, avx2_vnni_2) - 
CPU_INSTANCE_ACL(acl_inner_product_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_inner_product_fwd_t) CPU_INSTANCE(ref_inner_product_fwd_t) nullptr, }}, @@ -240,7 +240,7 @@ const std::map> &impl_list_map() * in fp32 and weights are in bf16 */ {{forward, f32, bf16, f32}, { - CPU_INSTANCE_ACL(acl_inner_product_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_inner_product_fwd_t) nullptr, }}, diff --git a/src/cpu/cpu_layer_normalization_list.cpp b/src/cpu/cpu_layer_normalization_list.cpp index 4f1e5d178a2..b521fb63fba 100644 --- a/src/cpu/cpu_layer_normalization_list.cpp +++ b/src/cpu/cpu_layer_normalization_list.cpp @@ -24,8 +24,8 @@ #include "cpu/x64/jit_uni_layer_normalization.hpp" using namespace dnnl::impl::cpu::x64; #elif defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_layer_normalization.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_layer_normalization.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -41,7 +41,7 @@ const std::map> &impl_list_map() { static const std::map> the_map = REG_LNORM_P({ {{forward}, { CPU_INSTANCE_X64(jit_uni_layer_normalization_fwd_t) - CPU_INSTANCE_ACL(acl_layer_normalization_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_layer_normalization_fwd_t) CPU_INSTANCE(simple_layer_normalization_fwd_t) CPU_INSTANCE(ref_layer_normalization_fwd_t) nullptr, diff --git a/src/cpu/cpu_pooling_list.cpp b/src/cpu/cpu_pooling_list.cpp index 6284cdeb858..79afb1ccdd2 100644 --- a/src/cpu/cpu_pooling_list.cpp +++ b/src/cpu/cpu_pooling_list.cpp @@ -37,8 +37,8 @@ using namespace dnnl::impl::cpu::rv64; #endif // DNNL_RISCV_USE_RVV_INTRINSICS #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_pooling.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_pooling.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -66,7 +66,7 @@ const std::map> &impl_list_map() { CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, sse41, f32) CPU_INSTANCE_AARCH64(jit_uni_pooling_fwd_t, sve_512, f32) 
CPU_INSTANCE_AARCH64(jit_uni_pooling_fwd_t, sve_256, f32) - CPU_INSTANCE_ACL(acl_pooling_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_pooling_fwd_t) CPU_INSTANCE_RV64GCV(riscv_nchw_pooling_fwd_t) CPU_INSTANCE(nchw_pooling_fwd_t, bf16) CPU_INSTANCE(nchw_pooling_fwd_t, f32) diff --git a/src/cpu/cpu_prelu_list.cpp b/src/cpu/cpu_prelu_list.cpp index e55f18203b8..c88974bc323 100644 --- a/src/cpu/cpu_prelu_list.cpp +++ b/src/cpu/cpu_prelu_list.cpp @@ -24,8 +24,8 @@ using namespace dnnl::impl::cpu::x64; #elif defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_prelu.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_prelu.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -41,7 +41,7 @@ const std::map> &impl_list_map() { static const std::map> the_map = REG_PRELU_P({ {{forward}, { CPU_INSTANCE_X64(jit_prelu_fwd_t) - CPU_INSTANCE_ACL(acl_prelu_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_prelu_fwd_t) CPU_INSTANCE(ref_prelu_fwd_t) nullptr, }}, diff --git a/src/cpu/cpu_softmax_list.cpp b/src/cpu/cpu_softmax_list.cpp index c4c2954b235..efd4e6a28db 100644 --- a/src/cpu/cpu_softmax_list.cpp +++ b/src/cpu/cpu_softmax_list.cpp @@ -29,8 +29,8 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::cpu::aarch64; #endif #if defined(DNNL_AARCH64_USE_ACL) -#include "cpu/acl/acl_softmax.hpp" -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/acl_softmax.hpp" +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -52,7 +52,7 @@ const std::map> &impl_list_map() { CPU_INSTANCE_AARCH64(jit_uni_softmax_fwd_t, sve_512) CPU_INSTANCE_AARCH64(jit_uni_softmax_fwd_t, sve_256) CPU_INSTANCE_AARCH64(jit_uni_softmax_fwd_t, sve_128) - CPU_INSTANCE_ACL(acl_softmax_fwd_t) + CPU_INSTANCE_AARCH64_ACL(acl_softmax_fwd_t) CPU_INSTANCE(ref_softmax_fwd_t) nullptr, }}, diff --git a/src/cpu/matmul/cpu_matmul_list.cpp b/src/cpu/matmul/cpu_matmul_list.cpp index e0e98c23e91..5107770b453 100644 --- a/src/cpu/matmul/cpu_matmul_list.cpp +++ 
b/src/cpu/matmul/cpu_matmul_list.cpp @@ -45,11 +45,11 @@ using namespace dnnl::impl::cpu::rv64; #endif #ifdef DNNL_AARCH64_USE_ACL -#include "cpu/acl/matmul/acl_lowp_matmul.hpp" -#include "cpu/acl/matmul/acl_lowp_matmul_sq.hpp" -#include "cpu/acl/matmul/acl_matmul.hpp" -using namespace dnnl::impl::cpu::acl::matmul; -using namespace dnnl::impl::cpu::acl; +#include "cpu/aarch64/matmul/acl_lowp_matmul.hpp" +#include "cpu/aarch64/matmul/acl_lowp_matmul_sq.hpp" +#include "cpu/aarch64/matmul/acl_matmul.hpp" +using namespace dnnl::impl::cpu::aarch64::matmul; +using namespace dnnl::impl::cpu::aarch64; #endif namespace dnnl { @@ -63,9 +63,9 @@ using namespace dnnl::impl::cpu::matmul; // clang-format off const impl_list_item_t impl_list[] = REG_MATMUL_P({ CPU_INSTANCE_AARCH64(brgemm_matmul_t, sve_512) - CPU_INSTANCE_ACL(acl_lowp_matmul_sq_t) - CPU_INSTANCE_ACL(acl_lowp_matmul_t) - CPU_INSTANCE_ACL(acl_matmul_t) + CPU_INSTANCE_AARCH64_ACL(acl_lowp_matmul_sq_t) + CPU_INSTANCE_AARCH64_ACL(acl_lowp_matmul_t) + CPU_INSTANCE_AARCH64_ACL(acl_matmul_t) CPU_INSTANCE_AARCH64(jit_bf16_matmul_t) CPU_INSTANCE_AARCH64(brgemm_matmul_t, sve_256) CPU_INSTANCE_AARCH64(jit_int8_matmul_t) diff --git a/src/cpu/platform.hpp b/src/cpu/platform.hpp index 1052367b028..b6645725dd5 100644 --- a/src/cpu/platform.hpp +++ b/src/cpu/platform.hpp @@ -28,7 +28,6 @@ // - DNNL_X64 // - DNNL_X86 // - DNNL_AARCH64 -// - DNNL_ARM // - DNNL_PPC64 // - DNNL_S390X // - DNNL_RV64 @@ -36,8 +35,8 @@ // Target architecture macro is set to 1, others to 0. All macros are defined. 
#if defined(DNNL_X64) + defined(DNNL_AARCH64) + defined(DNNL_PPC64) \ - + defined(DNNL_S390X) + defined(DNNL_RV64) + defined(DNNL_ARM) \ - + defined(DNNL_X86) + defined(DNNL_ARCH_GENERIC) \ + + defined(DNNL_S390X) + defined(DNNL_RV64) + defined(DNNL_X86) \ + + defined(DNNL_ARCH_GENERIC) \ == 0 #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) \ || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) @@ -49,8 +48,6 @@ #define DNNL_X86 1 #elif defined(__aarch64__) || defined(_M_ARM64) #define DNNL_AARCH64 1 -#elif defined(__arm__) || defined(_M_ARM) || defined(__ARMEL__) -#define DNNL_ARM 1 #elif defined(__powerpc64__) || defined(__PPC64__) || defined(_ARCH_PPC64) #define DNNL_PPC64 1 #elif defined(__s390x__) @@ -63,8 +60,8 @@ #endif // defined(DNNL_X64) + ... == 0 #if defined(DNNL_X64) + defined(DNNL_AARCH64) + defined(DNNL_PPC64) \ - + defined(DNNL_S390X) + defined(DNNL_RV64) + defined(DNNL_ARM) \ - + defined(DNNL_X86) + defined(DNNL_ARCH_GENERIC) \ + + defined(DNNL_S390X) + defined(DNNL_RV64) + defined(DNNL_X86) \ + + defined(DNNL_ARCH_GENERIC) \ != 1 #error One and only one architecture should be defined at a time #endif @@ -78,9 +75,6 @@ #if !defined(DNNL_AARCH64) #define DNNL_AARCH64 0 #endif -#if !defined(DNNL_ARM) -#define DNNL_ARM 0 -#endif #if !defined(DNNL_PPC64) #define DNNL_PPC64 0 #endif @@ -100,7 +94,6 @@ #define DNNL_PPC64_ONLY(...) Z_CONDITIONAL_DO(DNNL_PPC64, __VA_ARGS__) #define DNNL_S390X_ONLY(...) Z_CONDITIONAL_DO(DNNL_S390X_ONLY, __VA_ARGS__) #define DNNL_AARCH64_ONLY(...) Z_CONDITIONAL_DO(DNNL_AARCH64, __VA_ARGS__) -#define DNNL_ARM_ONLY(...) Z_CONDITIONAL_DO(DNNL_ARM, __VA_ARGS__) // Using RISC-V implementations optimized with RVV Intrinsics is optional for RISC-V builds // and can be enabled with DNNL_ARCH_OPT_FLAGS="-march=" option, where @@ -122,11 +115,11 @@ #define DNNL_NON_X64_ONLY(...) 
Z_CONDITIONAL_DO(Z_NOT(DNNL_X64), __VA_ARGS__) // Using Arm Compute Library kernels is optional for AArch64 builds -// and can be enabled with the DNNL_USE_ACL CMake option +// and can be enabled with the DNNL_AARCH64_USE_ACL CMake option #if defined(DNNL_AARCH64) && defined(DNNL_AARCH64_USE_ACL) -#define DNNL_ACL_ONLY(...) __VA_ARGS__ +#define DNNL_AARCH64_ACL_ONLY(...) __VA_ARGS__ #else -#define DNNL_ACL_ONLY(...) +#define DNNL_AARCH64_ACL_ONLY(...) #endif // Primitive ISA section for configuring knobs. diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp index 9b6d5cd4f2d..6c2c76a8451 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp @@ -36,7 +36,7 @@ const impl_list_map_t &regular_f32_bf16_impl_list_map() { DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, bf16, nChw16c)) DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, bf16, nCdhw16c)) - DNNL_AARCH64_ONLY(DNNL_ACL_ONLY(CPU_REORDER_INSTANCE(acl::acl_reorder_fwd_t))) + DNNL_AARCH64_ACL_ONLY(CPU_REORDER_INSTANCE(aarch64::acl_reorder_fwd_t)) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) DNNL_NON_X64_ONLY(REG_SR(f32, oihw, bf16, OIhw8i16o2i, fmt_order::keep)) diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp index b7e34aaf92e..dfe6e96553d 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp @@ -33,7 +33,7 @@ const impl_list_map_t &regular_f32_f32_impl_list_map() { DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t)) DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t)) - DNNL_AARCH64_ONLY(DNNL_ACL_ONLY(CPU_REORDER_INSTANCE(acl::acl_reorder_fwd_t))) + DNNL_AARCH64_ACL_ONLY(CPU_REORDER_INSTANCE(aarch64::acl_reorder_fwd_t)) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::brgemm_matmul_copy_reorder_t))
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_blk_reorder_t)) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) @@ -85,7 +85,7 @@ const impl_list_map_t &regular_f32_f32_impl_list_map() { DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t)) DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t)) - DNNL_AARCH64_ONLY(DNNL_ACL_ONLY(CPU_REORDER_INSTANCE(acl::acl_reorder_fwd_t))) + DNNL_AARCH64_ACL_ONLY(CPU_REORDER_INSTANCE(aarch64::acl_reorder_fwd_t)) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_blk_reorder_t)) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp index 8fcf2b4385c..ba325bda930 100644 --- a/tests/benchdnn/rnn/rnn.cpp +++ b/tests/benchdnn/rnn/rnn.cpp @@ -816,7 +816,7 @@ void skip_unimplemented_prb(const prb_t *prb_, res_t *res) { return; } -#ifdef DNNL_USE_ACL +#ifdef DNNL_AARCH64_USE_ACL const bool is_acl_f16_not_ok = prb.cfg[SRC_LAYER].dt == dnnl_f16 && dnnl::impl::cpu::platform::has_data_type_support(dnnl_f16); if (is_acl_f16_not_ok) { diff --git a/tests/benchdnn/softmax/softmax.cpp b/tests/benchdnn/softmax/softmax.cpp index 2a64e0b787f..66bd2a1b0da 100644 --- a/tests/benchdnn/softmax/softmax.cpp +++ b/tests/benchdnn/softmax/softmax.cpp @@ -266,7 +266,7 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const float trh_coeff_bwd = (prb->dir & FLAG_FWD) ? 1.f : 4.f; const float trh_f32 = trh_coeff_log * trh_coeff_bwd * trh_coeff_f32 * epsilon_dt(trh_dt); -#if defined(DNNL_USE_ACL) || defined(DNNL_SYCL_HIP) || defined(DNNL_SYCL_CUDA) +#if defined(DNNL_AARCH64_USE_ACL) || defined(DNNL_SYCL_HIP) || defined(DNNL_SYCL_CUDA) // MIOpen and ACL softmax accumulate in F16, but oneDNN now expects accumulation in // F32, this partially reverts 6727bbe8.
For more information on ACL softmax, see // https://github.com/uxlfoundation/oneDNN/issues/1819 @@ -306,7 +306,7 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const auto softmax_add_check = [&](const compare::compare_t::driver_check_func_args_t &args) { -#if defined(DNNL_USE_ACL) +#if defined(DNNL_AARCH64_USE_ACL) auto diff_trh = epsilon_dt(args.dt); #else auto diff_trh = epsilon_dt(dnnl_f32);