From 620a246f49b632dd03f67cc292dc6150c32bdb98 Mon Sep 17 00:00:00 2001
From: Erik Lundell <erik.lundell@arm.com>
Date: Fri, 12 Jun 2026 10:25:44 +0200
Subject: [PATCH] Cortex-M backend: Minimize scope of cmsis_nn dependency.

For Ethos-U flows, you might want to run only ReplaceQuantNodesPass,
which doesn't require the cmsis_nn dependency. Since installing it is
currently not trivial, we shouldn't force people to do so when it is
not needed.
Right now, importing that pass imports all passes, which in turn
triggers the import of cmsis_nn.

- Move the dependency check so it only triggers when cmsis_nn is used.
- Do this by wrapping cmsis_nn, taking the chance to add typing.
- Lazy-load passes so importing one pass doesn't import all of them.

Tested by running:

    from executorch.backends.cortex_m.passes import ReplaceQuantNodesPass

Before the patch this triggers an error if cmsis_nn is not installed.
After the patch, it doesn't.

Signed-off-by: Erik Lundell <erik.lundell@arm.com>
Change-Id: Ic674509b09355145cdaa601008ba19074bda0283
---
 backends/cortex_m/TARGETS                     |  13 +-
 backends/cortex_m/library/__init__.py         |   4 +
 backends/cortex_m/library/cmsis_nn.py         | 279 ++++++++++++++++++
 backends/cortex_m/passes/BUCK                 |   2 +
 .../cortex_m/passes/aten_to_cortex_m_pass.py  |   2 +-
 .../cortex_m/passes/scratch_buffer_sizes.py   |   2 +-
 backends/cortex_m/target_config.py            |   3 +-
 .../cortex_m/test/misc/test_cmsis_pybind.py   |   3 +-
 .../cortex_m/test/misc/test_target_config.py  |   3 +-
 backends/cortex_m/test/ops/test_avg_pool2d.py |   2 +-
 10 files changed, 305 insertions(+), 8 deletions(-)
 create mode 100644 backends/cortex_m/library/__init__.py
 create mode 100644 backends/cortex_m/library/cmsis_nn.py

diff --git a/backends/cortex_m/TARGETS b/backends/cortex_m/TARGETS
index b84add05516..1b73bb03bfc 100644
--- a/backends/cortex_m/TARGETS
+++ b/backends/cortex_m/TARGETS
@@ -20,12 +20,23 @@ python_library(
     ],
 )
 
+python_library(
+    name = "cmsis_nn",
+    srcs = [
+        "library/__init__.py",
+        "library/cmsis_nn.py",
+    ],
+    deps = [
+        "fbsource//third-party/cmsis-nn:cmsis_nn_py",
+    ],
+)
+
 python_library(
     name = "target_config",
     srcs = [
         "target_config.py",
     ],
     deps = [
-        "fbsource//third-party/cmsis-nn:cmsis_nn_py",
+        ":cmsis_nn",
     ],
 )
diff --git a/backends/cortex_m/library/__init__.py b/backends/cortex_m/library/__init__.py
new file mode 100644
index 00000000000..19ebb35e5f2
--- /dev/null
+++ b/backends/cortex_m/library/__init__.py
@@ -0,0 +1,4 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/backends/cortex_m/library/cmsis_nn.py b/backends/cortex_m/library/cmsis_nn.py
new file mode 100644
index 00000000000..791ffdf754b
--- /dev/null
+++ b/backends/cortex_m/library/cmsis_nn.py
@@ -0,0 +1,279 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from __future__ import annotations
+
+from types import ModuleType
+from typing import Any, cast, ClassVar, Sequence, TYPE_CHECKING
+
+_cmsis_nn: ModuleType | None = None
+_cmsis_nn_import_error: ModuleNotFoundError | None = None
+
+
+class _EnumValue:
+    def __init__(self, enum_name: str, name: str, value: int) -> None:
+        self._enum_name = enum_name
+        self.name = name
+        self.value = value
+
+    def __repr__(self) -> str:
+        return f"<{self._enum_name}.{self.name}: {self.value}>"
+
+    def __str__(self) -> str:
+        return f"{self._enum_name}.{self.name}"
+
+
+class Backend:
+    MVE: ClassVar[Backend]
+    DSP: ClassVar[Backend]
+    SCALAR: ClassVar[Backend]
+
+    name: str
+    value: int
+
+
+Backend.MVE = cast(Backend, _EnumValue("Backend", "MVE", 0))
+Backend.DSP = cast(Backend, _EnumValue("Backend", "DSP", 1))
+Backend.SCALAR = cast(Backend, _EnumValue("Backend", "SCALAR", 2))
+
+
+class CortexM:
+    M0: ClassVar[CortexM]
+    M0PLUS: ClassVar[CortexM]
+    M3: ClassVar[CortexM]
+    M4: ClassVar[CortexM]
+    M7: ClassVar[CortexM]
+    M23: ClassVar[CortexM]
+    M33: ClassVar[CortexM]
+    M35P: ClassVar[CortexM]
+    M55: ClassVar[CortexM]
+    M85: ClassVar[CortexM]
+
+    name: str
+    value: int
+
+
+CortexM.M0 = cast(CortexM, _EnumValue("CortexM", "M0", 0))
+CortexM.M0PLUS = cast(CortexM, _EnumValue("CortexM", "M0PLUS", 1))
+CortexM.M3 = cast(CortexM, _EnumValue("CortexM", "M3", 2))
+CortexM.M4 = cast(CortexM, _EnumValue("CortexM", "M4", 3))
+CortexM.M7 = cast(CortexM, _EnumValue("CortexM", "M7", 4))
+CortexM.M23 = cast(CortexM, _EnumValue("CortexM", "M23", 5))
+CortexM.M33 = cast(CortexM, _EnumValue("CortexM", "M33", 6))
+CortexM.M35P = cast(CortexM, _EnumValue("CortexM", "M35P", 7))
+CortexM.M55 = cast(CortexM, _EnumValue("CortexM", "M55", 8))
+CortexM.M85 = cast(CortexM, _EnumValue("CortexM", "M85", 9))
+
+
+class DataType:
+    A8W4: ClassVar[DataType]
+    A8W8: ClassVar[DataType]
+    A16W8: ClassVar[DataType]
+
+    name: str
+    value: int
+
+
+DataType.A8W4 = cast(DataType, _EnumValue("DataType", "A8W4", 0))
+DataType.A8W8 = cast(DataType, _EnumValue("DataType", "A8W8", 1))
+DataType.A16W8 = cast(DataType, _EnumValue("DataType", "A16W8", 2))
+
+
+if not TYPE_CHECKING:
+    try:
+        import cmsis_nn as _real_cmsis_nn  # type: ignore[import-not-found, import-untyped]
+    except ModuleNotFoundError as exc:
+        if exc.name != "cmsis_nn":
+            raise
+        _cmsis_nn_import_error = exc
+    else:
+        _cmsis_nn = _real_cmsis_nn
+        Backend = _real_cmsis_nn.Backend
+        CortexM = _real_cmsis_nn.CortexM
+        DataType = _real_cmsis_nn.DataType
+
+
+def _missing_dependencies_error() -> ModuleNotFoundError:
+    return ModuleNotFoundError(
+        "Cortex-M backend dependencies are not installed. "
+        "Install by running `examples/arm/setup.sh --i-agree-to-the-contained-eula`, "
+        "or pip install from the CMSIS-NN repo."
+    )
+
+
+def _require_cmsis_nn() -> ModuleType:
+    if _cmsis_nn is None:
+        raise _missing_dependencies_error() from _cmsis_nn_import_error
+    return _cmsis_nn
+
+
+def resolve_backend(cpu: CortexM) -> Backend:
+    return _require_cmsis_nn().resolve_backend(cpu)
+
+
+def convolve_wrapper_buffer_size(
+    backend: Backend,
+    data_type: DataType,
+    *,
+    input_nhwc: Sequence[int],
+    filter_nhwc: Sequence[int],
+    output_nhwc: Sequence[int],
+    padding_hw: Sequence[int],
+    stride_hw: Sequence[int],
+    dilation_hw: Sequence[int],
+    input_offset: int = 0,
+    output_offset: int = 0,
+    activation_min: int = -128,
+    activation_max: int = 127,
+) -> int:
+    return _require_cmsis_nn().convolve_wrapper_buffer_size(
+        backend,
+        data_type,
+        input_nhwc=input_nhwc,
+        filter_nhwc=filter_nhwc,
+        output_nhwc=output_nhwc,
+        padding_hw=padding_hw,
+        stride_hw=stride_hw,
+        dilation_hw=dilation_hw,
+        input_offset=input_offset,
+        output_offset=output_offset,
+        activation_min=activation_min,
+        activation_max=activation_max,
+    )
+
+
+def depthwise_conv_wrapper_buffer_size(
+    backend: Backend,
+    data_type: DataType,
+    *,
+    input_nhwc: Sequence[int],
+    filter_nhwc: Sequence[int],
+    output_nhwc: Sequence[int],
+    padding_hw: Sequence[int],
+    stride_hw: Sequence[int],
+    dilation_hw: Sequence[int],
+    ch_mult: int,
+    input_offset: int = 0,
+    output_offset: int = 0,
+    activation_min: int = -128,
+    activation_max: int = 127,
+) -> int:
+    return _require_cmsis_nn().depthwise_conv_wrapper_buffer_size(
+        backend,
+        data_type,
+        input_nhwc=input_nhwc,
+        filter_nhwc=filter_nhwc,
+        output_nhwc=output_nhwc,
+        padding_hw=padding_hw,
+        stride_hw=stride_hw,
+        dilation_hw=dilation_hw,
+        ch_mult=ch_mult,
+        input_offset=input_offset,
+        output_offset=output_offset,
+        activation_min=activation_min,
+        activation_max=activation_max,
+    )
+
+
+def fully_connected_buffer_size(
+    backend: Backend,
+    data_type: DataType,
+    *,
+    filter_nhwc: Sequence[int],
+) -> int:
+    return _require_cmsis_nn().fully_connected_buffer_size(
+        backend,
+        data_type,
+        filter_nhwc=filter_nhwc,
+    )
+
+
+def transpose_conv_buffer_size(
+    backend: Backend,
+    data_type: DataType,
+    *,
+    input_nhwc: Sequence[int],
+    filter_nhwc: Sequence[int],
+    output_nhwc: Sequence[int],
+    padding_hw: Sequence[int],
+    stride_hw: Sequence[int],
+    dilation_hw: Sequence[int],
+    padding_offsets_hw: Sequence[int] = (0, 0),
+    input_offset: int = 0,
+    output_offset: int = 0,
+    activation_min: int = -128,
+    activation_max: int = 127,
+) -> int:
+    return _require_cmsis_nn().transpose_conv_buffer_size(
+        backend,
+        data_type,
+        input_nhwc=input_nhwc,
+        filter_nhwc=filter_nhwc,
+        output_nhwc=output_nhwc,
+        padding_hw=padding_hw,
+        stride_hw=stride_hw,
+        dilation_hw=dilation_hw,
+        padding_offsets_hw=padding_offsets_hw,
+        input_offset=input_offset,
+        output_offset=output_offset,
+        activation_min=activation_min,
+        activation_max=activation_max,
+    )
+
+
+def transpose_conv_reverse_conv_buffer_size(
+    backend: Backend,
+    data_type: DataType,
+    *,
+    input_nhwc: Sequence[int],
+    filter_nhwc: Sequence[int],
+    padding_hw: Sequence[int],
+    stride_hw: Sequence[int],
+    dilation_hw: Sequence[int] = (1, 1),
+    padding_offsets_hw: Sequence[int] = (0, 0),
+    input_offset: int = 0,
+    output_offset: int = 0,
+    activation_min: int = -128,
+    activation_max: int = 127,
+) -> int:
+    return _require_cmsis_nn().transpose_conv_reverse_conv_buffer_size(
+        backend,
+        data_type,
+        input_nhwc=input_nhwc,
+        filter_nhwc=filter_nhwc,
+        padding_hw=padding_hw,
+        stride_hw=stride_hw,
+        dilation_hw=dilation_hw,
+        padding_offsets_hw=padding_offsets_hw,
+        input_offset=input_offset,
+        output_offset=output_offset,
+        activation_min=activation_min,
+        activation_max=activation_max,
+    )
+
+
+def avgpool_buffer_size(
+    backend: Backend,
+    data_type: DataType,
+    *,
+    dim_dst_width: int,
+    ch_src: int,
+) -> int:
+    return _require_cmsis_nn().avgpool_buffer_size(
+        backend,
+        data_type,
+        dim_dst_width=dim_dst_width,
+        ch_src=ch_src,
+    )
+
+
+def __getattr__(name: str) -> Any:
+    return getattr(_require_cmsis_nn(), name)
+
+
+def __dir__() -> list[str]:
+    cmsis_names = set() if _cmsis_nn is None else set(dir(_cmsis_nn))
+    return sorted(set(globals()) | cmsis_names)
diff --git a/backends/cortex_m/passes/BUCK b/backends/cortex_m/passes/BUCK
index 20444f16718..c792583f657 100644
--- a/backends/cortex_m/passes/BUCK
+++ b/backends/cortex_m/passes/BUCK
@@ -1,6 +1,7 @@
 load("@fbcode_macros//build_defs:build_file_migration.bzl", "fbcode_target", "non_fbcode_target")
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -40,6 +41,7 @@ fbcode_target(_kind = runtime.python_library,
     deps=[
         "//caffe2:torch",
         "//executorch/backends/arm/_passes:passes",
+        "//executorch/backends/cortex_m:cmsis_nn",
         "//executorch/backends/cortex_m:target_config",
         "//executorch/backends/cortex_m/ops:ops",
         "//executorch/backends/cortex_m/passes:passes_utils",
diff --git a/backends/cortex_m/passes/aten_to_cortex_m_pass.py b/backends/cortex_m/passes/aten_to_cortex_m_pass.py
index ecc7187797d..3f5a6055331 100644
--- a/backends/cortex_m/passes/aten_to_cortex_m_pass.py
+++ b/backends/cortex_m/passes/aten_to_cortex_m_pass.py
@@ -8,12 +8,12 @@
 import math
 from typing import cast, Optional
 
-import cmsis_nn  # type: ignore[import-not-found, import-untyped]
 import executorch.backends.cortex_m.ops.operators  # noqa
 import executorch.exir as exir
 import torch
 import torch.fx
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
+from executorch.backends.cortex_m.library import cmsis_nn
 
 from executorch.backends.cortex_m.passes.passes_utils import (
     build_activation_lut,
diff --git a/backends/cortex_m/passes/scratch_buffer_sizes.py b/backends/cortex_m/passes/scratch_buffer_sizes.py
index 95a9c441f61..b247e2be944 100644
--- a/backends/cortex_m/passes/scratch_buffer_sizes.py
+++ b/backends/cortex_m/passes/scratch_buffer_sizes.py
@@ -6,11 +6,11 @@
 from collections.abc import Callable
 from typing import Any, cast
 
-import cmsis_nn  # type: ignore[import-not-found, import-untyped]
 import executorch.backends.cortex_m.ops.operators  # noqa
 
 import torch
 import torch.fx
+from executorch.backends.cortex_m.library import cmsis_nn
 
 from executorch.exir.dialects._ops import ops as exir_ops
 
diff --git a/backends/cortex_m/target_config.py b/backends/cortex_m/target_config.py
index 23cb15c4a53..341ae612cb5 100644
--- a/backends/cortex_m/target_config.py
+++ b/backends/cortex_m/target_config.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -10,7 +11,7 @@
 from enum import auto, Enum
 from typing import Optional
 
-import cmsis_nn  # type: ignore[import-not-found, import-untyped]
+from executorch.backends.cortex_m.library import cmsis_nn
 
 
 class CortexM(Enum):
diff --git a/backends/cortex_m/test/misc/test_cmsis_pybind.py b/backends/cortex_m/test/misc/test_cmsis_pybind.py
index f85a4bacece..08a1d973234 100644
--- a/backends/cortex_m/test/misc/test_cmsis_pybind.py
+++ b/backends/cortex_m/test/misc/test_cmsis_pybind.py
@@ -1,5 +1,4 @@
 # Copyright 2026 Arm Limited and/or its affiliates.
-# All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -11,7 +10,7 @@
 
 def _import_cmsis_nn():
     try:
-        return importlib.import_module("cmsis_nn")
+        return importlib.import_module("executorch.backends.cortex_m.library.cmsis_nn")
     except Exception as exc:
         pytest.fail(f"Failed to resolve cmsis_nn: {exc}")
 
diff --git a/backends/cortex_m/test/misc/test_target_config.py b/backends/cortex_m/test/misc/test_target_config.py
index 3e648b0a81c..472d1927886 100644
--- a/backends/cortex_m/test/misc/test_target_config.py
+++ b/backends/cortex_m/test/misc/test_target_config.py
@@ -1,12 +1,13 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import cmsis_nn  # type: ignore[import-not-found, import-untyped]
 import pytest
 
+from executorch.backends.cortex_m.library import cmsis_nn
 from executorch.backends.cortex_m.target_config import CortexM, CortexMTargetConfig
 
 
diff --git a/backends/cortex_m/test/ops/test_avg_pool2d.py b/backends/cortex_m/test/ops/test_avg_pool2d.py
index 315d968188f..a2992b50905 100644
--- a/backends/cortex_m/test/ops/test_avg_pool2d.py
+++ b/backends/cortex_m/test/ops/test_avg_pool2d.py
@@ -93,7 +93,7 @@ def test_dialect_avg_pool2d(test_case, cortex_m_target):
         qtol=1,
     )
 
-    import cmsis_nn  # type: ignore[import-not-found, import-untyped]
+    from executorch.backends.cortex_m.library import cmsis_nn
 
     module = tester.get_artifact(StageType.RUN_PASSES).exported_program().module()
     pool_target = exir_ops.edge.cortex_m.quantized_avg_pool2d.default