From c06b9f37d46962476b48fa4537dd338daf515795 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Tue, 16 Jun 2026 16:56:51 +0200 Subject: [PATCH 1/2] NXP backend: Enable `aten.bmm` with new Neutron flow. --- .../ops_converters/bmm_converter.py | 46 +-- backends/nxp/quantizer/patterns.py | 21 +- .../node_converter/test_bmm_converter.py | 285 ++++++------------ backends/nxp/tests/models.py | 21 +- backends/nxp/tests/ops_aliases.py | 2 +- 5 files changed, 135 insertions(+), 240 deletions(-) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/bmm_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/bmm_converter.py index 08b8533a89a..d31522a665d 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/bmm_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/bmm_converter.py @@ -2,9 +2,13 @@ # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import torch from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT -from executorch.backends.nxp.backend.edge_helper import input_rank +from executorch.backends.nxp.backend.edge_helper import ( + get_quantization_parameters_for, + input_rank, +) from executorch.backends.nxp.backend.ir.converter.conversion import translator from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList from executorch.backends.nxp.backend.ir.converter.node_converter import ( @@ -14,9 +18,6 @@ from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( batch_mat_mul_options, ) -from executorch.backends.nxp.backend.neutron_operator_support import ( - transposition_is_supported_on_neutron, -) from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from torch.fx import Node from torch.nn import Parameter @@ -44,35 +45,18 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - is_ch_first_1 = node.args[0].meta[NXP_NODE_FORMAT].is_channels_first() - is_ch_first_2 = node.args[1].meta[NXP_NODE_FORMAT].is_channels_first() - # This combination of node formats is not supported on Neutron (`adj_x = True`, `adj_y = False`), - # but it should never happen because both input tensors are expected to share the same format. - if is_ch_first_1 and not is_ch_first_2: + if not NodeConverter.uses_quantization_type_for_io( + node, + supported_types=[torch.int8, torch.uint8], + input_indices=[0, 1], + output_indices=[0], + ): return False - # In case we need to insert transpose after `BatchMatMul`, we also need to check if - # such transposition is supported. - if node.meta[NXP_NODE_FORMAT].is_channels_first(): - tensor_shape = node.meta["val"].shape - tensor_rank = len(tensor_shape) - perm = translator.create_channels_first_to_channels_last_permutation( - tensor_rank, return_list=True - ) - - tensor_shape_channels_last = [tensor_shape[i] for i in perm] - if not transposition_is_supported_on_neutron( - tensor_shape_channels_last, perm, neutron_target_spec - ): - return False - - _, d1, d2 = node.args[0].meta["val"].shape - _, d3, d4 = node.args[1].meta["val"].shape - - # The Neutron converter requires that every dimension participating in the - # multiplication is divisible by NUM_MACS. - num_macs = neutron_target_spec.get_num_macs() - if not all(m % num_macs == 0 for m in [d1, d2, d3, d4]): + _, input_1_zp = get_quantization_parameters_for(node.args[0]) + _, input_2_zp = get_quantization_parameters_for(node.args[1]) + if not (input_1_zp == input_2_zp == 0): + # Neutron requirement. return False return True diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 9e21e4f1660..841f3897d6e 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -10,6 +10,7 @@ from functools import partial import torch + from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import ( _is_convertible_to_relu, ) @@ -22,6 +23,8 @@ from torch.fx import Node from torchao.quantization.pt2e import ( FakeQuantize, + MinMaxObserver, + MovingAverageMinMaxObserver, MovingAveragePerChannelMinMaxObserver, PerChannelMinMaxObserver, ) @@ -326,10 +329,24 @@ def get_anchors( ) -> PartitionAnchors | None: bmm_node = fused_partition[0].nodes[-1] + # Use per_tensor_symmetric to enforce zero_point=0 for both inputs + observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAverageMinMaxObserver) + if self.is_qat + else MinMaxObserver + ) + input_quantization_spec = QuantizationSpec( + dtype=torch.int8, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + quant_min=-128, + quant_max=127, + qscheme=torch.per_tensor_symmetric, # Neutron requires the inputs to have zero point = 0. + ) + return PartitionAnchors( inputs=[ - (bmm_node, NodeArgsIdx(0)), - (bmm_node, NodeArgsIdx(1)), + (bmm_node, NodeArgsIdx(0), input_quantization_spec), + (bmm_node, NodeArgsIdx(1), input_quantization_spec), ], biases=[], output=[(bmm_node,)], diff --git a/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py index dc442a4931c..4636e6205af 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py @@ -3,29 +3,26 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import numpy as np +# noinspection PyUnusedImports import pytest import torch -from executorch.backends.nxp.backend.edge_program_converter import ( - EdgeProgramToIRConverter, -) -from executorch.backends.nxp.backend.ir.converter.conversion import translator -from executorch.backends.nxp.backend.neutron_operator_support import ( - transposition_is_supported_on_neutron, + +from executorch.backends.nxp.edge_passes.move_auxiliary_operator_into_separate_qdq_cluster_pass import ( + ViewCopy, ) -from executorch.backends.nxp.tests.executorch_pipeline import ( - neutron_target_spec, - to_quantized_edge_program, +from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator +from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.model_output_comparator import ( + AllCloseOutputComparator, ) -from executorch.backends.nxp.tests.executors import ( - convert_run_compare, - graph_contains_any_of_ops, - ToChannelFirstPreprocess, - ToChannelLastPreprocess, +from executorch.backends.nxp.tests.models import ( + BatchMatMulMaxPoolModel, + BatchMatMulModel, ) -from executorch.backends.nxp.tests.models import BatchMatMulConvModel, BatchMatMulModel +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import BMM, GetItem, MaxPool2DWithIndices from executorch.backends.nxp.tests.use_qat import * # noqa F403 -from executorch.exir.dialects._ops import ops as exir_ops @pytest.fixture(autouse=True) @@ -33,186 +30,86 @@ def reseed_model_per_test_run(): torch.manual_seed(23) -ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate -Bmm = exir_ops.edge.aten.bmm.default - - -@pytest.mark.parametrize( - "input_shape_x1, input_shape_x2", - [ - pytest.param((1, 24, 16), (1, 16, 24), id="3D, one batch."), - pytest.param((3, 8, 24), (3, 24, 8), id="3D, more batches."), - pytest.param((2, 24, 16), (2, 16, 8), id="3D, more batches, x1_C != x2_W"), - ], -) -def test_convert_bmm__supported(mocker, input_shape_x1, input_shape_x2, use_qat): - model = BatchMatMulModel() - - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - delegated_ep = to_quantized_edge_program( - model, [input_shape_x1, input_shape_x2], use_qat=use_qat - ).exported_program() +class TestBMM: - # Make sure the `bmm` was delegated. - assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) - assert not graph_contains_any_of_ops(delegated_ep.graph, [Bmm]) - - # Verify correct behavior of the converted NeutronIR model. - intermediate_ep = converter_spy.call_args.args[1] - neutron_ir_model, _ = converter_spy.spy_return - - input_data_1 = ( - np.random.random(input_shape_x1).astype(np.float32) * 256.0 - 128.0 - ).astype(np.int8) - input_data_2 = ( - np.random.random(input_shape_x2).astype(np.float32) * 256.0 - 128.0 - ).astype(np.int8) - - # Make sure the tested program contains the `bmm`. - assert graph_contains_any_of_ops(intermediate_ep.graph, [Bmm]) - - # Verify that the delegated `bmm` node produces correct results - # The delegated `bmm` runs with a numerical tolerance of atol = 1 - convert_run_compare( - intermediate_ep, - tfl_model=neutron_ir_model, - input_data={ - 0: input_data_1, - 1: input_data_2, - }, - atol=1, - ) - - -@pytest.mark.parametrize( - "input_shape_x1, input_shape_x2", - [ + # noinspection PyMethodMayBeStatic + def assert_delegated( + self, + model, + input_1_shape, + input_2_shape, + mocker, + use_qat=False, + expected_delegated_ops=None, + ): + if expected_delegated_ops is None: + expected_delegated_ops = {BMM: 1} + + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops=expected_delegated_ops, + expected_non_delegated_ops={}, + ) + + # Use quantized dataset and allow a single-bit error. + remove_quant_io_ops = True + output_comparator = AllCloseOutputComparator(atol=1) + + # Cover also negative values to thoroughly test the operator. + dataset_creator = RandomDatasetCreator(low=-2, high=2) + + lower_run_compare( + model, + [ModelInputSpec(input_1_shape), ModelInputSpec(input_2_shape)], + graph_verifier, + dataset_creator, + output_comparator=output_comparator, + use_qat=use_qat, + remove_quant_io_ops=remove_quant_io_ops, + ) + + def test__qat(self, mocker, use_qat): + input_1_shape, input_2_shape = (1, 24, 16), (1, 16, 24) + model = BatchMatMulModel() + self.assert_delegated( + model, input_1_shape, input_2_shape, mocker, use_qat=use_qat + ) + + # Input shape parameters used by 2 tests in this class. + BMM_SHAPES = [ + pytest.param((3, 8, 24), (3, 24, 8), id="more batches"), + pytest.param((2, 24, 16), (2, 16, 8), id="more batches, x1_C != x2_W"), pytest.param( - (1, 8, 7), (1, 7, 16), id="3D, x1_W (and x2_C) not divisible by NUM_MACS." + (1, 8, 7), (1, 7, 16), id="x1_W (and x2_C) not divisible by NUM_MACS" ), - pytest.param((1, 7, 16), (1, 16, 8), id="3D, x1_C not divisible by NUM_MACS."), - pytest.param((1, 8, 16), (1, 16, 7), id="3D, x2_W not divisible by NUM_MACS."), - ], -) -def test_convert_bmm__unsupported_shape(input_shape_x1, input_shape_x2, use_qat): - model = BatchMatMulModel() - - delegated_ep = to_quantized_edge_program( - model, [input_shape_x1, input_shape_x2], use_qat=use_qat - ).exported_program() - - # Make sure the `bmm` was NOT delegated. - assert graph_contains_any_of_ops(delegated_ep.graph, [Bmm]) - - -def test_convert_bmm__unsupported_dim_order(mocker, use_qat): - pytest.xfail( - "`test_convert_bmm__unsupported_dim_order` is invalid due to incorrect propagation of node format " - "through `view_copy` nodes introduced by the aten pass that converts `conv1d` to `conv2d` " - "in the test model. `NodeFormatInference` needs to be updated to propagate the " - "`channels_first` format only when the batch or channel dimension is modified by the `view_copy` " - "or by other nodes." + pytest.param((1, 7, 16), (1, 16, 8), id="x1_C not divisible by NUM_MACS"), + pytest.param((1, 8, 16), (1, 16, 7), id="x2_W not divisible by NUM_MACS"), + pytest.param((3, 5, 7), (3, 7, 11), id="nothing divisible by NUM_MACS"), + ] + + @pytest.mark.parametrize( + "input_1_shape, input_2_shape", + BMM_SHAPES, ) + def test__nsys_inference(self, mocker, input_1_shape, input_2_shape): + model = BatchMatMulModel() + self.assert_delegated(model, input_1_shape, input_2_shape, mocker) - n1 = n2 = 5 - w1 = c2 = 16 - c1 = 8 - w2 = 24 - - x_input_shape = (n1, c1, w1) - y_input_shape = (n2, c2, w2) - - model = BatchMatMulConvModel(in_channels=c1, out_channels=c1) - - delegated_ep = to_quantized_edge_program( - model, - [x_input_shape, y_input_shape], - use_neutron_for_format_conversion=False, - use_qat=use_qat, - ).exported_program() - - # Make sure the `bmm` was NOT delegated. - # For `bmm` to work in channels-first order, support for 3D `transpose` is needed, - # which is not implemented in NXP Executorch backend yet. - assert not graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) - assert graph_contains_any_of_ops(delegated_ep.graph, [Bmm]) - - -def test_convert_bmm__channels_first(mocker, use_qat): - pytest.xfail( - "`test_convert_bmm__channels_first` is invalid due to incorrect propagation of node format " - "through `view_copy` nodes introduced by the aten pass that converts `conv1d` to `conv2d` " - "in the test model. `NodeFormatInference` needs to be updated to propagate the " - "`channels_first` format only when the batch or channel dimension is modified by the `view_copy` " - "or by other nodes." - ) - # These must match: - # - `n1 = n2` - # - `w1 = c2` - # Otherwise it violates `bmm` constraints per mathematical definition. - n1 = n2 = 5 - w1 = c2 = 16 - - # `c1`, `w1`, `c2`, `w2` also need to be divisible by `num_macs`. - c1 = 8 - w2 = 24 - - x_input_shape = (n1, c1, w1) - y_input_shape = (n2, c2, w2) - - # Channels-last shape of the output before the newly-inserted `transpose` - # converts it to channels-first - output_shape = (n1, w2, c1) - - perm = translator.create_channels_first_to_channels_last_permutation( - len(output_shape), return_list=True - ) - transp_not_supported = not transposition_is_supported_on_neutron( - output_shape, perm, neutron_target_spec - ) - if transp_not_supported: - pytest.skip("3D dim order swap not implemented.") - - model = BatchMatMulConvModel(in_channels=c1, out_channels=c1) - - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - delegated_ep = to_quantized_edge_program( - model, - [x_input_shape, y_input_shape], - use_neutron_for_format_conversion=False, - use_qat=use_qat, - ).exported_program() - - # Make sure the `bmm` was delegated. - assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) - assert not graph_contains_any_of_ops(delegated_ep.graph, [Bmm]) - - # Verify correct behavior of the converted NeutronIR model. - neutron_ir_model = converter_spy.spy_return[0] - bmm_intermediate_ep = converter_spy.call_args.args[1] - - input_data_1 = ( - np.random.random(x_input_shape).astype(np.float32) * 256.0 - 128.0 - ).astype(np.int8) - input_data_2 = ( - np.random.random(y_input_shape).astype(np.float32) * 256.0 - 128.0 - ).astype(np.int8) - - # Make sure the tested program contains the `bmm`. - assert graph_contains_any_of_ops(bmm_intermediate_ep.graph, [Bmm]) - - # Verify that the delegated `bmm` node produces correct results - # The delegated `bmm` runs with a numerical tolerance of atol = 1. - # The `intermediate_ep` has input positions swapped. - input_data = { - 0: input_data_2, - 1: input_data_1, - } - convert_run_compare( - bmm_intermediate_ep, - tfl_model=neutron_ir_model, - input_data=input_data, - atol=1, - tflite_input_preprocess=ToChannelLastPreprocess(), - tflite_output_preprocess=ToChannelFirstPreprocess(), + @pytest.mark.parametrize( + "input_1_shape, input_2_shape", + BMM_SHAPES, ) + def test__channels_first(self, mocker, input_1_shape, input_2_shape): + model = BatchMatMulMaxPoolModel() + self.assert_delegated( + model, + input_1_shape, + input_2_shape, + mocker, + expected_delegated_ops={ + BMM: 1, + MaxPool2DWithIndices: 1, + GetItem: 1, + ViewCopy: 2, + }, + ) diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py index 7545dd940f2..d43fc336bc5 100644 --- a/backends/nxp/tests/models.py +++ b/backends/nxp/tests/models.py @@ -946,17 +946,14 @@ def forward(self, x, y): return torch.bmm(x, y) -class BatchMatMulConvModel(torch.nn.Module): - def __init__(self, in_channels, out_channels): - super().__init__() - self.conv = Conv1dModule( - in_channels=in_channels, - out_channels=out_channels, - stride=1, - padding=1, - kernel_size=3, - ) +class BatchMatMulMaxPoolModel(torch.nn.Module): + + @staticmethod + def noop_max_pool_1d(x): + """Call `torch.max_pool1d` that is a NoOp, but it enforces the ChannelsFirst format in the `NodeFormatInference`.""" + return torch.max_pool1d(x, kernel_size=1) def forward(self, x, y): - x = self.conv(x) - return torch.bmm(x, y) + x = torch.bmm(x, y) + x = self.noop_max_pool_1d(x) + return x diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py index aceb9707106..f16ab66c528 100644 --- a/backends/nxp/tests/ops_aliases.py +++ b/backends/nxp/tests/ops_aliases.py @@ -16,7 +16,7 @@ AddMm = exir_ops.edge.aten.addmm.default AddTensor = exir_ops.edge.aten.add.Tensor AvgPool2D = exir_ops.edge.aten.avg_pool2d.default -Bmm = exir_ops.edge.aten.bmm.default +BMM = exir_ops.edge.aten.bmm.default Cat = exir_ops.edge.aten.cat.default Clamp = exir_ops.edge.aten.clamp.default Clone = exir_ops.edge.aten.clone.default From bfe6e5e73443f9be0cbd5be582fc7d872bc5bae9 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Fri, 19 Jun 2026 09:56:14 +0200 Subject: [PATCH 2/2] Resolve conflicts. --- .../converter/node_converter/test_bmm_converter.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py index 4636e6205af..c564c024623 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py @@ -39,6 +39,7 @@ def assert_delegated( input_1_shape, input_2_shape, mocker, + request, use_qat=False, expected_delegated_ops=None, ): @@ -62,17 +63,18 @@ def assert_delegated( model, [ModelInputSpec(input_1_shape), ModelInputSpec(input_2_shape)], graph_verifier, + request, dataset_creator, output_comparator=output_comparator, use_qat=use_qat, remove_quant_io_ops=remove_quant_io_ops, ) - def test__qat(self, mocker, use_qat): + def test__qat(self, mocker, request, use_qat): input_1_shape, input_2_shape = (1, 24, 16), (1, 16, 24) model = BatchMatMulModel() self.assert_delegated( - model, input_1_shape, input_2_shape, mocker, use_qat=use_qat + model, input_1_shape, input_2_shape, mocker, request, use_qat=use_qat ) # Input shape parameters used by 2 tests in this class. @@ -91,21 +93,22 @@ def test__qat(self, mocker, use_qat): "input_1_shape, input_2_shape", BMM_SHAPES, ) - def test__nsys_inference(self, mocker, input_1_shape, input_2_shape): + def test__nsys_inference(self, mocker, request, input_1_shape, input_2_shape): model = BatchMatMulModel() - self.assert_delegated(model, input_1_shape, input_2_shape, mocker) + self.assert_delegated(model, input_1_shape, input_2_shape, mocker, request) @pytest.mark.parametrize( "input_1_shape, input_2_shape", BMM_SHAPES, ) - def test__channels_first(self, mocker, input_1_shape, input_2_shape): + def test__channels_first(self, mocker, request, input_1_shape, input_2_shape): model = BatchMatMulMaxPoolModel() self.assert_delegated( model, input_1_shape, input_2_shape, mocker, + request, expected_delegated_ops={ BMM: 1, MaxPool2DWithIndices: 1,