pytorch
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/bmm_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/bmm_converter.py
Lines changed: 15 additions & 31 deletions
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
Lines changed: 19 additions & 2 deletions
@@ -2,9 +2,13 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+import torch
 
 from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
-from executorch.backends.nxp.backend.edge_helper import input_rank
+from executorch.backends.nxp.backend.edge_helper import (
+    get_quantization_parameters_for,
+    input_rank,
+)
 from executorch.backends.nxp.backend.ir.converter.conversion import translator
 from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
@@ -14,9 +18,6 @@
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
     batch_mat_mul_options,
 )
-from executorch.backends.nxp.backend.neutron_operator_support import (
-    transposition_is_supported_on_neutron,
-)
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
 from torch.nn import Parameter
@@ -44,35 +45,18 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        is_ch_first_1 = node.args[0].meta[NXP_NODE_FORMAT].is_channels_first()
-        is_ch_first_2 = node.args[1].meta[NXP_NODE_FORMAT].is_channels_first()
-        # This combination of node formats is not supported on Neutron (`adj_x = True`, `adj_y = False`),
-        # but it should never happen because both input tensors are expected to share the same format.
-        if is_ch_first_1 and not is_ch_first_2:
+        if not NodeConverter.uses_quantization_type_for_io(
+            node,
+            supported_types=[torch.int8, torch.uint8],
+            input_indices=[0, 1],
+            output_indices=[0],
+        ):
             return False
 
-        # In case we need to insert transpose after `BatchMatMul`, we also need to check if
-        # such transposition is supported.
-        if node.meta[NXP_NODE_FORMAT].is_channels_first():
-            tensor_shape = node.meta["val"].shape
-            tensor_rank = len(tensor_shape)
-            perm = translator.create_channels_first_to_channels_last_permutation(
-                tensor_rank, return_list=True
-            )
-
-            tensor_shape_channels_last = [tensor_shape[i] for i in perm]
-            if not transposition_is_supported_on_neutron(
-                tensor_shape_channels_last, perm, neutron_target_spec
-            ):
-                return False
-
-        _, d1, d2 = node.args[0].meta["val"].shape
-        _, d3, d4 = node.args[1].meta["val"].shape
-
-        # The Neutron converter requires that every dimension participating in the
-        # multiplication is divisible by NUM_MACS.
-        num_macs = neutron_target_spec.get_num_macs()
-        if not all(m % num_macs == 0 for m in [d1, d2, d3, d4]):
+        _, input_1_zp = get_quantization_parameters_for(node.args[0])
+        _, input_2_zp = get_quantization_parameters_for(node.args[1])
+        if not (input_1_zp == input_2_zp == 0):
+            # Neutron requires both inputs to have zero point = 0.
             return False
 
         return True
 
@@ -10,6 +10,7 @@
 from functools import partial
 
 import torch
+
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
     _is_convertible_to_relu,
 )
@@ -22,6 +23,8 @@
 from torch.fx import Node
 from torchao.quantization.pt2e import (
     FakeQuantize,
+    MinMaxObserver,
+    MovingAverageMinMaxObserver,
     MovingAveragePerChannelMinMaxObserver,
     PerChannelMinMaxObserver,
 )
@@ -326,10 +329,24 @@ def get_anchors(
     ) -> PartitionAnchors | None:
         bmm_node = fused_partition[0].nodes[-1]
 
+        # Use per_tensor_symmetric to enforce zero_point=0 for both inputs
+        observer_or_fake_quant_ctr = (
+            FakeQuantize.with_args(observer=MovingAverageMinMaxObserver)
+            if self.is_qat
+            else MinMaxObserver
+        )
+        input_quantization_spec = QuantizationSpec(
+            dtype=torch.int8,
+            observer_or_fake_quant_ctr=observer_or_fake_quant_ctr,
+            quant_min=-128,
+            quant_max=127,
+            qscheme=torch.per_tensor_symmetric,  # Neutron requires the inputs to have zero point = 0.
+        )
+
         return PartitionAnchors(
             inputs=[
-                (bmm_node, NodeArgsIdx(0)),
-                (bmm_node, NodeArgsIdx(1)),
+                (bmm_node, NodeArgsIdx(0), input_quantization_spec),
+                (bmm_node, NodeArgsIdx(1), input_quantization_spec),
             ],
             biases=[],
             output=[(bmm_node,)],