Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
from executorch.backends.nxp.backend.edge_helper import input_rank
from executorch.backends.nxp.backend.edge_helper import (
get_quantization_parameters_for,
input_rank,
)
from executorch.backends.nxp.backend.ir.converter.conversion import translator
from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
from executorch.backends.nxp.backend.ir.converter.node_converter import (
Expand All @@ -14,9 +18,6 @@
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
batch_mat_mul_options,
)
from executorch.backends.nxp.backend.neutron_operator_support import (
transposition_is_supported_on_neutron,
)
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from torch.fx import Node
from torch.nn import Parameter
Expand Down Expand Up @@ -44,35 +45,18 @@ def _is_supported_on_target(
parameters_mapping: dict[str, Parameter],
custom_delegation_options: CustomDelegationOptions,
) -> bool:
is_ch_first_1 = node.args[0].meta[NXP_NODE_FORMAT].is_channels_first()
is_ch_first_2 = node.args[1].meta[NXP_NODE_FORMAT].is_channels_first()
# This combination of node formats is not supported on Neutron (`adj_x = True`, `adj_y = False`),
# but it should never happen because both input tensors are expected to share the same format.
if is_ch_first_1 and not is_ch_first_2:
if not NodeConverter.uses_quantization_type_for_io(
node,
supported_types=[torch.int8, torch.uint8],
input_indices=[0, 1],
output_indices=[0],
):
return False

# In case we need to insert transpose after `BatchMatMul`, we also need to check if
# such transposition is supported.
if node.meta[NXP_NODE_FORMAT].is_channels_first():
tensor_shape = node.meta["val"].shape
tensor_rank = len(tensor_shape)
perm = translator.create_channels_first_to_channels_last_permutation(
tensor_rank, return_list=True
)

tensor_shape_channels_last = [tensor_shape[i] for i in perm]
if not transposition_is_supported_on_neutron(
tensor_shape_channels_last, perm, neutron_target_spec
):
return False

_, d1, d2 = node.args[0].meta["val"].shape
_, d3, d4 = node.args[1].meta["val"].shape

# The Neutron converter requires that every dimension participating in the
# multiplication is divisible by NUM_MACS.
num_macs = neutron_target_spec.get_num_macs()
if not all(m % num_macs == 0 for m in [d1, d2, d3, d4]):
_, input_1_zp = get_quantization_parameters_for(node.args[0])
_, input_2_zp = get_quantization_parameters_for(node.args[1])
if not (input_1_zp == input_2_zp == 0):
# Neutron requirement.
return False

return True
Expand Down
21 changes: 19 additions & 2 deletions backends/nxp/quantizer/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from functools import partial

import torch

from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
_is_convertible_to_relu,
)
Expand All @@ -22,6 +23,8 @@
from torch.fx import Node
from torchao.quantization.pt2e import (
FakeQuantize,
MinMaxObserver,
MovingAverageMinMaxObserver,
MovingAveragePerChannelMinMaxObserver,
PerChannelMinMaxObserver,
)
Expand Down Expand Up @@ -326,10 +329,24 @@ def get_anchors(
) -> PartitionAnchors | None:
bmm_node = fused_partition[0].nodes[-1]

# Use per_tensor_symmetric to enforce zero_point=0 for both inputs
observer_or_fake_quant_ctr = (
FakeQuantize.with_args(observer=MovingAverageMinMaxObserver)
if self.is_qat
else MinMaxObserver
)
input_quantization_spec = QuantizationSpec(
dtype=torch.int8,
observer_or_fake_quant_ctr=observer_or_fake_quant_ctr,
quant_min=-128,
quant_max=127,
qscheme=torch.per_tensor_symmetric, # Neutron requires the inputs to have zero point = 0.
)

return PartitionAnchors(
inputs=[
(bmm_node, NodeArgsIdx(0)),
(bmm_node, NodeArgsIdx(1)),
(bmm_node, NodeArgsIdx(0), input_quantization_spec),
(bmm_node, NodeArgsIdx(1), input_quantization_spec),
],
biases=[],
output=[(bmm_node,)],
Expand Down
Loading
Loading