diff --git a/backends/nxp/backend/graph_utils.py b/backends/nxp/backend/graph_utils.py index 88cd996d6fd..f5d8e16475c 100644 --- a/backends/nxp/backend/graph_utils.py +++ b/backends/nxp/backend/graph_utils.py @@ -56,7 +56,7 @@ def get_output_shape(node: Node) -> tuple[torch.Size] | torch.Size | None: def is_clamp_preserved_under_quantization( - node: Node, min_val: int = 0, max_val: int | None = None + node: Node, min_val: float = 0, max_val: float | None = None ) -> bool: """ Checks if Clamp/ReLU/HardTanh is preserved under quantization and did diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py index 25cf6074701..a1e8c19e9bd 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py @@ -42,17 +42,6 @@ from torch.nn import Parameter -def _is_convertible_to_relu(node): - bounds = ClampConverter._get_clamp_bounds(node) - bounds = tuple(v if v is not None and math.isfinite(v) else None for v in bounds) - - # Some specific bounds can be replaced with single op ReLU. 
- if bounds not in ClampConverter.RELU_COMPATIBLE_BOUNDS.values(): - return False - - return True - - class ClampConverter(NodeConverter): RELU_COMPATIBLE_BOUNDS = { "ReluN1To1": (-1, 1), @@ -70,12 +59,25 @@ class ClampConverter(NodeConverter): # noinspection PyShadowingBuiltins @staticmethod - def _get_clamp_bounds(clamp_node: Node) -> tuple[float | None, float | None]: + def _get_bounds(node: Node) -> tuple[float | None, float | None]: """Extract min and max bounds from `aten.clamp.default` node.""" - min = try_get_arg(clamp_node, 1) - max = try_get_arg(clamp_node, 2) + min = try_get_arg(node, 1) + max = try_get_arg(node, 2) return min, max + @classmethod + def _is_convertible_to_relu(cls, node): + bounds = cls._get_bounds(node) + bounds = tuple( + v if v is not None and math.isfinite(v) else None for v in bounds + ) + + # Some specific bounds can be replaced with single op ReLU. + if bounds not in cls.RELU_COMPATIBLE_BOUNDS.values(): + return False + + return True + @staticmethod def _is_supported_in_IR( node: Node, @@ -100,20 +102,21 @@ def _io_quant_is_same(node: Node): dq_params = dequant.args[1:] return all(q == dq for q, dq in zip(q_params, dq_params)) - @staticmethod + @classmethod def _is_supported_on_target( + cls, node: Node, neutron_target_spec: NeutronTargetSpec, parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - relu_compatible = _is_convertible_to_relu(node) - bounds = ClampConverter._get_clamp_bounds(node) + relu_compatible = cls._is_convertible_to_relu(node) + bounds = cls._get_bounds(node) if all(b is None or math.isinf(b) for b in bounds): return False - io_quant_consistent = ClampConverter._io_quant_is_same(node) + io_quant_consistent = cls._io_quant_is_same(node) quant_supported = NodeConverter.uses_quantization_type_for_io( node, supported_types=[torch.int8, torch.uint8], @@ -138,19 +141,20 @@ def supports_partitioning_result( neutron_target_spec: NeutronTargetSpec, parameters_mapping: 
dict[str, Parameter], ) -> bool: - bounds = cls._get_clamp_bounds(node) + bounds = cls._get_bounds(node) # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator # and at the same time the node does not satisfy delegation requirements. - # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly. + # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully. if bounds in cls.RELU_COMPATIBLE_BOUNDS.values(): is_alone_in_partition = cls.is_node_alone_in_partition( node, partition_list, filter_fn=is_not_qdq_node ) if is_alone_in_partition: + # noinspection PyTypeChecker return is_clamp_preserved_under_quantization( node, - min_val=bounds[0], + min_val=bounds[0] if bounds[0] is not None else 0, max_val=bounds[1], ) @@ -167,9 +171,9 @@ def convert(self, node: Node): ) -> Tensor """ self.assert_convertible(node) - to_relu = _is_convertible_to_relu(node) + to_relu = self._is_convertible_to_relu(node) - bounds = self._get_clamp_bounds(node) + bounds = self._get_bounds(node) bounds = tuple( v if v is not None and math.isfinite(v) else None for v in bounds ) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py index f67851895c2..0159143c5f7 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py @@ -3,43 +3,16 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-from executorch.backends.nxp.backend.ir.converter.node_converter import ( - CustomDelegationOptions, - is_not_qdq_node, - NodeConverter, - Partition, -) -from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( - BuiltinOperator, -) -from executorch.backends.nxp.backend.neutron_operator_support import ( - activation_supported_on_target, + +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import ( + ClampConverter, ) -from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from torch.fx import Node -from torch.nn import Parameter - -class HardTanhConverter(NodeConverter): - - # Maps possible input parameters of HardTanh to equivalent ReLU-based operators supported by TFLite. - SUPPORTED_MODES_MAP = { - (0.0, 6.0): BuiltinOperator.RELU6, - (-1.0, 1.0): BuiltinOperator.RELU_N1_TO_1, - (0.0, 1.0): BuiltinOperator.RELU_0_TO_1, - (0.0, float("inf")): BuiltinOperator.RELU, - } - - # Maps possible modes of HardTanh to equivalent ReLU bounds. 
- SUPPORTED_BOUNDS_MAP = { - "ReluN1To1": (-1.0, 1.0), - "Relu0To1": (0.0, 1.0), - "Relu6": (0.0, 6.0), - "Relu": (0.0, float("inf")), - } +class HardTanhConverter(ClampConverter): @staticmethod - def _get_hardtanh_bounds(node: Node) -> tuple[float, float]: + def _get_bounds(node: Node) -> tuple[float | None, float | None]: args = node.args match len(args): @@ -62,51 +35,3 @@ def _get_hardtanh_bounds(node: Node) -> tuple[float, float]: ) return min_val, max_val - - @staticmethod - def _is_supported_in_IR( - node: Node, - parameters_mapping: dict[str, Parameter], - custom_delegation_options: CustomDelegationOptions, - ) -> bool: - bounds = HardTanhConverter._get_hardtanh_bounds(node) - return bounds in HardTanhConverter.SUPPORTED_MODES_MAP - - @classmethod - def supports_partitioning_result( - cls, - node: Node, - partition_list: list[Partition], - custom_delegation_options: CustomDelegationOptions, - neutron_target_spec: NeutronTargetSpec, - parameters_mapping: dict[str, Parameter], - ) -> bool: - bounds = HardTanhConverter._get_hardtanh_bounds(node) - - # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator - # and at the same time the node does not satisfy delegation requirements. - # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly. 
- if bounds in [ - cls.SUPPORTED_BOUNDS_MAP["Relu"], - cls.SUPPORTED_BOUNDS_MAP["Relu6"], - ]: - is_alone_in_partition = cls.is_node_alone_in_partition( - node, partition_list, filter_fn=is_not_qdq_node - ) - if is_alone_in_partition: - return activation_supported_on_target(node) - - return True - - def convert(self, node: Node): - """Convert 'aten::hardtanh' to its supported ReLU equivalent.""" - self.assert_convertible(node) - - t_op = self._create_tflite_op_with_io_tensors(node) - - bounds = HardTanhConverter._get_hardtanh_bounds(node) - - op = self.SUPPORTED_MODES_MAP[bounds] - t_op.opcode_index = self.builder.op_code_index_for_op_type(op) - - self.builder.append_operators([t_op]) diff --git a/backends/nxp/backend/ir/converter/quantization_utils.py b/backends/nxp/backend/ir/converter/quantization_utils.py index ba4ad14222b..f3fe868ae83 100755 --- a/backends/nxp/backend/ir/converter/quantization_utils.py +++ b/backends/nxp/backend/ir/converter/quantization_utils.py @@ -1,4 +1,4 @@ -# Copyright 2023-2025 NXP +# Copyright 2023-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -135,11 +135,12 @@ def set_quantization_parameters_to_tensor( def quantize_int8( data: np.ndarray, scale: List[float], zero_point: List[int] ) -> np.ndarray: + # noinspection PyTypeChecker return quantize(data, zero_point=zero_point, scale=scale) def quantize( - value: np.ndarray | int, + value: np.ndarray | float, zero_point: List[int] | int, scale: List[float] | float, quant_min: int = -128, diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 9e21e4f1660..9182ef27d89 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -11,7 +11,10 @@ import torch from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import ( - _is_convertible_to_relu, + ClampConverter, +) +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.hardtanh_converter import ( + HardTanhConverter, ) from executorch.backends.nxp.quantizer.utils import ( get_bias_qparams, @@ -438,7 +441,7 @@ def get_anchors( ) -> PartitionAnchors | None: node = fused_partition[0].nodes[-1] - if not _is_convertible_to_relu(node): + if not ClampConverter._is_convertible_to_relu(node): return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition) else: return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition) @@ -726,11 +729,21 @@ class HardTanhPattern(SingleInputBasicPattern): def partition_types(self): return [torch.ops.aten.hardtanh.default] + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + node = fused_partition[0].nodes[-1] + + if not HardTanhConverter._is_convertible_to_relu(node): + return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition) + else: + return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition) + def replacement_op(self): raise AssertionError() -class HardTanhInPlacePattern(SingleInputBasicPattern): +class 
HardTanhInPlacePattern(HardTanhPattern): """ Quantizer for HardTanh operator with param inplace=True. """ @@ -738,21 +751,6 @@ class HardTanhInPlacePattern(SingleInputBasicPattern): def partition_types(self): return [torch.ops.aten.hardtanh_.default] - def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] - ) -> PartitionAnchors | None: - node = fused_partition[0].nodes[-1] - - return PartitionAnchors( - inputs=[(node, NodeArgsIdx(0))], - weights=[], - biases=[], - output=[(node,)], - ) - - def replacement_op(self): - raise AssertionError() - class LeakyReluPattern(SingleInputBasicPattern): """Quantizer for the `aten.leaky_relu.default` operator.""" diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py index 248063551af..bd296bb856f 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py @@ -24,9 +24,6 @@ ) from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier -from executorch.backends.nxp.tests.model_output_comparator import ( - NumericalStatsOutputComparator, -) from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.ops_aliases import ( AddTensor, @@ -68,6 +65,37 @@ def forward(self, x): class TestClamp: + + @pytest.mark.parametrize( + "min, max", + [ + pytest.param(-1, 2, id="min = -1, max = 2 (Max/Min)"), + pytest.param(0.0, None, id="min = 0, max = None (Relu)"), + ], + ) + def test__qat(self, mocker, request, min, max, use_qat): + input_shape = (2, 7, 2) # Indivisible by num_macs + model = AddClampModule(min, max) + + x_input_spec = ModelInputSpec(input_shape) + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={ + AddTensor: 1, + Clamp: 1, + }, + 
expected_non_delegated_ops={}, + ) + + lower_run_compare( + model=model, + input_spec=[x_input_spec], + request=request, + dlg_model_verifier=graph_verifier, + # Forward the `use_qat` fixture; it was requested but otherwise unused by this test. + use_qat=use_qat, + ) + + @pytest.mark.parametrize( "min, max", [ @@ -90,12 +118,11 @@ class TestClamp: pytest.param(0.0, None, id="min = 0, max = None (Relu)"), ], ) - def test_convert_clamp__full_pipeline(self, mocker, request, min, max, use_qat): + def test_convert_clamp__full_pipeline(self, mocker, request, min, max): input_shape = (2, 7, 2) # Indivisible by num_macs model = AddClampModule(min, max) x_input_spec = ModelInputSpec(input_shape) - comparator = NumericalStatsOutputComparator() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={ @@ -110,8 +137,6 @@ def test_convert_clamp__full_pipeline(self, mocker, request, min, max, use_qat): input_spec=[x_input_spec], dlg_model_verifier=graph_verifier, request=request, - output_comparator=comparator, - use_qat=use_qat, ) @pytest.mark.parametrize( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py index 67d3add978c..3799aa91623 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py @@ -4,22 +4,31 @@ # LICENSE file in the root directory of this source tree. 
import numpy as np + +# noinspection PyUnusedImports import pytest import torch from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) +from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import ( + AtenModelBuilderDirector, +) +from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( + BuiltinOperator as Ops, +) +from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program -from executorch.backends.nxp.tests.executors import ( - convert_run_compare, - graph_contains_any_of_ops, - ToChannelFirstPreprocess, - ToChannelLastPreprocess, +from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.models import Conv2dWithActivation, HardTanhModule +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import ( + Convolution, + ExecutorchDelegateCall, + HardTanh, ) -from executorch.backends.nxp.tests.models import Conv2dWithActivation -from executorch.exir.dialects._ops import ops as exir_ops -from torch.export import ExportedProgram from executorch.backends.nxp.tests.use_qat import * # noqa F403 @@ -29,91 +38,237 @@ def reseed_model_per_test_run(): np.random.seed(23) -ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate -HardTanh = exir_ops.edge.aten.hardtanh.default -HardTanh_ = exir_ops.edge.aten.hardtanh_.default +class AddHardTanhModule(HardTanhModule): + def forward(self, x): + x = x + x + x = super().forward(x) + return x -@pytest.mark.parametrize("input_shape", [(1, 3, 128, 128)]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool, use_qat: bool): - # The torch.nn.Relu6 inherits 
from torch.nn.Hardtanh, and hence represented as HardTanh in ATen. - # Testing the hardtanh originated from torch.nn.Relu6 op. - model = Conv2dWithActivation( - activation=torch.nn.ReLU6(inplace=inplace), in_channels=input_shape[1] - ) +class TestHardTanh: + # noinspection PyMethodMayBeStatic + def assert_delegated( + self, + model, + input_shape, + mocker, + request, + use_qat=False, + expected_delegated_ops=None, + ): + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops=( + expected_delegated_ops + if expected_delegated_ops is not None + else {HardTanh: 1} + ), + expected_non_delegated_ops={}, + ) - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + # Create a RandomDatasetCreator that covers also negative numbers to properly test the operator. + dataset_creator = RandomDatasetCreator(low=-2, high=2) - quantized_program = to_quantized_edge_program( - model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False - ).exported_program() + lower_run_compare( + model, + input_shape, + graph_verifier, + request, + dataset_creator, + use_qat=use_qat, + ) - tflite_flatbuffers_model, io_formats = converter_spy.spy_return - exported_program: ExportedProgram = converter_spy.call_args.args[1] + @pytest.mark.parametrize( + "activation_range", + [ + (-1, 3), + (0, float("inf")), + ], + ) + @pytest.mark.parametrize( + "inplace", [True, False], ids=lambda ip: "Inplace" if ip else "Not inplace" + ) + def test__qat( + self, mocker, request, activation_range: tuple[float, float], use_qat, inplace + ): + input_shape = (23,) + model = HardTanhModule(*activation_range, inplace) - assert not graph_contains_any_of_ops(quantized_program.graph, [HardTanh, HardTanh_]) - assert graph_contains_any_of_ops(quantized_program.graph, [ExecutorchDelegateCall]) + self.assert_delegated(model, input_shape, mocker, request, use_qat=use_qat) - input_data = (np.random.random(input_shape) * 50).astype(np.int8) - convert_run_compare( - 
exported_program, - tfl_model=tflite_flatbuffers_model, - tflite_input_preprocess=ToChannelLastPreprocess(), - tflite_output_preprocess=ToChannelFirstPreprocess(), - input_data=input_data, - atol=2.0, + @pytest.mark.parametrize( + "inplace", [True, False], ids=lambda ip: "Inplace" if ip else "Not inplace" ) + def test__from_relu6__after_conv(self, mocker, request, inplace: bool): + # The torch.nn.Relu6 inherits from torch.nn.Hardtanh, and hence represented as HardTanh in ATen. + # Testing the hardtanh originated from torch.nn.Relu6 op. + input_shape = (1, 3, 4, 5) + model = Conv2dWithActivation( + activation=torch.nn.ReLU6(inplace=inplace), + in_channels=input_shape[1], + out_channels=2, + ) + self.assert_delegated( + model, + input_shape, + mocker, + request, + expected_delegated_ops={HardTanh: 1, Convolution: 1}, + ) -@pytest.mark.parametrize("input_shape", [(1, 3, 16, 16), (1, 3, 32, 32)]) -@pytest.mark.parametrize( - "activation_range", - [ - (0.0, 6.0), - (-1.0, 1.0), - (0.0, 1.0), - (0.0, float("inf")), - (0, 6), - (-1, 1), - (0, 1), - (0, float("inf")), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_custom_hardtanh_quant( - mocker, - input_shape: tuple[int], - activation_range: tuple[float, float], - inplace: bool, - use_qat: bool, -): - # TODO(13063): This test suffers from non-ideal testing random quantization, because we always use range <0,1>. - # We should update (decrease atol) when the Conv/Linear + Activation fuse at quantization is in place. 
- min_val, max_val = activation_range - model = Conv2dWithActivation( - activation=torch.nn.Hardtanh(min_val=min_val, max_val=max_val, inplace=inplace), - in_channels=input_shape[1], + @pytest.mark.parametrize( + "activation_range", + [ + (0.0, 6.0), + (-1.0, 1), + (0, 1), + (0.0, float("inf")), + ], + ) + @pytest.mark.parametrize( + "inplace", [True, False], ids=lambda ip: "Inplace" if ip else "Not inplace" ) + def test__hardtanh__mappable_to_relu__after_conv( + self, + mocker, + request, + activation_range: tuple[float, float], + inplace: bool, + ): + input_shape = (1, 3, 4, 5) + model = Conv2dWithActivation( + activation=torch.nn.Hardtanh(*activation_range, inplace), + in_channels=input_shape[1], + out_channels=2, + ) - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + self.assert_delegated( + model, + input_shape, + mocker, + request, + expected_delegated_ops={HardTanh: 1, Convolution: 1}, + ) + + @pytest.mark.parametrize( + "activation_range", + [ + (-1, 3), + (2.27, 3.14), + (-0.1, 0), + (float("-inf"), 1.23), + ], + ) + def test__hardtanh__not_mappable_to_relu( + self, + mocker, + request, + activation_range: tuple[float, float], + ): + input_shape = (23,) + model = HardTanhModule(*activation_range) - quantized_program = to_quantized_edge_program( - model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False - ).exported_program() + self.assert_delegated(model, input_shape, mocker, request) - tflite_flatbuffers_model, io_formats = converter_spy.spy_return - exported_program: ExportedProgram = converter_spy.call_args.args[1] + def test__unsupported_bounds(self): + # TODO ONLY WHEN ALONE IN PARTITION + input_shape = (2, 7, 2) + min_value, max_value = float("-inf"), float("inf") + model = HardTanhModule(min_value, max_value) - assert not graph_contains_any_of_ops(quantized_program.graph, [HardTanh, HardTanh_]) - assert graph_contains_any_of_ops(quantized_program.graph, [ExecutorchDelegateCall]) + delegated_ep = 
to_quantized_edge_program(model, input_shape).exported_program() - input_data = (np.random.random(input_shape) * 50).astype(np.int8) - convert_run_compare( - exported_program, - tfl_model=tflite_flatbuffers_model, - tflite_input_preprocess=ToChannelLastPreprocess(), - tflite_output_preprocess=ToChannelFirstPreprocess(), - input_data=input_data, - atol=2.0, + # Make sure the `hardtanh` was NOT delegated. + assert graph_contains_any_of_ops(delegated_ep.graph, [HardTanh]) + + @pytest.mark.parametrize( + "activation_range", + [ + pytest.param((None, float("inf")), id="min = None, max = inf"), + pytest.param((float("inf"), None), id="min = inf, max = None"), + ], + ) + def test__invalid_bounds(self, activation_range): + # PyTorch doesn't allow these cases, so we cannot test our handling of this edge case. + with pytest.raises(TypeError, match="'<=' not supported between instances of"): + _ = HardTanhModule(*activation_range) + + @pytest.mark.parametrize( + "min, max, expected_neutron_ir_ops", + [ + pytest.param( + 0.1, + 0.5, + [Ops.ADD, Ops.MAXIMUM, Ops.MINIMUM], + id="min = 0.1, max = 0.5 (Max/Min)", + ), + pytest.param( + 0.0, 1.0, [Ops.ADD, Ops.RELU_0_TO_1], id="min = 0, max = 1 (Relu0To1)" + ), + pytest.param( + -1.0, + 1.0, + [Ops.ADD, Ops.RELU_N1_TO_1], + id="min = -1, max = 1 (ReluN1To1)", + ), + pytest.param( + 0.0, + float("inf"), + [Ops.ADD, Ops.RELU], + id="min = 0, max = infinity (Relu)", + ), + pytest.param( + 0, + 1.0, + [Ops.ADD, Ops.RELU_0_TO_1], + id="min = 0, max = 1 (Relu0To1)", + ), + pytest.param( + 0, + 6.0, + [Ops.ADD, Ops.RELU6], + id="min = 0, max = 6 (Relu6)", + ), + ], ) + def test_convert_clamp__relu_vs_maxmin( + self, mocker, min, max, expected_neutron_ir_ops + ): + input_shape = (23,) + model = AddHardTanhModule(min, max) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + neutron_ir_spy = mocker.spy(AtenModelBuilderDirector, "finish") + + delegated_ep = to_quantized_edge_program( + model, + input_shape, + 
).exported_program() + + # Make sure the `hardtanh` was delegated. + assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) + assert not graph_contains_any_of_ops(delegated_ep.graph, [HardTanh]) + + intermediate_ep = converter_spy.call_args.args[1] + quant_node = list(intermediate_ep.graph.nodes)[-2] + dequant_node = list(intermediate_ep.graph.nodes)[-4] + neutron_ir_internal_ops = [ + op.builtin_code for op in neutron_ir_spy.spy_return.operator_codes.vector + ] + + assert graph_contains_any_of_ops(intermediate_ep.graph, [HardTanh]) + assert ( + len(neutron_ir_internal_ops) == len(expected_neutron_ir_ops) + 1 + ) # Transpose + assert all(op in neutron_ir_internal_ops for op in expected_neutron_ir_ops) + + if len(expected_neutron_ir_ops) == 3: + # Min/Max variant should have same input and output quantization + assert all( + q == dq for q, dq in zip(quant_node.args[1:], dequant_node.args[1:]) + ) + else: + assert not all( + q == dq for q, dq in zip(quant_node.args[1:], dequant_node.args[1:]) + )