diff --git a/backends/arm/public_api_manifests/api_manifest_running.toml b/backends/arm/public_api_manifests/api_manifest_running.toml index 2a263a594a5..431599fd2c1 100644 --- a/backends/arm/public_api_manifests/api_manifest_running.toml +++ b/backends/arm/public_api_manifests/api_manifest_running.toml @@ -62,7 +62,7 @@ signature = "EthosUPartitioner.register_custom_partition_op(self, op: torch._ops [python.EthosUQuantizer] kind = "class" -signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'" +signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'" [python.EthosUQuantizer.annotate] kind = "function" @@ -150,7 +150,7 @@ signature = "VgfPartitioner.register_custom_partition_op(self, op: torch._ops.Op [python.VgfQuantizer] kind = "class" -signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'" +signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'" [python.VgfQuantizer.annotate] kind = "function" diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py index 0080d77ab69..1668629507b 100644 --- a/backends/arm/quantizer/arm_quantizer.py +++ b/backends/arm/quantizer/arm_quantizer.py @@ -493,21 +493,23 @@ class TOSAQuantizer(Quantizer): """Manage quantization annotations for TOSA-compatible backends. .. warning:: - Setting ``use_composable_quantizer=True`` enables an experimental API - surface that may change without notice. + The composable quantizer is now the default implementation. Setting + ``use_composable_quantizer=False`` is deprecated and will be removed in + two minor releases. """ def __init__( self, compile_spec_or_tosa_spec, - use_composable_quantizer: bool = False, + use_composable_quantizer: bool = True, ) -> None: """Create a TOSA quantizer from a TOSA spec or Arm compile spec. .. warning:: - Setting ``use_composable_quantizer=True`` enables an experimental - API surface that may change without notice. + The composable quantizer is now the default implementation. + Setting ``use_composable_quantizer=False`` is deprecated and will + be removed in two minor releases. """ self.use_composable_quantizer = use_composable_quantizer @@ -519,10 +521,45 @@ def __init__( self.quantizer = _TOSAQuantizerV2(compile_spec_or_tosa_spec) else: logger.info( - "Using default quantizer in the arm backend. This quantizer is planned to be replaced by the composable quantizer implementation in the future, see https://github.com/pytorch/executorch/issues/17701" + "Using deprecated legacy quantizer implementation in the arm backend. Setting use_composable_quantizer=False will be removed in two minor releases. See https://github.com/pytorch/executorch/issues/17701" ) self.quantizer = _TOSAQuantizerV1(compile_spec_or_tosa_spec) + @staticmethod + def _validate_optional_quantization_config( + config_name: str, value: object, value_description: str = "value" + ) -> None: + if value is not None and not isinstance(value, QuantizationConfig): + raise TypeError( + f"{config_name} {value_description} must be " + "QuantizationConfig or None, " + f"got {type(value).__name__}." + ) + + @staticmethod + def _validate_config_dict( + config_name: str, + value: object, + is_valid_key: Callable[[object], bool], + key_description: str, + ) -> Dict[Any, Optional[QuantizationConfig]]: + if not isinstance(value, dict): + raise TypeError( + f"{config_name} must be a dict, got {type(value).__name__}." + ) + + for key, quantization_config in value.items(): + if not is_valid_key(key): + raise TypeError( + f"{config_name} keys must be {key_description}, " + f"got {type(key).__name__}." + ) + TOSAQuantizer._validate_optional_quantization_config( + config_name, quantization_config, "values" + ) + + return value + @property def tosa_spec(self): return self.quantizer.tosa_spec @@ -537,12 +574,11 @@ def global_config(self): @global_config.setter def global_config(self, value: Optional[QuantizationConfig]) -> None: + self._validate_optional_quantization_config("global_config", value) if isinstance(self.quantizer, _TOSAQuantizerV1): self.quantizer.global_config = value else: - raise NotImplementedError( - "Composable quantizer does not allow setting global_config directly. Please use set_global() instead." - ) + self.quantizer.set_global(value) @property def io_config(self): @@ -555,12 +591,12 @@ def io_config(self): @io_config.setter def io_config(self, value: Optional[QuantizationConfig]) -> None: + self._validate_optional_quantization_config("io_config", value) if isinstance(self.quantizer, _TOSAQuantizerV1): self.quantizer.io_config = value else: - raise NotImplementedError( - "Composable quantizer does not allow setting io_config directly. Please use set_io() instead." - ) + self.quantizer.clear_io_config() + self.quantizer.set_io(value) @property def module_type_config(self): @@ -575,12 +611,18 @@ def module_type_config(self): def module_type_config( self, value: Dict[Callable, Optional[QuantizationConfig]] ) -> None: + module_type_config = self._validate_config_dict( + "module_type_config", + value, + callable, + "callable", + ) if isinstance(self.quantizer, _TOSAQuantizerV1): - self.quantizer.module_type_config = value + self.quantizer.module_type_config = module_type_config else: - raise NotImplementedError( - "Composable quantizer does not allow setting module_type_config directly. Please use set_module_type() instead." - ) + self.quantizer.clear_module_type_config() + for module_type, quantization_config in module_type_config.items(): + self.quantizer.set_module_type(module_type, quantization_config) @property def module_name_config(self): @@ -595,12 +637,18 @@ def module_name_config(self): def module_name_config( self, value: Dict[str, Optional[QuantizationConfig]] ) -> None: + module_name_config = self._validate_config_dict( + "module_name_config", + value, + lambda key: isinstance(key, str), + "str", + ) if isinstance(self.quantizer, _TOSAQuantizerV1): - self.quantizer.module_name_config = value + self.quantizer.module_name_config = module_name_config else: - raise NotImplementedError( - "Composable quantizer does not allow setting module_name_config directly. Please use set_module_name() instead." - ) + self.quantizer.clear_module_name_config() + for module_name, quantization_config in module_name_config.items(): + self.quantizer.set_module_name(module_name, quantization_config) def set_global( self, quantization_config: Optional[QuantizationConfig] @@ -1124,6 +1172,30 @@ def quantizers(self, value: List[Quantizer]) -> None: """ self._quantizers = value + def _remove_quantizers_by_node_finder_type( + self, node_finder_types: type[NodeFinder] | tuple[type[NodeFinder], ...] + ) -> None: + self._quantizers = [ + quantizer + for quantizer in self._quantizers + if not ( + isinstance(quantizer, PatternQuantizer) + and isinstance(quantizer.node_finder, node_finder_types) + ) + ] + + def clear_module_type_config(self) -> _TOSAQuantizerV2: + self._remove_quantizers_by_node_finder_type(ModuleTypeNodeFinder) + return self + + def clear_module_name_config(self) -> _TOSAQuantizerV2: + self._remove_quantizers_by_node_finder_type(ModuleNameNodeFinder) + return self + + def clear_io_config(self) -> _TOSAQuantizerV2: + self._remove_quantizers_by_node_finder_type((InputNodeFinder, OutputNodeFinder)) + return self + def annotate(self, model): reporter = QuantizerReporter(self.quantizers, "FINAL QUANTIZATION REPORT") model = super().annotate(model) @@ -1277,20 +1349,25 @@ class EthosUQuantizer(TOSAQuantizer): """Quantizer supported by the Arm Ethos-U backend. .. warning:: - Setting ``use_composable_quantizer=True`` enables an experimental API - surface that may change without notice. + The composable quantizer is now the default implementation. Setting + ``use_composable_quantizer=False`` is deprecated and will be removed in + two minor releases. Args: compile_spec (EthosUCompileSpec): Backend compile specification for Ethos-U targets. - use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details. + use_composable_quantizer (bool): Whether to use the composable + quantizer implementation. Setting this to ``False`` is deprecated + and will be removed in two minor releases. See + [issue #17701](https://github.com/pytorch/executorch/issues/17701) + for details. """ def __init__( self, compile_spec: EthosUCompileSpec, - use_composable_quantizer: bool = False, + use_composable_quantizer: bool = True, ) -> None: super().__init__(compile_spec, use_composable_quantizer) @@ -1299,19 +1376,24 @@ class VgfQuantizer(TOSAQuantizer): """Quantizer supported by the Arm Vgf backend. .. warning:: - Setting ``use_composable_quantizer=True`` enables an experimental API - surface that may change without notice. + The composable quantizer is now the default implementation. Setting + ``use_composable_quantizer=False`` is deprecated and will be removed in + two minor releases. Args: compile_spec (VgfCompileSpec): Backend compile specification for Vgf targets. - use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details. + use_composable_quantizer (bool): Whether to use the composable + quantizer implementation. Setting this to ``False`` is deprecated + and will be removed in two minor releases. See + [issue #17701](https://github.com/pytorch/executorch/issues/17701) + for details. """ def __init__( self, compile_spec: VgfCompileSpec, - use_composable_quantizer: bool = False, + use_composable_quantizer: bool = True, ) -> None: super().__init__(compile_spec, use_composable_quantizer) diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py index a59ccff87b1..4173ada370b 100644 --- a/backends/arm/quantizer/arm_quantizer_utils.py +++ b/backends/arm/quantizer/arm_quantizer_utils.py @@ -623,6 +623,42 @@ def _annotate_while_with_additional_inputs( self.report_accept([root_node]) return True + def _should_skip_while_shared_qspec(self, node: Node) -> bool: + return node.target == torch.ops.higher_order.while_loop and bool( + node.meta.get("additional_inputs") + ) + + def _annotate_while_with_additional_inputs( + self, + root_node: Node, + adjacent_qspecs: list[Any], + ) -> bool: + if not self._should_skip_while_shared_qspec(root_node): + return False + if len(adjacent_qspecs) == 0: + self.report_reject( + [root_node], + "Couldn't find any adjacent quantization spec to annotate while_loop.", + ) + return True + + input_qspec = adjacent_qspecs[0] + input_qspec_map: dict[Node, Optional[QuantizationSpec]] = { + n: input_qspec for n in self._get_input_nodes_with_float_output(root_node) + } + output_qspec: Optional[QuantizationSpec] = None + if len(self._get_user_nodes_with_float_input(root_node)) > 0: + output_qspec = input_qspec + + _mark_node_as_quantized( + root_node, + input_qspec_map, + output_qspec, + is_quantized=True, + ) + self.report_accept([root_node]) + return True + def _annotate_shared_cluster(self, root_node: Node) -> None: if ( len(self._get_input_nodes_with_float_output(root_node)) == 0 diff --git a/backends/arm/test/misc/test_quant_custom_meta.py b/backends/arm/test/misc/test_quant_custom_meta.py index cd9964f4511..f64b8067098 100644 --- a/backends/arm/test/misc/test_quant_custom_meta.py +++ b/backends/arm/test/misc/test_quant_custom_meta.py @@ -105,5 +105,6 @@ def test_quantized_to_float_transition_tosa_INT_FP(fp_extension: bool): ) pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None) # type: ignore pipeline.quantizer.set_module_type(torch.nn.Conv1d, None) # type: ignore + pipeline.quantizer.set_io(None) # type: ignore pipeline.run() diff --git a/backends/arm/test/misc/test_shared_qspecs.py b/backends/arm/test/misc/test_shared_qspecs.py index de07bd5f6c2..93129633418 100644 --- a/backends/arm/test/misc/test_shared_qspecs.py +++ b/backends/arm/test/misc/test_shared_qspecs.py @@ -87,8 +87,8 @@ class SharedQspecMulipleClusters(torch.nn.Module): "quantized_decomposed.dequantize_per_tensor.default": {None: 8}, "aten.add.Tensor": {_INT8_QSPEC: 2}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, 0, -128, 127, torch.int8): 2, @@ -122,8 +122,8 @@ class SharedQspecInputForkNonShared(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 4}, "quantized_decomposed.dequantize_per_tensor.default": {None: 4}, } - inputs_qspecs = {None: 2} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 2} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, -64, -128, 127, torch.int8): 3, @@ -149,8 +149,8 @@ class SharedQspecInputForkShared(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 5}, "quantized_decomposed.dequantize_per_tensor.default": {None: 5}, } - inputs_qspecs = {None: 2} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 2} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, -64, -128, 127, torch.int8): 2, @@ -178,8 +178,8 @@ class SharedQspecInputForkXShared(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 4}, "quantized_decomposed.dequantize_per_tensor.default": {None: 4}, } - inputs_qspecs = {None: 2} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 2} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, -64, -128, 127, torch.int8): 2, @@ -206,8 +206,8 @@ class SharedQspecInputForkYShared(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 5}, "quantized_decomposed.dequantize_per_tensor.default": {None: 5}, } - inputs_qspecs = {None: 2} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 2} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, -64, -128, 127, torch.int8): 2, @@ -234,8 +234,8 @@ class SharedQspecInputForkXConstant(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 2}, "quantized_decomposed.dequantize_per_tensor.default": {None: 3}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, 0, -128, 127, torch.int8): 2, @@ -260,8 +260,8 @@ class SharedQspecInputForkYConstant(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 2}, "quantized_decomposed.dequantize_per_tensor.default": {None: 3}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, 0, -128, 127, torch.int8): 1, @@ -287,8 +287,8 @@ class SharedQspecOutputForkNonShared(torch.nn.Module): "quantized_decomposed.dequantize_per_tensor.default": {None: 4}, "aten.add.Tensor": {_INT8_QSPEC: 1}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 2} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, 0, -128, 127, torch.int8): 3, @@ -315,8 +315,8 @@ class SharedQspecOutputForkShared(torch.nn.Module): "quantized_decomposed.quantize_per_tensor.default": {None: 4}, "quantized_decomposed.dequantize_per_tensor.default": {None: 6}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 3} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.015678614, 0, -128, 127, torch.int8): 6, @@ -341,10 +341,10 @@ class SharedQspecManyForks(torch.nn.Module): qspecs = { "quantized_decomposed.quantize_per_tensor.default": {None: 6}, "quantized_decomposed.dequantize_per_tensor.default": {None: 9}, - "aten.t.default": {None: 1}, + "aten.t.default": {_INT8_QSPEC: 1}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.086232387, 104, -128, 127, torch.int8): 9, @@ -372,8 +372,8 @@ class SharedQspecSurroundedQuantizedOp(torch.nn.Module): "quantized_decomposed.dequantize_per_tensor.default": {None: 5}, "aten.add.Tensor": {_INT8_QSPEC: 1}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.509554982, 123, -128, 127, torch.int8): 3, @@ -403,8 +403,8 @@ class SharedQspecSurroundedQuantizedOpConstant(torch.nn.Module): "aten.ones.default": {_INT8_QSPEC: 1}, "aten.add.Tensor": {_INT8_QSPEC: 1}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { (0.003921569, -128, -128, 127, torch.int8): 1, @@ -429,18 +429,22 @@ class SharedQspecSub(torch.nn.Module): """A shared qspec node with float input.""" qspecs = { - "quantized_decomposed.quantize_per_tensor.default": {None: 2}, - "quantized_decomposed.dequantize_per_tensor.default": {None: 2}, + "quantized_decomposed.quantize_per_tensor.default": {None: 4}, + "quantized_decomposed.dequantize_per_tensor.default": {None: 4}, "aten.sub.Tensor": {None: 1}, } - inputs_qspecs = {None: 2} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 2} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { + (0.003919654, -128, -128, 127, torch.int8): 1, (0.035276882, -128, -128, 127, torch.int8): 2, + (0.03919654, -128, -128, 127, torch.int8): 1, }, "quantized_decomposed.quantize_per_tensor.default": { + (0.003919654, -128, -128, 127, torch.int8): 1, (0.035276882, -128, -128, 127, torch.int8): 2, + (0.03919654, -128, -128, 127, torch.int8): 1, }, } @@ -462,8 +466,8 @@ class SharedQspecCompetingQspecs(torch.nn.Module): "quantized_decomposed.dequantize_per_tensor.default": {None: 4}, "aten.conv2d.default": {_INT8_QSPEC: 1}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_channel.default": { (0, -2147483647, 2147483647, torch.int32): 1, @@ -502,20 +506,16 @@ class SharedQspecNoQspecs(torch.nn.Module): "quantized_decomposed.dequantize_per_tensor.default": {None: 2}, "aten.sub.Tensor": {None: 2}, } - inputs_qspecs = {None: 1} - outputs_qspecs = {None: 1} + inputs_qspecs = {_INT8_QSPEC: 1} + outputs_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.dequantize_per_tensor.default": { - ( - 1.5259e-05, - -128, - -128, - 127, - torch.int8, - ): 2, # The network always has 0 output -> very small scale. + (1.5259e-05, -128, -128, 127, torch.int8): 1, + (0.03919654, -128, -128, 127, torch.int8): 1, }, "quantized_decomposed.quantize_per_tensor.default": { - (1.5259e-05, -128, -128, 127, torch.int8): 2, + (1.5259e-05, -128, -128, 127, torch.int8): 1, + (0.03919654, -128, -128, 127, torch.int8): 1, }, } @@ -542,21 +542,19 @@ class MixedMaximumInt8Int16(torch.nn.Module): """A shared qspec node with int16/int8 inputs.""" qspecs = { - "quantized_decomposed.quantize_per_tensor.default": {None: 6}, - "quantized_decomposed.dequantize_per_tensor.default": {None: 6}, + "quantized_decomposed.quantize_per_tensor.default": {None: 4}, + "quantized_decomposed.dequantize_per_tensor.default": {None: 5}, } - input_qspecs = {None: 1} - output_qspecs = {None: 1} + input_qspecs = {_INT8_QSPEC: 1} + output_qspecs = {_INT8_QSPEC: 1} quant_params = { "quantized_decomposed.quantize_per_tensor.default": { - (0.007839307, -128, -128, 127, torch.int8): 2, - (0.015678614, 0, -128, 127, torch.int8): 2, - (0.000244141, 0, -32767, 32767, torch.int16): 2, + (0.007839307, -128, -128, 127, torch.int8): 1, + (0.015678614, 0, -128, 127, torch.int8): 3, }, "quantized_decomposed.dequantize_per_tensor.default": { - (0.007839307, -128, -128, 127, torch.int8): 2, - (0.015678614, 0, -128, 127, torch.int8): 2, - (0.000244141, 0, -32767, 32767, torch.int16): 2, + (0.007839307, -128, -128, 127, torch.int8): 1, + (0.015678614, 0, -128, 127, torch.int8): 4, }, } diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py index 6718fedea04..e0d910bd069 100644 --- a/backends/arm/test/ops/test_to_copy.py +++ b/backends/arm/test/ops/test_to_copy.py @@ -330,18 +330,14 @@ def test_to_vgf_quant(test_data: Tuple): ), } -redundant_xfails_FP = { +redundant_xfails = { "rand_int8_int8": "Tracing graph with quantized input is not supported.", "rand_int16_int16": "Tracing graph with quantized input is not supported.", } -redundant_xfails_INT = redundant_xfails_FP | { - "rand_fp16_fp16": "FP16 is not supported", -} - @common.parametrize( - "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails_FP + "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails ) def test_to_tosa_FP_REDUNDANT_CAST(test_data: Tuple): test_tensor, new_dtype = test_data() @@ -356,7 +352,7 @@ def test_to_tosa_FP_REDUNDANT_CAST(test_data: Tuple): @common.parametrize( - "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails_INT + "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails ) def test_to_tosa_INT_REDUNDANT_CAST(test_data: Tuple): test_tensor, new_dtype = test_data() diff --git a/backends/arm/test/ops/test_transpose_conv2d.py b/backends/arm/test/ops/test_transpose_conv2d.py index d34679f4bcb..0c627575162 100644 --- a/backends/arm/test/ops/test_transpose_conv2d.py +++ b/backends/arm/test/ops/test_transpose_conv2d.py @@ -7,14 +7,14 @@ import conftest import torch - -from executorch.backends.arm.quantizer import QuantizationConfig from executorch.backends.arm.quantizer.arm_quantizer import ( get_symmetric_a16w8_quantization_config, get_symmetric_a8w4_quantization_config, get_symmetric_quantization_config, TOSAQuantizer, ) + +from executorch.backends.arm.quantizer.quantization_config import TOSAQuantizationConfig from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( EthosU55PipelineINT, @@ -361,7 +361,7 @@ def test_conv_transpose2d_tosa_INT_qat_axis1_uses_non_fused_fake_quant(test_data ), ) quantizer.set_global( - QuantizationConfig( + TOSAQuantizationConfig( input_activation=activation_qspec, output_activation=activation_qspec, weight=weight_qspec, @@ -400,7 +400,7 @@ def test_conv_transpose2d_tosa_INT_grouped_qat_axis0_keeps_fused_fake_quant(test ), ) quantizer.set_global( - QuantizationConfig( + TOSAQuantizationConfig( input_activation=activation_qspec, output_activation=activation_qspec, weight=weight_qspec, @@ -439,7 +439,7 @@ def test_conv_transpose2d_tosa_INT_ptq_observer_updates_axis(test_data): ), ) quantizer.set_global( - QuantizationConfig( + TOSAQuantizationConfig( input_activation=activation_qspec, output_activation=activation_qspec, weight=weight_qspec, @@ -477,7 +477,7 @@ def test_conv_transpose2d_tosa_INT_qat_correct_qspec_wrong_ctor_axis(test_data): ), ) quantizer.set_global( - QuantizationConfig( + TOSAQuantizationConfig( input_activation=activation_qspec, output_activation=activation_qspec, weight=weight_qspec, diff --git a/backends/arm/test/ops/test_unary_combos.py b/backends/arm/test/ops/test_unary_combos.py index bc4bb0b39d9..2ecd04b9c79 100644 --- a/backends/arm/test/ops/test_unary_combos.py +++ b/backends/arm/test/ops/test_unary_combos.py @@ -104,9 +104,7 @@ def test_add_tensor_tosa_INT_combos(model_cls): @common.XfailIfNoCorstone300 -@common.parametrize( - "model_cls", MODEL_DATA, xfails={"NegAdd": "Numerical failure. MLBEDSW-11581"} -) +@common.parametrize("model_cls", MODEL_DATA) def test_add_tensor_u55_INT_combos(model_cls): m, inputs, exir = _build(model_cls) p = EthosU55PipelineINT[Tensor1]( diff --git a/backends/arm/test/ops/test_while.py b/backends/arm/test/ops/test_while.py index b5cab047a50..51b56661b50 100644 --- a/backends/arm/test/ops/test_while.py +++ b/backends/arm/test/ops/test_while.py @@ -8,6 +8,8 @@ import torch import torch.fx +from executorch.backends.arm.quantizer import get_symmetric_quantization_config +from executorch.backends.arm.quantizer.arm_quantizer import _TOSAQuantizerV2 from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.arm_tester import ArmTester from executorch.backends.arm.test.tester.test_pipeline import ( @@ -228,6 +230,28 @@ def test_while_loop_tosa_INT(case: Callable[[], Tuple[torch.nn.Module, Tuple]]): pipeline.run() +def test_while_loop_tosa_INT_composable_large_threshold(): + module, example_inputs = test_cases["large_threshold"]() + pipeline = TosaPipelineINT[tuple]( + module, + example_inputs, + "torch.ops.higher_order.while_loop", + tosa_extensions=["cf"], + ) + + composable_quantizer = _TOSAQuantizerV2(pipeline.tester.compile_spec) + composable_quantizer.set_global(get_symmetric_quantization_config()) + pipeline.quantizer.quantizer = composable_quantizer + + pipeline.add_stage_after( + "to_edge_transform_and_lower", + ArmTester.check_not, + pipeline.tester, + ["torch.ops.higher_order.while_loop"], + ) + pipeline.run() + + @common.parametrize( "case", test_cases, diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md index 68fe9d160aa..c2f7035c89c 100644 --- a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md +++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md @@ -16,18 +16,23 @@ The Arm Ethos-U delegate supports the following quantization schemes: ### Quantization API ```python -class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None' +class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None' ``` Quantizer supported by the Arm Ethos-U backend. .. warning:: - Setting ``use_composable_quantizer=True`` enables an experimental API - surface that may change without notice. + The composable quantizer is now the default implementation. Setting + ``use_composable_quantizer=False`` is deprecated and will be removed in + two minor releases. Args: - **compile_spec (EthosUCompileSpec)**: Backend compile specification for Ethos-U targets. -- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details. +- **use_composable_quantizer (bool)**: Whether to use the composable + quantizer implementation. Setting this to ``False`` is deprecated + and will be removed in two minor releases. See + [issue #17701](https://github.com/pytorch/executorch/issues/17701) + for details. ```python def EthosUQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer': diff --git a/docs/source/backends/arm-vgf/arm-vgf-quantization.md b/docs/source/backends/arm-vgf/arm-vgf-quantization.md index 49ba41f74e1..2dc5b5631e6 100644 --- a/docs/source/backends/arm-vgf/arm-vgf-quantization.md +++ b/docs/source/backends/arm-vgf/arm-vgf-quantization.md @@ -35,18 +35,23 @@ setting using the `set_module_name` or `set_module_type` methods. ### Quantization API ```python -class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None' +class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None' ``` Quantizer supported by the Arm Vgf backend. .. warning:: - Setting ``use_composable_quantizer=True`` enables an experimental API - surface that may change without notice. + The composable quantizer is now the default implementation. Setting + ``use_composable_quantizer=False`` is deprecated and will be removed in + two minor releases. Args: - **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf targets. -- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details. +- **use_composable_quantizer (bool)**: Whether to use the composable + quantizer implementation. Setting this to ``False`` is deprecated + and will be removed in two minor releases. See + [issue #17701](https://github.com/pytorch/executorch/issues/17701) + for details. ```python def VgfQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer': diff --git a/examples/arm/quantizer_tutorial.ipynb b/examples/arm/quantizer_tutorial.ipynb index 76979316002..25b99dbd4b5 100644 --- a/examples/arm/quantizer_tutorial.ipynb +++ b/examples/arm/quantizer_tutorial.ipynb @@ -16,13 +16,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# WIP: TOSA/EthosU/VgfQuantizer composable quantizer tutorial\n", + "# TOSA/EthosU/VgfQuantizer composable quantizer tutorial\n", "\n", "This is an in-depth tutorial of the new `TOSA/EthosU/VgfQuantizer` API. While the `TOSAQuantizer` is used in the example, both the\n", "`EthosUQuantizer` and `VgfQuantizer` directly inherit from this base class. \n", "\n", - "Note that the main API and functionality remains largely the same to allow for a drop-in replacement, but the underlying framework is different - as will be explained. **Both the quantizer and this tutorial are currently experimental and may change without prior notice.** Refer to https://github.com/pytorch/executorch/issues/17701 for questions and feedback.\n", - "\n", "Before you begin:\n", "1. (In a clean virtual environment with a compatible Python version) Install executorch using `./install_executorch.sh`\n", "2. Install Arm TOSA dependencies using `examples/arm/setup.sh --disable-ethos-u-deps`\n",