Skip to content
Open
4 changes: 2 additions & 2 deletions backends/arm/public_api_manifests/api_manifest_running.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ signature = "EthosUPartitioner.register_custom_partition_op(self, op: torch._ops

[python.EthosUQuantizer]
kind = "class"
signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"

[python.EthosUQuantizer.annotate]
kind = "function"
Expand Down Expand Up @@ -150,7 +150,7 @@ signature = "VgfPartitioner.register_custom_partition_op(self, op: torch._ops.Op

[python.VgfQuantizer]
kind = "class"
signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"

[python.VgfQuantizer.annotate]
kind = "function"
Expand Down
138 changes: 110 additions & 28 deletions backends/arm/quantizer/arm_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,21 +493,23 @@ class TOSAQuantizer(Quantizer):
"""Manage quantization annotations for TOSA-compatible backends.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.
The composable quantizer is now the default implementation. Setting
``use_composable_quantizer=False`` is deprecated and will be removed in
two minor releases.

"""

def __init__(
self,
compile_spec_or_tosa_spec,
use_composable_quantizer: bool = False,
use_composable_quantizer: bool = True,
) -> None:
"""Create a TOSA quantizer from a TOSA spec or Arm compile spec.
Comment thread
AdrianLundell marked this conversation as resolved.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental
API surface that may change without notice.
The composable quantizer is now the default implementation.
Setting ``use_composable_quantizer=False`` is deprecated and will
be removed in two minor releases.

"""
self.use_composable_quantizer = use_composable_quantizer
Expand All @@ -519,10 +521,45 @@ def __init__(
self.quantizer = _TOSAQuantizerV2(compile_spec_or_tosa_spec)
else:
logger.info(
"Using default quantizer in the arm backend. This quantizer is planned to be replaced by the composable quantizer implementation in the future, see https://github.com/pytorch/executorch/issues/17701"
"Using deprecated legacy quantizer implementation in the arm backend. Setting use_composable_quantizer=False will be removed in two minor releases. See https://github.com/pytorch/executorch/issues/17701"
)
self.quantizer = _TOSAQuantizerV1(compile_spec_or_tosa_spec)

@staticmethod
def _validate_optional_quantization_config(
    config_name: str, value: object, value_description: str = "value"
) -> None:
    """Check that ``value`` is a ``QuantizationConfig`` or ``None``.

    Args:
        config_name: Name of the configuration being validated; used as the
            leading part of the error message.
        value: Object to validate.
        value_description: Noun inserted after ``config_name`` in the error
            message (for example ``"value"`` or ``"values"``).

    Raises:
        TypeError: If ``value`` is neither ``None`` nor a
            ``QuantizationConfig`` instance.

    """
    # Accept the two permitted shapes up front and fall through to the error.
    if value is None or isinstance(value, QuantizationConfig):
        return

    actual_type = type(value).__name__
    raise TypeError(
        f"{config_name} {value_description} must be "
        "QuantizationConfig or None, "
        f"got {actual_type}."
    )

@staticmethod
def _validate_config_dict(
    config_name: str,
    value: object,
    is_valid_key: Callable[[object], bool],
    key_description: str,
) -> Dict[Any, Optional[QuantizationConfig]]:
    """Validate a mapping of keys to optional quantization configs.

    Args:
        config_name: Name of the configuration being validated; used in
            error messages.
        value: Object expected to be a ``dict``.
        is_valid_key: Predicate that returns ``True`` for acceptable keys.
        key_description: Human-readable description of an acceptable key,
            used in the error message for a rejected key.

    Returns:
        The same ``value`` object that was passed in, once every key and
        value has been checked.

    Raises:
        TypeError: If ``value`` is not a ``dict``, if a key is rejected by
            ``is_valid_key``, or if a value is neither ``None`` nor a
            ``QuantizationConfig``.

    """
    if not isinstance(value, dict):
        raise TypeError(
            f"{config_name} must be a dict, got {type(value).__name__}."
        )

    check_entry = TOSAQuantizer._validate_optional_quantization_config
    for entry_key, entry_config in value.items():
        key_ok = is_valid_key(entry_key)
        if not key_ok:
            raise TypeError(
                f"{config_name} keys must be {key_description}, "
                f"got {type(entry_key).__name__}."
            )
        # The key is checked before its value, entry by entry.
        check_entry(config_name, entry_config, "values")

    return value

@property
def tosa_spec(self):
return self.quantizer.tosa_spec
Expand All @@ -537,12 +574,11 @@ def global_config(self):

@global_config.setter
def global_config(self, value: Optional[QuantizationConfig]) -> None:
self._validate_optional_quantization_config("global_config", value)
if isinstance(self.quantizer, _TOSAQuantizerV1):
self.quantizer.global_config = value
else:
raise NotImplementedError(
"Composable quantizer does not allow setting global_config directly. Please use set_global() instead."
)
self.quantizer.set_global(value)

@property
def io_config(self):
Expand All @@ -555,12 +591,12 @@ def io_config(self):

@io_config.setter
def io_config(self, value: Optional[QuantizationConfig]) -> None:
self._validate_optional_quantization_config("io_config", value)
if isinstance(self.quantizer, _TOSAQuantizerV1):
self.quantizer.io_config = value
else:
raise NotImplementedError(
"Composable quantizer does not allow setting io_config directly. Please use set_io() instead."
)
self.quantizer.clear_io_config()
self.quantizer.set_io(value)

@property
def module_type_config(self):
Expand All @@ -575,12 +611,18 @@ def module_type_config(self):
def module_type_config(
self, value: Dict[Callable, Optional[QuantizationConfig]]
) -> None:
module_type_config = self._validate_config_dict(
"module_type_config",
value,
callable,
"callable",
)
if isinstance(self.quantizer, _TOSAQuantizerV1):
self.quantizer.module_type_config = value
self.quantizer.module_type_config = module_type_config
else:
raise NotImplementedError(
"Composable quantizer does not allow setting module_type_config directly. Please use set_module_type() instead."
)
self.quantizer.clear_module_type_config()
for module_type, quantization_config in module_type_config.items():
self.quantizer.set_module_type(module_type, quantization_config)

@property
def module_name_config(self):
Expand All @@ -595,12 +637,18 @@ def module_name_config(self):
def module_name_config(
self, value: Dict[str, Optional[QuantizationConfig]]
) -> None:
module_name_config = self._validate_config_dict(
"module_name_config",
value,
lambda key: isinstance(key, str),
"str",
)
if isinstance(self.quantizer, _TOSAQuantizerV1):
self.quantizer.module_name_config = value
self.quantizer.module_name_config = module_name_config
else:
raise NotImplementedError(
"Composable quantizer does not allow setting module_name_config directly. Please use set_module_name() instead."
)
self.quantizer.clear_module_name_config()
for module_name, quantization_config in module_name_config.items():
self.quantizer.set_module_name(module_name, quantization_config)

def set_global(
self, quantization_config: Optional[QuantizationConfig]
Expand Down Expand Up @@ -1124,6 +1172,30 @@ def quantizers(self, value: List[Quantizer]) -> None:
"""
self._quantizers = value

def _remove_quantizers_by_node_finder_type(
    self, node_finder_types: type[NodeFinder] | tuple[type[NodeFinder], ...]
) -> None:
    """Drop pattern quantizers whose node finder matches the given type(s).

    Rebuilds ``self._quantizers`` as a new list, keeping the original order.
    An entry is removed only when it is a ``PatternQuantizer`` and its
    ``node_finder`` is an instance of ``node_finder_types``; every other
    entry is kept.

    Args:
        node_finder_types: A node finder class, or a tuple of such classes,
            passed to ``isinstance``.

    """
    retained = []
    for candidate in self._quantizers:
        if isinstance(candidate, PatternQuantizer) and isinstance(
            candidate.node_finder, node_finder_types
        ):
            # Uses one of the requested finder types: leave it out.
            continue
        retained.append(candidate)
    self._quantizers = retained

def clear_module_type_config(self) -> _TOSAQuantizerV2:
    """Remove pattern quantizers that use a ``ModuleTypeNodeFinder``.

    Returns:
        This quantizer, so calls can be chained.

    """
    finder_type = ModuleTypeNodeFinder
    self._remove_quantizers_by_node_finder_type(finder_type)
    return self

def clear_module_name_config(self) -> _TOSAQuantizerV2:
    """Remove pattern quantizers that use a ``ModuleNameNodeFinder``.

    Returns:
        This quantizer, so calls can be chained.

    """
    finder_type = ModuleNameNodeFinder
    self._remove_quantizers_by_node_finder_type(finder_type)
    return self

def clear_io_config(self) -> _TOSAQuantizerV2:
    """Remove pattern quantizers that use an input or output node finder.

    Both ``InputNodeFinder`` and ``OutputNodeFinder`` based quantizers are
    removed in a single pass.

    Returns:
        This quantizer, so calls can be chained.

    """
    io_finder_types = (InputNodeFinder, OutputNodeFinder)
    self._remove_quantizers_by_node_finder_type(io_finder_types)
    return self

def annotate(self, model):
reporter = QuantizerReporter(self.quantizers, "FINAL QUANTIZATION REPORT")
model = super().annotate(model)
Expand Down Expand Up @@ -1277,20 +1349,25 @@ class EthosUQuantizer(TOSAQuantizer):
"""Quantizer supported by the Arm Ethos-U backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.
The composable quantizer is now the default implementation. Setting
``use_composable_quantizer=False`` is deprecated and will be removed in
two minor releases.

Args:
compile_spec (EthosUCompileSpec): Backend compile specification for
Ethos-U targets.
use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
use_composable_quantizer (bool): Whether to use the composable
quantizer implementation. Setting this to ``False`` is deprecated
and will be removed in two minor releases. See
[issue #17701](https://github.com/pytorch/executorch/issues/17701)
for details.

"""

def __init__(
self,
compile_spec: EthosUCompileSpec,
use_composable_quantizer: bool = False,
use_composable_quantizer: bool = True,
) -> None:
super().__init__(compile_spec, use_composable_quantizer)

Expand All @@ -1299,19 +1376,24 @@ class VgfQuantizer(TOSAQuantizer):
"""Quantizer supported by the Arm Vgf backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.
The composable quantizer is now the default implementation. Setting
``use_composable_quantizer=False`` is deprecated and will be removed in
two minor releases.

Args:
compile_spec (VgfCompileSpec): Backend compile specification for Vgf
targets.
use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
use_composable_quantizer (bool): Whether to use the composable
quantizer implementation. Setting this to ``False`` is deprecated
and will be removed in two minor releases. See
[issue #17701](https://github.com/pytorch/executorch/issues/17701)
for details.

"""

def __init__(
self,
compile_spec: VgfCompileSpec,
use_composable_quantizer: bool = False,
use_composable_quantizer: bool = True,
) -> None:
super().__init__(compile_spec, use_composable_quantizer)
36 changes: 36 additions & 0 deletions backends/arm/quantizer/arm_quantizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,42 @@
self.report_accept([root_node])
return True

def _should_skip_while_shared_qspec(self, node: Node) -> bool:

Check warning on line 626 in backends/arm/quantizer/arm_quantizer_utils.py

View workflow job for this annotation

GitHub Actions / lintrunner

FLAKE8 F811

redefinition of unused '_should_skip_while_shared_qspec' from line 590 See https://www.flake8rules.com/rules/F811.html.

Check failure on line 626 in backends/arm/quantizer/arm_quantizer_utils.py

View workflow job for this annotation

GitHub Actions / lintrunner-mypy

MYPY no-redef

Name "_should_skip_while_shared_qspec" already defined on line 590 To disable, use ` # type: ignore[no-redef]`
return node.target == torch.ops.higher_order.while_loop and bool(
node.meta.get("additional_inputs")
)

def _annotate_while_with_additional_inputs(

Check warning on line 631 in backends/arm/quantizer/arm_quantizer_utils.py

View workflow job for this annotation

GitHub Actions / lintrunner

FLAKE8 F811

redefinition of unused '_annotate_while_with_additional_inputs' from line 595 See https://www.flake8rules.com/rules/F811.html.

Check failure on line 631 in backends/arm/quantizer/arm_quantizer_utils.py

View workflow job for this annotation

GitHub Actions / lintrunner-mypy

MYPY no-redef

Name "_annotate_while_with_additional_inputs" already defined on line 595 To disable, use ` # type: ignore[no-redef]`
self,
root_node: Node,
adjacent_qspecs: list[Any],
) -> bool:
if not self._should_skip_while_shared_qspec(root_node):
return False
if len(adjacent_qspecs) == 0:
self.report_reject(
[root_node],
"Couldn't find any adjacent quantization spec to annotate while_loop.",
)
return True

input_qspec = adjacent_qspecs[0]
input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
n: input_qspec for n in self._get_input_nodes_with_float_output(root_node)
}
output_qspec: Optional[QuantizationSpec] = None
if len(self._get_user_nodes_with_float_input(root_node)) > 0:
output_qspec = input_qspec

_mark_node_as_quantized(
root_node,
input_qspec_map,
output_qspec,
is_quantized=True,
)
self.report_accept([root_node])
return True

def _annotate_shared_cluster(self, root_node: Node) -> None:
if (
len(self._get_input_nodes_with_float_output(root_node)) == 0
Expand Down
1 change: 1 addition & 0 deletions backends/arm/test/misc/test_quant_custom_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ def test_quantized_to_float_transition_tosa_INT_FP(fp_extension: bool):
)
pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None) # type: ignore
pipeline.quantizer.set_module_type(torch.nn.Conv1d, None) # type: ignore
pipeline.quantizer.set_io(None) # type: ignore

pipeline.run()
Loading
Loading