From f6ffc2c2392b8a13f0bebc03cdcadb8398132a9a Mon Sep 17 00:00:00 2001 From: Max Ren Date: Tue, 17 Jun 2025 11:16:13 -0700 Subject: [PATCH 1/2] [Quantized DeConv Support] Enable Quantized Transposed Convs with groups==1 Pull Request resolved: https://github.com/pytorch/executorch/pull/11730 Supporting Quantized Transposed Convs with groups being 1. Previously, there was some added support for Quantized Transposed Convolutions, but only when the channel axis is 1 and when groups is 1. The current Quantizer didn't support this because it only allows quantizing along the zero dim, which is generally the output channels. However, for TransposedConvs, the dimensions of the weights are: ``` [in_channels, out_channels/groups, h, w] ``` Since we want to keep quantization along the output channels, we now need to quantize along axis = 1. The reason we require groups to be one is that XNNPACK takes in filters of the dimensions: ``` [out_channels, H, W, in_channels/groups] ``` Since we are quantizing along the output channels, in PyTorch we expect to have out_channels/groups scales, but in XNNPACK we have out_channels scales! Realistically, we would need to support this with some affine quantization, where we provide a scale for every group and every out_channel. However, for now, we just enforce the constraint that groups == 1. 
ghstack-source-id: 291033630 @exported-using-ghexport Differential Revision: [D76631781](https://our.internmc.facebook.com/intern/diff/D76631781/) --- .../quantizer/xnnpack_quantizer_utils.py | 28 +++- backends/xnnpack/test/ops/test_conv2d.py | 130 +++++------------- 2 files changed, 58 insertions(+), 100 deletions(-) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 0dcfb4484ed..2ebf69da4f5 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -238,7 +238,19 @@ def _do_annotate_conv( weight = conv_node.args[1] assert isinstance(weight, Node) - input_qspec_map[weight] = get_weight_qspec(quantization_config) + weight_qspec = get_weight_qspec(quantization_config) + if is_conv_transpose: + # transposed convs per output channel quantization + weight_qspec = QuantizationSpec( + dtype=weight_qspec.dtype, + quant_min=weight_qspec.quant_min, + quant_max=weight_qspec.quant_max, + qscheme=weight_qspec.qscheme, + ch_axis=1, + is_dynamic=False, + observer_or_fake_quant_ctr=weight_qspec.observer_or_fake_quant_ctr, + ) + input_qspec_map[weight] = weight_qspec # Only annotate dynamically quantized conv if it's 2D and not depthwise if ( @@ -311,7 +323,19 @@ def _do_annotate_conv_relu( weight = conv_node.args[1] assert isinstance(weight, Node) - input_qspec_map[weight] = get_weight_qspec(quantization_config) + weight_qspec = get_weight_qspec(quantization_config) + if is_conv_transpose: + # transposed convs per output channel quantization + weight_qspec = QuantizationSpec( + dtype=weight_qspec.dtype, + quant_min=weight_qspec.quant_min, + quant_max=weight_qspec.quant_max, + qscheme=weight_qspec.qscheme, + ch_axis=1, + is_dynamic=False, + observer_or_fake_quant_ctr=weight_qspec.observer_or_fake_quant_ctr, + ) + input_qspec_map[weight] = weight_qspec # adding weight node to the partition as well partition = [relu_node, conv_node, 
conv_node.args[1]] diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 92bb03c907a..d838ef0ffe9 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -221,7 +221,6 @@ def _test( conv_count=1, dtype: torch.dtype = torch.float, check_quantized=True, - delegated=True, ): # pyre-fixme[29]: `Union[torch._tensor.Tensor, # torch.nn.modules.module.Module]` is not a function. @@ -240,29 +239,20 @@ def _test( (tester.export().check_count({op: conv_count}).to_edge_transform_and_lower()) - if delegated: - ( - tester.check_not( - ["executorch_exir_dialects_edge__ops_aten_convolution_default"] - ) - .check_not( - [ - "executorch_exir_dialects_edge__ops__native_batch_norm_legit_no_training_default" - ] - ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - .run_method_and_compare_outputs(qtol=1) + ( + tester.check_not( + ["executorch_exir_dialects_edge__ops_aten_convolution_default"] ) - else: - # need quantize ops when ops are not delegated to xnnpack - if has_quantized_ops: - ( - tester.to_executorch() - .serialize() - .run_method_and_compare_outputs(qtol=1) - ) + .check_not( + [ + "executorch_exir_dialects_edge__ops__native_batch_norm_legit_no_training_default" + ] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .serialize() + .run_method_and_compare_outputs(qtol=1) + ) def _test_dq( self, @@ -325,7 +315,6 @@ def test_qs8_conv2d_per_channel(self) -> None: self._test( Conv2d(transpose=transpose), quant_config=get_symmetric_quantization_config(is_per_channel=True), - delegated=not transpose, # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1 ) def test_fp32_conv2d_seq(self) -> None: @@ -485,7 +474,6 @@ def get_inputs(self): self._test( ConvReLU(transpose=transpose), 
quant_config=get_symmetric_quantization_config(is_per_channel=True), - delegated=not transpose, # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1 ) def test_qs8_conv2d_dw_relu(self): @@ -537,8 +525,6 @@ def get_inputs(self): quant_config=get_symmetric_quantization_config( is_per_channel=per_channel_quant ), - # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1 - delegated=not (transpose and per_channel_quant), ) def test_qs8_conv2d_relu_seq(self): @@ -593,7 +579,7 @@ def get_inputs(self): conv_count=2, ) - def test_qs8_conv_transpose_2d_quantize_per_channel(self): + def test_qs8_conv_transpose_2d_quantize_per_channel_multi_axis(self): class PerChannelConvTranspose2d(torch.nn.Module): def __init__(self, input_channels, output_channels, groups, axis): super().__init__() @@ -662,76 +648,24 @@ def get_inputs(self): ) for groups in (1, 2): - for axis in (0, 1): - self._test( - PerChannelConvTranspose2d(3 * groups, 5 * groups, groups, axis), - quant_config=None, - conv_count=1, - delegated=axis == 1 - and groups - == 1, # xnnpack only support output channel axis quantization with groups == 1 - ) - - def test_qs8_conv_transpose_2d_dqd_f32_weights(self): - class TransposeConv2dDQDf32weights(torch.nn.Module): - def __init__(self, input_channels, output_channels, groups, axis): - super().__init__() - self.input_channels = input_channels - self.output_channels = output_channels - self.axis = axis - self.groups = groups - self.transpose = True - self.weights = torch.nn.Parameter( - torch.randn((input_channels, output_channels // groups, 4, 4)), - requires_grad=False, - ) - - axis_size = self.weights.shape[axis] - self.scale = torch.nn.Parameter(torch.ones(axis_size) * 0.12345) - self.zero_point = torch.nn.Parameter( - torch.zeros((axis_size,), dtype=torch.int64), requires_grad=False - ) - - def forward(self, x): - dequantize_input = ( - 
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default( - x, 0.12345, 0, -127, 127, torch.int8 + for ch_axis in (1, 2): + if ch_axis == 1 and groups == 1: + self._test( + PerChannelConvTranspose2d( + 3 * groups, 5 * groups, groups, ch_axis + ), # ch_axis=0 + quant_config=None, + conv_count=1, ) - ) - x = torch.nn.functional.conv_transpose2d( - dequantize_input, self.weights, groups=self.groups - ) - - return exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default( - exir_ops.edge.quantized_decomposed.quantize_per_tensor.default( - x, - 0.12345, - 0, - -127, - 127, - torch.int8, - ), - 0.12345, - 0, - -127, - 127, - torch.int8, - ) - - def get_inputs(self): - return ( - torch.randint( - low=-127, high=127, size=(3, self.input_channels, 4, 4) - ).type(dtype=torch.int8), - ) - - for groups in (1, 2): - for axis in (0, 1): - self._test( - TransposeConv2dDQDf32weights(3 * groups, 5 * groups, groups, axis), - quant_config=None, - conv_count=1, - ) + else: + with self.assertRaises(RuntimeError): + self._test( + PerChannelConvTranspose2d( + 3 * groups, 5 * groups, groups, ch_axis + ), # ch_axis=0 + quant_config=None, + conv_count=1, + ) def test_padded_output_tconv(self): class TConv2d(torch.nn.Module): @@ -761,7 +695,7 @@ def forward(self, x): (tester.export().check_count({op: conv_count}).to_edge_transform_and_lower()) - # tconv should not be offloaded to XNNPack, since output padding is not + # tconv should not be offloaded to XNNPack, since output padding is not supported ( tester.check( ["executorch_exir_dialects_edge__ops_aten_convolution_default"] From 648122038350e5d9a077cd07e344bd6980aa2e22 Mon Sep 17 00:00:00 2001 From: Max Ren Date: Tue, 17 Jun 2025 11:16:14 -0700 Subject: [PATCH 2/2] [Quantized DeConv Support] Dynamically Quantized Deconvolutions with groups ==1 Pull Request resolved: https://github.com/pytorch/executorch/pull/11731 Here we support dynamically quantized Deconvolutions. 
There is some refactoring of the previous diff, but in general, we just remove the constraint in the Dynamism check that the convolution isn't transposed. For the same reasons as before, this only supports channel_axis = 1 and groups = 1. ghstack-source-id: 291033632 @exported-using-ghexport Differential Revision: [D76638904](https://our.internmc.facebook.com/intern/diff/D76638904/) --- .../xnnpack/quantizer/xnnpack_quantizer.py | 2 +- .../quantizer/xnnpack_quantizer_utils.py | 82 ++++++++++++------- backends/xnnpack/test/ops/test_conv2d.py | 81 ++++++++++-------- backends/xnnpack/utils/utils.py | 31 +++++++ 4 files changed, 133 insertions(+), 63 deletions(-) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer.py b/backends/xnnpack/quantizer/xnnpack_quantizer.py index 130eda03f88..c07d27e4231 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer.py @@ -274,7 +274,7 @@ class XNNPACKQuantizer(Quantizer): QuantPattern("linear_relu", False, False, LINEAR_TARGETS), QuantPattern("linear", True, False, LINEAR_TARGETS), QuantPattern("conv", True, False, CONV_TARGETS), - QuantPattern("conv_transpose", False, False, CONV_TARGETS), + QuantPattern("conv_transpose", True, False, CONV_TARGETS), QuantPattern("conv_relu", False, False, CONV_TARGETS), QuantPattern("conv_transpose_relu", False, False, CONV_TARGETS), QuantPattern("adaptive_avg_pool2d", False, False, ADAPTIVE_AVG_POOL2D_TARGETS), diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 2ebf69da4f5..3d687d0b513 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -4,7 +4,10 @@ import torch import torch.nn.functional as F -from executorch.backends.xnnpack.utils.utils import is_depthwise_conv +from executorch.backends.xnnpack.utils.utils import ( + get_groups_from_conv, + is_depthwise_conv, +) from 
torch._subclasses import FakeTensor from torch.fx import Node from torch.fx.passes.utils.matcher_with_name_node_map_utils import ( @@ -65,6 +68,40 @@ def decorator(annotator: AnnotatorType) -> None: return decorator +def change_quantization_config( + original_qspec, + dtype=None, + quant_min=None, + quant_max=None, + qscheme=None, + ch_axis=None, + is_dynamic=None, + observer_or_fake_quant_ctr=None, +): + """ + Return a new QuantizationSpec copied from original_qspec with the given fields overridden. + + Arguments left as None keep the value from original_qspec. Overrides are + compared against None (not tested for truthiness) so that falsy values + such as ch_axis=0, quant_min=0 or is_dynamic=False are honored. + """ + + def _pick(override, current): + return current if override is None else override + + return QuantizationSpec( + dtype=_pick(dtype, original_qspec.dtype), + quant_min=_pick(quant_min, original_qspec.quant_min), + quant_max=_pick(quant_max, original_qspec.quant_max), + qscheme=_pick(qscheme, original_qspec.qscheme), + ch_axis=_pick(ch_axis, original_qspec.ch_axis), + is_dynamic=_pick(is_dynamic, original_qspec.is_dynamic), + observer_or_fake_quant_ctr=_pick( + observer_or_fake_quant_ctr, original_qspec.observer_or_fake_quant_ctr + ), + ) + + def is_relu_node(node: Node) -> bool: """ Check if a given node is a relu node @@ -231,6 +268,9 @@ def _do_annotate_conv( if is_relu_node(user): continue + # Tracks conditions for whether or not to skip + skip = False + input_qspec_map = {} input_act = conv_node.args[0] assert isinstance(input_act, Node) @@ -239,35 +279,32 @@ def _do_annotate_conv( weight = conv_node.args[1] assert isinstance(weight, Node) weight_qspec = get_weight_qspec(quantization_config) + num_groups = get_groups_from_conv(conv_node) + + # skip if transposed conv has more than 1 group + skip = skip or (is_conv_transpose and num_groups != 1) + if is_conv_transpose: # transposed convs per output channel quantization - weight_qspec = QuantizationSpec( - dtype=weight_qspec.dtype, - quant_min=weight_qspec.quant_min, - quant_max=weight_qspec.quant_max, - qscheme=weight_qspec.qscheme, - ch_axis=1, - is_dynamic=False, - observer_or_fake_quant_ctr=weight_qspec.observer_or_fake_quant_ctr, - ) - input_qspec_map[weight] = weight_qspec + weight_qspec = change_quantization_config(weight_qspec, ch_axis=1) - # Only annotate dynamically quantized
conv if it's 2D and not depthwise - if ( + input_qspec_map[weight] = weight_qspec + is_dynamic = ( quantization_config and quantization_config.input_activation and quantization_config.input_activation.is_dynamic - ): + ) + + # Only annotate dynamically quantized conv if it's 2D and not depthwise + if is_dynamic: weight_val = weight.meta.get("val", None) weight_shape = getattr(weight_val, "shape", None) - # Skip if not a 4D weight tensor (i.e. not conv2d) - if weight_shape is not None and len(weight_shape) != 4: - continue - + skip = skip or (weight_shape is not None and len(weight_shape) != 4) # Skip if depthwise (default to groups=1 since it's not an arg) - if is_depthwise_conv(weight_shape, 1, is_conv_transpose): - continue + skip = skip or ( + not is_conv_transpose and is_depthwise_conv(weight_shape, 1, False) + ) # adding weight node to the partition as well partition = [conv_node, conv_node.args[1]] @@ -277,7 +303,7 @@ def _do_annotate_conv( input_qspec_map[bias] = get_bias_qspec(quantization_config) partition.append(bias) - if _is_annotated(partition): + if _is_annotated(partition) or skip: continue if filter_fn and any(not filter_fn(n) for n in partition): @@ -324,17 +350,10 @@ def _do_annotate_conv_relu( weight = conv_node.args[1] assert isinstance(weight, Node) weight_qspec = get_weight_qspec(quantization_config) + groups = get_groups_from_conv(conv_node) if is_conv_transpose: # transposed convs per output channel quantization - weight_qspec = QuantizationSpec( - dtype=weight_qspec.dtype, - quant_min=weight_qspec.quant_min, - quant_max=weight_qspec.quant_max, - qscheme=weight_qspec.qscheme, - ch_axis=1, - is_dynamic=False, - observer_or_fake_quant_ctr=weight_qspec.observer_or_fake_quant_ctr, - ) + weight_qspec = change_quantization_config(weight_qspec, ch_axis=1) input_qspec_map[weight] = weight_qspec # adding weight node to the partition as well @@ -347,6 +366,9 @@ def _do_annotate_conv_relu( if _is_annotated(partition): continue + if is_conv_transpose 
and groups != 1: + continue + if filter_fn and any(not filter_fn(n) for n in partition): continue diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index d838ef0ffe9..2a0a82d99b6 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -174,14 +174,11 @@ def get_inputs(self): class Conv2dDQSeq(torch.nn.Module): - def __init__(self): + def __init__(self, transpose=False): super().__init__() - self.first = torch.nn.Conv2d( - in_channels=3, out_channels=8, kernel_size=3, padding=1 - ) - self.second = torch.nn.Conv2d( - in_channels=8, out_channels=10, kernel_size=3, padding=1 - ) + op = torch.nn.ConvTranspose2d if transpose else torch.nn.Conv2d + self.first = op(in_channels=3, out_channels=8, kernel_size=3, padding=1) + self.second = op(in_channels=8, out_channels=10, kernel_size=3, padding=1) def forward(self, x): y = self.first(x) @@ -192,14 +189,11 @@ def get_inputs(self): class Conv2dDQParallel(torch.nn.Module): - def __init__(self): + def __init__(self, transpose=False): super().__init__() - self.first = torch.nn.Conv2d( - in_channels=3, out_channels=8, kernel_size=3, padding=1 - ) - self.second = torch.nn.Conv2d( - in_channels=3, out_channels=8, kernel_size=3, padding=1 - ) + op = torch.nn.ConvTranspose2d if transpose else torch.nn.Conv2d + self.first = op(in_channels=3, out_channels=8, kernel_size=3, padding=1) + self.second = op(in_channels=3, out_channels=10, kernel_size=3, padding=1) def forward(self, x): first = self.first(x) @@ -266,8 +260,7 @@ def _test_dq( ) DynamicallyQuantizedPartitioner = XnnpackPartitioner( - config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, - per_op_mode=True, + config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, per_op_mode=True ) tester = Tester(m, m.get_inputs(), dynamic_shapes=dynamic_shapes) @@ -349,11 +342,10 @@ def test_fp32_conv2d_depthwise(self): ) def test_qs8_conv2d_depthwise(self): - for transpose in (True, False): - 
self._test( - Conv2d(groups=2, in_channels=2, out_channels=6, transpose=transpose), - quant_config=get_symmetric_quantization_config(), - ) + self._test( + Conv2d(groups=2, in_channels=2, out_channels=6), + quant_config=get_symmetric_quantization_config(), + ) def test_fp32_conv2d_bn(self): class Conv2dBatchNorm(torch.nn.Module): @@ -515,17 +507,14 @@ def forward(self, x): def get_inputs(self): return (torch.randn(batches, in_channels, height, width) * 11,) - for transpose in (True, False): - for per_channel_quant in (False, True): - if transpose and per_channel_quant: - continue - model = ModelConvReLU(transpose=transpose) - self._test( - model, - quant_config=get_symmetric_quantization_config( - is_per_channel=per_channel_quant - ), - ) + for per_channel_quant in (False, True): + model = ModelConvReLU() + self._test( + model, + quant_config=get_symmetric_quantization_config( + is_per_channel=per_channel_quant + ), + ) def test_qs8_conv2d_relu_seq(self): class ConvReLUSeq(torch.nn.Module): @@ -728,3 +717,31 @@ def test_dq_conv2d_parallel(self) -> None: model = Conv2dDQParallel() conv_count = sum(1 for m in model.modules() if type(m) is torch.nn.Conv2d) self._test_dq(model, conv_count) + + def test_dq_conv2d_transpose(self) -> None: + model = Conv2d( + in_channels=3, + out_channels=10, + kernel_size=(3, 3), + stride=(1, 1), + padding=(0, 0), + batches=1, + width=8, + height=8, + transpose=True, + ) + self._test_dq(model) + + def test_dq_conv2d_transpose_seq(self) -> None: + model = Conv2dDQSeq(transpose=True) + conv_count = sum( + 1 for m in model.modules() if type(m) is torch.nn.ConvTranspose2d + ) + self._test_dq(model, conv_count) + + def test_dq_conv2d_transpose_parallel(self) -> None: + model = Conv2dDQParallel(transpose=True) + conv_count = sum( + 1 for m in model.modules() if type(m) is torch.nn.ConvTranspose2d + ) + self._test_dq(model, conv_count) diff --git a/backends/xnnpack/utils/utils.py b/backends/xnnpack/utils/utils.py index b23fd444117..a8f3178f98f 
100644 --- a/backends/xnnpack/utils/utils.py +++ b/backends/xnnpack/utils/utils.py @@ -25,6 +25,7 @@ is_lifted_tensor_constant, is_param, ) +from torchao.quantization.pt2e.utils import _is_conv_node, _is_conv_transpose_node ### XNNPACK Capture ### @@ -160,6 +161,36 @@ def get_source_fn(node: torch.fx.Node) -> Optional[torch.fx.Node]: return source_fn[1] +def get_groups_from_conv(conv_node: torch.fx.Node) -> int: + if _is_conv_node(conv_node): + in_node = cast(torch.fx.Node, conv_node.args[0]) + weight_node = cast(torch.fx.Node, conv_node.args[1]) + # groups isn't given to us in the training graph so we deduce it from the weight shape + # and the input shape + + # input shape is (N, C_in, H_in, W_in) + in_channels = in_node.meta["val"].shape[1] + + # weight shape is (C_out, C_in/groups, kernel_size[0], kernel_size[1]) + in_groups = weight_node.meta["val"].shape[1] + + return in_channels // in_groups + elif _is_conv_transpose_node(conv_node): + weight_node = cast(torch.fx.Node, conv_node.args[1]) + # groups isn't given to us in the training graph so we deduce it from the weight shape + # and the output shape + + # weight shape is (C_in, C_out/groups, kernel_size[0], kernel_size[1]) + out_groups = weight_node.meta["val"].shape[1] + + # output shape is (N, C_out, H_out, W_out) + out_channels = conv_node.meta["val"].shape[1] + + return out_channels // out_groups + + raise RuntimeError(f"expected {conv_node} to be a conv or conv_transpose node") + + def is_depthwise_conv( kernel_shape: Tuple[int, ...], groups: int = 1, is_transpose: bool = False ) -> bool: