Skip to content

Commit 154ffff

Browse files
committed
NXP backend: Add support for softmax with the new Neutron flow.
1 parent 4f3ac17 commit 154ffff

6 files changed

Lines changed: 131 additions & 33 deletions

File tree

backends/nxp/backend/ir/converter/node_converters/ops_converters/softmax_converter.py

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# LICENSE file in the root directory of this source tree.
55

66
import numpy as np
7+
import torch
78

89
from executorch.backends.nxp.backend.custom_delegation_options import (
910
CustomDelegationOptions,
@@ -58,14 +59,43 @@ def _is_supported_on_target(
5859
parameters_mapping: dict[str, Parameter],
5960
custom_delegation_options: CustomDelegationOptions,
6061
) -> bool:
61-
"""Check if the softmax operation can be executed on Neutron hardware.
62-
63-
Hardware constraints:
62+
if custom_delegation_options.use_new_flow_neutron_c:
63+
"""Hardware constraints for the new Neutron flow:
64+
1. Input and Output must be INT8/UINT8
65+
2. Channels < 4096 / num_pipes * 4
66+
3. Total spatial size (N*H*W) <= 4096
67+
4. (channels * spatial_size) / num_macs <= 65536
68+
"""
69+
# Constraint 1: Input and Output must be INT8/UINT8.
70+
supported_types = [torch.int8, torch.uint8]
71+
if not NodeConverter.uses_quantization_type_for_io(
72+
node, supported_types, [0], [0]
73+
):
74+
return False
75+
76+
# Constraint 2: Channel size limit
77+
num_pipes = neutron_target_spec.get_num_pipes()
78+
channels = SoftmaxConverter._get_channels(node)
79+
if channels >= 4096 / num_pipes * 4:
80+
return False
81+
82+
# Constraint 3: Spatial size limit
83+
total_spatial_size = SoftmaxConverter._get_total_spatial_size(node)
84+
if total_spatial_size > 4096:
85+
return False
86+
87+
# Constraint 4: Total processing size limit
88+
num_macs = neutron_target_spec.get_num_macs()
89+
if channels * total_spatial_size / num_macs > 65536:
90+
return False
91+
92+
return True
93+
94+
"""Hardware constraints for the old flow:
6495
1. Input rank must be >= 2 (Neutron does not support 1D)
65-
2. Channels must be a multiple of num_macs
66-
3. Channels < 4096 / num_pipes * 4
67-
4. Total spatial size (N*H*W) <= 4096
68-
5. (channels * spatial_size) / num_macs <= 65536
96+
2. Channels < 4096 / num_pipes * 4
97+
3. Total spatial size (N*H*W) <= 4096
98+
4. (channels * spatial_size) / num_macs <= 65536
6999
"""
70100
input_shape = node.meta["val"].shape
71101

@@ -78,19 +108,15 @@ def _is_supported_on_target(
78108
channels = SoftmaxConverter._get_channels(node)
79109
total_spatial_size = SoftmaxConverter._get_total_spatial_size(node)
80110

81-
# Constraint 2: Channels must be a multiple of num_macs
82-
if channels % num_macs != 0:
83-
return False
84-
85-
# Constraint 3: Channel size limit
111+
# Constraint 2: Channel size limit
86112
if channels >= 4096 / num_pipes * 4:
87113
return False
88114

89-
# Constraint 4: Spatial size limit
115+
# Constraint 3: Spatial size limit
90116
if total_spatial_size > 4096:
91117
return False
92118

93-
# Constraint 5: Total processing size limit
119+
# Constraint 4: Total processing size limit
94120
if channels * total_spatial_size / num_macs > 65536:
95121
return False
96122

backends/nxp/tests/generic_tests/test_cifarnet.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111
from executorch.backends.nxp.tests.config_importer import test_config
1212
from executorch.backends.nxp.tests.dataset_creator import CopyDatasetCreator
1313
from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec
14-
from executorch.backends.nxp.tests.graph_verifier import (
15-
BaseGraphVerifier,
16-
NonDelegatedNode,
17-
)
14+
from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
1815
from executorch.backends.nxp.tests.model_output_comparator import (
1916
NumericalStatsOutputComparator,
2017
)
@@ -56,17 +53,15 @@ def test_cifarnet(mocker, cifar_test_files, channels_last):
5653
model.to(memory_format=torch.channels_last)
5754
input_spec.dim_order = torch.channels_last
5855

59-
non_dlg_nodes = [NonDelegatedNode("aten__softmax_default", 1)]
60-
6156
comparator = NumericalStatsOutputComparator(
62-
max_mse_error=1.0e-3, is_classification_task=True
57+
max_mse_error=2.0e-2, is_classification_task=True
6358
)
6459
lower_run_compare(
6560
model,
6661
[input_spec],
6762
dataset_creator=CopyDatasetCreator(cifar_test_files),
6863
output_comparator=comparator,
69-
dlg_model_verifier=BaseGraphVerifier(1, non_dlg_nodes),
64+
dlg_model_verifier=BaseGraphVerifier(1, []),
7065
mocker=mocker,
7166
# Run the channels last reference in PyTorch as the ExecuTorch CPU model contains incorrectly
7267
# lowered channels last convolution weights, which cause incorrect inference results. The issue
@@ -83,7 +78,6 @@ def test_cifarnet_qat(mocker, cifar_test_files):
8378
model = CifarNet().get_eager_model().eval()
8479

8580
input_shape = (1, 3, 32, 32)
86-
non_dlg_nodes = [NonDelegatedNode("aten__softmax_default", 1)]
8781

8882
# The higher MSE threshold is due to using weaker "MovingAbs" observers instead of "MinMax" observers.
8983
# The "MovingAbs" observers capture only a limited number of past calibration samples compared to "MinMax",
@@ -96,7 +90,7 @@ def test_cifarnet_qat(mocker, cifar_test_files):
9690
input_shape,
9791
dataset_creator=CopyDatasetCreator(cifar_test_files),
9892
output_comparator=comparator,
99-
dlg_model_verifier=BaseGraphVerifier(1, non_dlg_nodes),
93+
dlg_model_verifier=BaseGraphVerifier(1, []),
10094
mocker=mocker,
10195
use_qat=True,
10296
)

backends/nxp/tests/generic_tests/test_integration.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ def test_conv_fc_softmax__to_executorch_program(use_qat):
2828

2929
delegation_info = get_delegation_info(program.graph_module)
3030
assert delegation_info.num_delegated_subgraphs == 1
31-
assert delegation_info.num_non_delegated_nodes == 11
32-
assert delegation_info.num_delegated_nodes == 13
31+
assert delegation_info.num_non_delegated_nodes == 5
32+
assert delegation_info.num_delegated_nodes == 16
3333

3434
for node in program.graph.nodes:
3535
# Make sure Convolution and AddMM are delegated
@@ -46,8 +46,8 @@ def test_cifarnet(use_qat):
4646

4747
delegation_info = get_delegation_info(exec_prog.exported_program().graph_module)
4848
assert delegation_info.num_delegated_subgraphs == 1
49-
assert delegation_info.num_non_delegated_nodes == 11
50-
assert delegation_info.num_delegated_nodes == 45
49+
assert delegation_info.num_non_delegated_nodes == 5
50+
assert delegation_info.num_delegated_nodes == 48
5151

5252
nodes = list(exec_prog.exported_program().graph.nodes)
5353
assert nodes[2].name == "quantized_decomposed_quantize_per_tensor_default"

backends/nxp/tests/generic_tests/test_neutron_backend_executor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def test_conv_fc__lowered_program_and_tflite_output_match(mocker):
8585
# No Transpose ops in produced TFLite model
8686
tflite_subgraph = Model.GetRootAs(tflite_flatbuffers_model).Subgraphs(0)
8787

88-
assert tflite_subgraph.OperatorsLength() == 3
88+
assert tflite_subgraph.OperatorsLength() == 4
8989
assert (
9090
tflite_subgraph.Operators(0).BuiltinOptionsType()
9191
== BuiltinOptions.Conv2DOptions
@@ -98,6 +98,10 @@ def test_conv_fc__lowered_program_and_tflite_output_match(mocker):
9898
tflite_subgraph.Operators(2).BuiltinOptionsType()
9999
== BuiltinOptions.FullyConnectedOptions
100100
)
101+
assert (
102+
tflite_subgraph.Operators(3).BuiltinOptionsType()
103+
== BuiltinOptions.SoftmaxOptions
104+
)
101105

102106
# Verify outputs of program and TFLite model
103107
input_data = (

backends/nxp/tests/ir/converter/node_converter/test_softmax_converter.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,19 @@
1717
ToChannelFirstPreprocess,
1818
ToChannelLastPreprocess,
1919
)
20+
21+
from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
22+
23+
from executorch.backends.nxp.tests.model_output_comparator import (
24+
NumericalStatsOutputComparator,
25+
)
2026
from executorch.backends.nxp.tests.models import SoftmaxModule
21-
from executorch.exir.dialects._ops import ops as exir_ops
27+
from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
28+
from executorch.backends.nxp.tests.ops_aliases import Softmax
2229

2330
# noinspection PyProtectedMember
31+
2432
ExecutorchDelegateCall = torch._higher_order_ops.executorch_call_delegate
25-
Softmax = exir_ops.edge.aten._softmax.default
2633

2734

2835
@pytest.fixture(autouse=True)
@@ -207,3 +214,70 @@ def test_softmax_delegation__1d():
207214
model = SoftmaxModule(dim)
208215
delegated_ep = to_quantized_edge_program(model, input_shape).exported_program()
209216
assert_softmax_not_delegated(delegated_ep.graph)
217+
218+
219+
class TestSoftmaxNewNeutronFlow:
220+
@pytest.mark.parametrize(
221+
"input_shape, dim",
222+
[
223+
# Dim must always be the last dimension.
224+
pytest.param((10,), -1, id="1D_dim_-1"),
225+
pytest.param((5, 21), -1, id="2D_dim_-1"),
226+
pytest.param((2, 3, 13), -1, id="3D_dim_-1"),
227+
pytest.param((1, 3, 3, 200), -1, id="4D_dim_-1"),
228+
pytest.param((5, 4, 3, 2, 180), -1, id="5D_dim_-1"),
229+
],
230+
)
231+
def test__basic_nsys_inference(self, input_shape, dim):
232+
model = SoftmaxModule(dim)
233+
graph_verifier = BaseGraphVerifier(
234+
exp_num_delegate_call_nodes=1, # Delegated Softmax.
235+
exp_non_delegated_nodes=[],
236+
)
237+
output_comparator = NumericalStatsOutputComparator(
238+
max_mse_error=0.001, is_classification_task=True
239+
)
240+
lower_run_compare(
241+
model,
242+
input_shape,
243+
graph_verifier,
244+
use_new_flow_neutron_c=True,
245+
output_comparator=output_comparator,
246+
)
247+
248+
@pytest.mark.parametrize(
249+
"input_shape, dim",
250+
[
251+
pytest.param((4096, 8), -1, id="2D_spatial_size_limit"),
252+
pytest.param((2040,), -1, id="1D_channels_limit"),
253+
pytest.param((4096, 128), -1, id="2D_total_size_limit"),
254+
pytest.param((1, 64, 64, 8), -1, id="4D_spatial_size_limit"),
255+
],
256+
)
257+
def test__limits(self, input_shape, dim, mocker):
258+
model = SoftmaxModule(dim)
259+
delegated_ep = to_quantized_edge_program(
260+
model, input_shape, use_new_flow_neutron_c=True
261+
).exported_program()
262+
263+
# Make sure the `softmax` was delegated.
264+
assert_softmax_delegated(delegated_ep.graph)
265+
266+
@pytest.mark.parametrize(
267+
"input_shape, dim",
268+
[
269+
pytest.param((4097, 8), -1, id="2D_spatial_size_exceeded"),
270+
pytest.param((2048,), -1, id="1D_channels_exceeded"),
271+
pytest.param((4096, 129), -1, id="2D_total_size_exceeded"),
272+
pytest.param((1, 64, 65, 8), -1, id="4D_spatial_size_exceeded"),
273+
],
274+
)
275+
def test__limits_exceeded(self, input_shape, dim):
276+
model = SoftmaxModule(dim)
277+
delegated_ep = to_quantized_edge_program(
278+
model, input_shape, use_new_flow_neutron_c=True
279+
).exported_program()
280+
281+
# Make sure the `softmax` was NOT delegated.
282+
283+
assert_softmax_not_delegated(delegated_ep.graph)

backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ def test_remove_io_quant_ops_pass__cifarnet():
6262
)
6363

6464
nodes = list(exec_prog.exported_program().graph.nodes)
65-
assert len(nodes) == 11
65+
assert len(nodes) == 5
6666
assert (
6767
nodes[0].meta["val"].dtype == torch.int8
6868
), "Input tensor doesn't have type INT8."
6969
assert (
70-
nodes[10].meta["val"][0].dtype == torch.int8
70+
nodes[4].meta["val"][0].dtype == torch.int8
7171
), "Output tensor doesn't have type INT8."
7272

7373
assert (

0 commit comments

Comments
 (0)