Arm backend: Add FP8 support for gather/scatter-based composite ops

YufengShi-dudu · YufengShi-dudu · commit 366de24539db · 2026-06-16T11:30:09.000+01:00
Support FP8 tensors for the following composite ops:
- TOSA GATHER: embedding, index_select, index.Tensor, unfold_copy
- TOSA SCATTER: index_put, index_copy, slice_scatter

Run all FP8 tests through the TOSA reference model. For ops that have no
eager CPU FP8 support, only execute the TOSA reference model (the tests
pass compare_tosa_ref_model_outputs=False); for the remaining ops, keep
the default comparison of the reference-model output against eager.

Change-Id: I3d81cd6dd426f16b5f2db8937228cad12184b6a6
Signed-off-by: Yufeng Shi <yufeng.shi@arm.com>
diff --git a/backends/arm/operator_support/index_select_support.py b/backends/arm/operator_support/index_select_support.py
@@ -77,8 +77,14 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires INT profile.",
                 )
                 return False
-        # fp16/fp32/bf16: either FP profile, or INT profile (via quantization)
-        elif values_dtype in (torch.float16, torch.float32, torch.bfloat16):
+        # fp16/fp32/bf16/fp8: either FP profile, or INT profile (via quantization)
+        elif values_dtype in (
+            torch.float16,
+            torch.float32,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e5m2,
+        ):
             if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
                 "bf16"
             ):
@@ -87,6 +93,22 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires bf16 extension.",
                 )
                 return False
+            if values_dtype == torch.float8_e4m3fn and not tosa_spec.support_extension(
+                "fp8e4m3"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e4m3 extension.",
+                )
+                return False
+            if values_dtype == torch.float8_e5m2 and not tosa_spec.support_extension(
+                "fp8e5m2"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e5m2 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
@@ -98,7 +120,8 @@ def is_node_tosa_supported(
             self.reporter.report_reject(
                 node,
                 f"{node.target}: unsupported values dtype {values_dtype}; "
-                "expected bool/int8/int16/int32/float16/bfloat16/float32.",
+                "expected bool/int8/int16/int32/float16/bfloat16/float32/"
+                "float8_e4m3fn/float8_e5m2.",
             )
             return False
 
diff --git a/backends/arm/operator_support/index_tensor_support.py b/backends/arm/operator_support/index_tensor_support.py
@@ -144,7 +144,13 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires INT profile.",
                 )
                 return False
-        elif values_dtype in (torch.float16, torch.float32, torch.bfloat16):
+        elif values_dtype in (
+            torch.float16,
+            torch.float32,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e5m2,
+        ):
             if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
                 "bf16"
             ):
@@ -153,6 +159,22 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires bf16 extension.",
                 )
                 return False
+            if values_dtype == torch.float8_e4m3fn and not tosa_spec.support_extension(
+                "fp8e4m3"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e4m3 extension.",
+                )
+                return False
+            if values_dtype == torch.float8_e5m2 and not tosa_spec.support_extension(
+                "fp8e5m2"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e5m2 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
@@ -164,7 +186,7 @@ def is_node_tosa_supported(
             self.reporter.report_reject(
                 node,
                 f"{node.target}: unsupported values dtype {values_dtype}; "
-                "expected bool/int8/int16/int32/float16/bfloat16/float32.",
+                "expected bool/int8/int16/int32/float16/bfloat16/float32/float8_e4m3fn/float8_e5m2.",
             )
             return False
 
diff --git a/backends/arm/operator_support/unfold_copy_support.py b/backends/arm/operator_support/unfold_copy_support.py
@@ -84,8 +84,14 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires INT profile.",
                 )
                 return False
-        # fp16/fp32/bf16: either FP profile, or INT profile (via quantization)
-        elif values_dtype in (torch.float16, torch.float32, torch.bfloat16):
+        # fp16/fp32/bf16/fp8: either FP profile, or INT profile (via quantization)
+        elif values_dtype in (
+            torch.float16,
+            torch.float32,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e5m2,
+        ):
             if values_dtype == torch.bfloat16 and not tosa_spec.support_extension(
                 "bf16"
             ):
@@ -94,6 +100,22 @@ def is_node_tosa_supported(
                     f"{node.target}: dtype {values_dtype} requires bf16 extension.",
                 )
                 return False
+            if values_dtype == torch.float8_e4m3fn and not tosa_spec.support_extension(
+                "fp8e4m3"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e4m3 extension.",
+                )
+                return False
+            if values_dtype == torch.float8_e5m2 and not tosa_spec.support_extension(
+                "fp8e5m2"
+            ):
+                self.reporter.report_reject(
+                    node,
+                    f"{node.target}: dtype {values_dtype} requires fp8e5m2 extension.",
+                )
+                return False
             if not (tosa_spec.support_float() or tosa_spec.support_integer()):
                 self.reporter.report_reject(
                     node,
@@ -105,7 +127,8 @@ def is_node_tosa_supported(
             self.reporter.report_reject(
                 node,
                 f"{node.target}: unsupported values dtype {values_dtype}; "
-                "expected bool/int8/int16/int32/float16/bfloat16/float32.",
+                "expected bool/int8/int16/int32/float16/bfloat16/float32/"
+                "float8_e4m3fn/float8_e5m2.",
             )
             return False
 
diff --git a/backends/arm/test/ops/test_embedding.py b/backends/arm/test/ops/test_embedding.py
@@ -1,4 +1,4 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -63,6 +63,22 @@ def forward(self, weights: torch.Tensor, indices: torch.Tensor):
         torch.randint(low=0, high=10, size=(4, 3, 2, 5), dtype=torch.int64),
     ),
 }
+test_input_fp8: dict[str, tuple[input_params, str]] = {
+    "test_fp8e4m3_int32_indices": (
+        (
+            torch.randn(10, 3, dtype=torch.float32).to(torch.float8_e4m3fn),
+            torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int32),
+        ),
+        "fp8e4m3",
+    ),
+    "test_fp8e5m2_int64_indices": (
+        (
+            torch.randn(11, 5, dtype=torch.float32).to(torch.float8_e5m2),
+            torch.randint(low=0, high=10, size=(4, 3), dtype=torch.int64),
+        ),
+        "fp8e5m2",
+    ),
+}
 
 
 @pytest.mark.skip(reason="MLETORCH-1274 Improve data type checks during partitioning")
@@ -74,7 +90,6 @@ def test_embedding_tosa_FP(test_input: input_params):
         test_input,
         op.aten_op,
         op.exir_op,
-        use_to_edge_transform_and_lower=True,
         transform_passes=[InsertInt32CastsAfterInt64PlaceholdersPass()],
     )
     pipeline.run()
@@ -88,22 +103,36 @@ def test_embedding_tosa_INT(test_input: input_params):
         test_input,
         op.aten_op,
         op.exir_op,
-        use_to_edge_transform_and_lower=True,
     )
     pipeline.pop_stage("check.aten")
     pipeline.pop_stage("check_count.exir")
 
     pipeline.run()
 
 
+@common.parametrize("test_input", test_input_fp8)
+def test_embedding_tosa_FP_fp8(test_input):
+    inputs, tosa_extension = test_input
+    op = Embedding()
+    pipeline = TosaPipelineFP[input_params](
+        op,
+        inputs,
+        op.aten_op,
+        op.exir_op,
+        transform_passes=[InsertInt32CastsAfterInt64PlaceholdersPass()],
+        compare_tosa_ref_model_outputs=False,
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 def test_embedding_tosa_INT_expand():
     op = ExpandEmbedding()
     pipeline = TosaPipelineINT(
         op,
         ExpandEmbedding.example_inputs,
         ExpandEmbedding.aten_op,
         ExpandEmbedding.exir_op,
-        use_to_edge_transform_and_lower=True,
     )
     pipeline.pop_stage("check.aten")
     pipeline.pop_stage("check_count.exir")
@@ -121,7 +150,6 @@ def test_embedding_vgf_no_quant(test_input: input_params):
         test_input,
         op.aten_op,
         op.exir_op,
-        use_to_edge_transform_and_lower=True,
         transform_passes=[InsertInt32CastsAfterInt64PlaceholdersPass()],
         quantize=False,
     )
@@ -137,7 +165,6 @@ def test_embedding_vgf_quant(test_input: input_params):
         test_input,
         op.aten_op,
         op.exir_op,
-        use_to_edge_transform_and_lower=True,
         quantize=True,
     )
     pipeline.pop_stage("check.aten")
diff --git a/backends/arm/test/ops/test_index_copy.py b/backends/arm/test/ops/test_index_copy.py
@@ -74,6 +74,32 @@ class IndexCopyModule(torch.nn.Module):
             ("in_place", True),
         )
     }
+    test_data_fp8 = {
+        "rand_single_index_fp8e4m3_out_of_place": (
+            lambda: (
+                (
+                    0,
+                    torch.rand((4, 5), dtype=torch.float32).to(torch.float8_e4m3fn),
+                    torch.LongTensor([0]),
+                    torch.zeros((1, 5), dtype=torch.float32).to(torch.float8_e4m3fn),
+                ),
+                False,
+                "fp8e4m3",
+            )
+        ),
+        "rand_3d_dim_1_fp8e5m2_in_place": (
+            lambda: (
+                (
+                    1,
+                    torch.rand((4, 2, 3), dtype=torch.float32).to(torch.float8_e5m2),
+                    torch.LongTensor([0, 1]),
+                    torch.ones((4, 2, 3), dtype=torch.float32).to(torch.float8_e5m2),
+                ),
+                True,
+                "fp8e5m2",
+            )
+        ),
+    }
 
     aten_ops = {
         False: ["torch.ops.aten.index_put.default"],
@@ -112,6 +138,21 @@ def test_index_copy_tosa_FP(test_data):
     pipeline.run()
 
 
+@common.parametrize("test_data", IndexCopyModule.test_data_fp8)
+def test_index_copy_tosa_FP_fp8(test_data):
+    inputs, inplace, tosa_extension = test_data()
+    module = IndexCopyModule(inplace=inplace)
+    pipeline = TosaPipelineFP(
+        module=module,
+        test_data=inputs,
+        aten_op=[],
+        compare_tosa_ref_model_outputs=False,
+        transform_passes=[InsertInt32CastsAfterInt64PlaceholdersPass()],
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", IndexCopyModule.test_data)
 def test_index_copy_tosa_INT(test_data):
     inputs, inplace = test_data()
diff --git a/backends/arm/test/ops/test_index_put.py b/backends/arm/test/ops/test_index_put.py
@@ -333,6 +333,29 @@
         0,
     ),
 }
+test_data_suite_fp8 = {
+    "rank2_fp8e4m3": (
+        lambda: (
+            torch.rand((4, 5), dtype=torch.float32).to(torch.float8_e4m3fn),
+            (torch.tensor([0, 2], dtype=torch.int32),),
+            torch.rand((2, 5), dtype=torch.float32).to(torch.float8_e4m3fn),
+            False,
+        ),
+        "fp8e4m3",
+    ),
+    "rank3_fp8e5m2": (
+        lambda: (
+            torch.rand((3, 4, 2), dtype=torch.float32).to(torch.float8_e5m2),
+            (
+                torch.tensor([0, 2], dtype=torch.int32),
+                torch.tensor([1, 3], dtype=torch.int32),
+            ),
+            torch.rand((2, 2), dtype=torch.float32).to(torch.float8_e5m2),
+            False,
+        ),
+        "fp8e5m2",
+    ),
+}
 
 
 class IndexPut(torch.nn.Module):
@@ -375,6 +398,19 @@ def test_index_put_tosa_FP(test_module: input_t):
     pipeline.run()
 
 
+@common.parametrize("test_module", test_data_suite_fp8)
+def test_index_put_tosa_FP_fp8(test_module):
+    test_data, tosa_extension = test_module
+    pipeline = TosaPipelineFP(
+        IndexPut(),
+        test_data(),
+        aten_op=IndexPut.aten_op,
+        exir_op=IndexPut.exir_op,
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_module", test_data_suite_fp | test_data_int, xfails=xfails)
 def test_index_put_tosa_INT(test_module: input_t):
     pipeline = TosaPipelineINT[input_t](
diff --git a/backends/arm/test/ops/test_index_select.py b/backends/arm/test/ops/test_index_select.py
@@ -81,6 +81,26 @@ def forward(self, input_: torch.Tensor, dim: int, index_: torch.Tensor):
         torch.tensor([1, 0], dtype=torch.int32),  # [W=2]
     ),
 }
+test_data_fp8: dict[str, input_params] = {
+    # Rank-3: [N, K, C] -> index_select dim=1 => [N, W, C]
+    "test_fp8e4m3_rank3_dim1": (
+        torch.randn(2, 4, 3, dtype=torch.float32).to(
+            torch.float8_e4m3fn
+        ),  # [N=2, K=4, C=3]
+        1,
+        torch.tensor([1, 3], dtype=torch.int32),  # [W=2]
+        "fp8e4m3",
+    ),
+    # Rank-4: [A, B, K, C] -> index_select dim=2 => [A, B, W, C]
+    "test_fp8e5m2_rank4_dim2": (
+        torch.randn(2, 3, 4, 5, dtype=torch.float32).to(
+            torch.float8_e5m2
+        ),  # [A=2, B=3, K=4, C=5]
+        2,
+        torch.tensor([3, 1], dtype=torch.int32),  # [W=2]
+        "fp8e5m2",
+    ),
+}
 
 # ---- INT profile: integer inputs + bool ----
 test_data_int: dict[str, input_params] = {
@@ -136,6 +156,20 @@ def test_index_select_tosa_FP_bf16(test_data: input_params):
     pipeline.run()
 
 
+@common.parametrize("test_data", test_data_fp8)
+def test_index_select_tosa_FP_fp8(test_data):
+    input_, dim, index_, tosa_extension = test_data
+    pipeline = TosaPipelineFP[input_params](
+        IndexSelect(),
+        (input_, dim, index_),
+        aten_op=IndexSelect.aten_op,
+        exir_op=IndexSelect.exir_op,
+        compare_tosa_ref_model_outputs=False,
+        tosa_extensions=[tosa_extension],
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_int | test_data_fp)
 def test_index_select_tosa_INT(test_data: input_params):
     # INT profile runs quantized, so we test both int inputs and float inputs here.
diff --git a/backends/arm/test/ops/test_index_tensor.py b/backends/arm/test/ops/test_index_tensor.py
diff --git a/backends/arm/test/ops/test_slice_scatter.py b/backends/arm/test/ops/test_slice_scatter.py
diff --git a/backends/arm/test/ops/test_unfold_copy.py b/backends/arm/test/ops/test_unfold_copy.py