diff --git a/backends/arm/public_api_manifests/api_manifest_running.toml b/backends/arm/public_api_manifests/api_manifest_running.toml
index 2a263a594a5..431599fd2c1 100644
--- a/backends/arm/public_api_manifests/api_manifest_running.toml
+++ b/backends/arm/public_api_manifests/api_manifest_running.toml
@@ -62,7 +62,7 @@ signature = "EthosUPartitioner.register_custom_partition_op(self, op: torch._ops
 
 [python.EthosUQuantizer]
 kind = "class"
-signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
+signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"
 
 [python.EthosUQuantizer.annotate]
 kind = "function"
@@ -150,7 +150,7 @@ signature = "VgfPartitioner.register_custom_partition_op(self, op: torch._ops.Op
 
 [python.VgfQuantizer]
 kind = "class"
-signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
+signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"
 
 [python.VgfQuantizer.annotate]
 kind = "function"
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index 0080d77ab69..1668629507b 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -493,21 +493,23 @@ class TOSAQuantizer(Quantizer):
     """Manage quantization annotations for TOSA-compatible backends.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     """
 
     def __init__(
         self,
         compile_spec_or_tosa_spec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         """Create a TOSA quantizer from a TOSA spec or Arm compile spec.
 
         .. warning::
-            Setting ``use_composable_quantizer=True`` enables an experimental
-            API surface that may change without notice.
+            The composable quantizer is now the default implementation.
+            Setting ``use_composable_quantizer=False`` is deprecated and will
+            be removed in two minor releases.
 
         """
         self.use_composable_quantizer = use_composable_quantizer
@@ -519,10 +521,82 @@ def __init__(
             self.quantizer = _TOSAQuantizerV2(compile_spec_or_tosa_spec)
         else:
-            logger.info(
-                "Using default quantizer in the arm backend. This quantizer is planned to be replaced by the composable quantizer implementation in the future, see https://github.com/pytorch/executorch/issues/17701"
+            # The legacy path is deprecated: log at WARNING, not INFO, so the
+            # notice is not filtered out by the default logging level.
+            logger.warning(
+                "Using deprecated legacy quantizer implementation in the arm backend. Setting use_composable_quantizer=False will be removed in two minor releases. See https://github.com/pytorch/executorch/issues/17701"
             )
             self.quantizer = _TOSAQuantizerV1(compile_spec_or_tosa_spec)
 
+    @staticmethod
+    def _validate_optional_quantization_config(
+        config_name: str, value: object, value_description: str = "value"
+    ) -> None:
+        """Reject values that are neither ``None`` nor a ``QuantizationConfig``.
+
+        Args:
+            config_name (str): Name of the setting, used as the prefix of
+                the error message.
+            value (object): Candidate value to check.
+            value_description (str): Noun placed after ``config_name`` in
+                the error message.
+
+        Raises:
+            TypeError: If ``value`` is neither ``None`` nor a
+                ``QuantizationConfig`` instance.
+
+        """
+        if value is not None and not isinstance(value, QuantizationConfig):
+            raise TypeError(
+                f"{config_name} {value_description} must be "
+                "QuantizationConfig or None, "
+                f"got {type(value).__name__}."
+            )
+
+    @staticmethod
+    def _validate_config_dict(
+        config_name: str,
+        value: object,
+        is_valid_key: Callable[[object], bool],
+        key_description: str,
+    ) -> Dict[Any, Optional[QuantizationConfig]]:
+        """Check that ``value`` is a dict with valid keys and config values.
+
+        Args:
+            config_name (str): Name of the setting, used as the prefix of
+                the error messages.
+            value (object): Candidate mapping to check.
+            is_valid_key (Callable[[object], bool]): Predicate applied to
+                every key of ``value``.
+            key_description (str): Description of an acceptable key, used
+                in the error message for a rejected key.
+
+        Returns:
+            Dict[Any, Optional[QuantizationConfig]]: ``value`` itself, once
+                every key and value has passed validation.
+
+        Raises:
+            TypeError: If ``value`` is not a dict, a key is rejected by
+                ``is_valid_key``, or a value is neither ``None`` nor a
+                ``QuantizationConfig`` instance.
+
+        """
+        if not isinstance(value, dict):
+            raise TypeError(
+                f"{config_name} must be a dict, got {type(value).__name__}."
+            )
+
+        for key, quantization_config in value.items():
+            if not is_valid_key(key):
+                raise TypeError(
+                    f"{config_name} keys must be {key_description}, "
+                    f"got {type(key).__name__}."
+                )
+            TOSAQuantizer._validate_optional_quantization_config(
+                config_name, quantization_config, "values"
+            )
+
+        return value
+
     @property
     def tosa_spec(self):
         return self.quantizer.tosa_spec
@@ -537,12 +574,15 @@ def global_config(self):
 
     @global_config.setter
     def global_config(self, value: Optional[QuantizationConfig]) -> None:
+        # Validate first so an invalid value raises TypeError before either
+        # quantizer implementation is modified.
+        self._validate_optional_quantization_config("global_config", value)
         if isinstance(self.quantizer, _TOSAQuantizerV1):
             self.quantizer.global_config = value
         else:
-            raise NotImplementedError(
-                "Composable quantizer does not allow setting global_config directly. Please use set_global() instead."
-            )
+            # The composable quantizer is configured through set_global()
+            # rather than by assigning an attribute.
+            self.quantizer.set_global(value)
 
     @property
     def io_config(self):
@@ -555,12 +591,16 @@ def io_config(self):
 
     @io_config.setter
     def io_config(self, value: Optional[QuantizationConfig]) -> None:
+        # Validate first so an invalid value raises TypeError before either
+        # quantizer implementation is modified.
+        self._validate_optional_quantization_config("io_config", value)
         if isinstance(self.quantizer, _TOSAQuantizerV1):
             self.quantizer.io_config = value
         else:
-            raise NotImplementedError(
-                "Composable quantizer does not allow setting io_config directly. Please use set_io() instead."
-            )
+            # Remove the existing input/output-finder quantizers before
+            # applying the new config through set_io().
+            self.quantizer.clear_io_config()
+            self.quantizer.set_io(value)
 
     @property
     def module_type_config(self):
@@ -575,12 +611,22 @@ def module_type_config(self):
     def module_type_config(
         self, value: Dict[Callable, Optional[QuantizationConfig]]
     ) -> None:
+        # Validate the whole mapping up front so a bad key or value raises
+        # TypeError before any existing configuration is changed.
+        module_type_config = self._validate_config_dict(
+            "module_type_config",
+            value,
+            callable,
+            "callable",
+        )
         if isinstance(self.quantizer, _TOSAQuantizerV1):
-            self.quantizer.module_type_config = value
+            self.quantizer.module_type_config = module_type_config
         else:
-            raise NotImplementedError(
-                "Composable quantizer does not allow setting module_type_config directly. Please use set_module_type() instead."
-            )
+            # Remove the existing module-type quantizers, then register one
+            # entry per mapping item through set_module_type().
+            self.quantizer.clear_module_type_config()
+            for module_type, quantization_config in module_type_config.items():
+                self.quantizer.set_module_type(module_type, quantization_config)
 
     @property
     def module_name_config(self):
@@ -595,12 +637,22 @@ def module_name_config(self):
     def module_name_config(
         self, value: Dict[str, Optional[QuantizationConfig]]
     ) -> None:
+        # Validate the whole mapping up front so a bad key or value raises
+        # TypeError before any existing configuration is changed.
+        module_name_config = self._validate_config_dict(
+            "module_name_config",
+            value,
+            lambda key: isinstance(key, str),
+            "str",
+        )
         if isinstance(self.quantizer, _TOSAQuantizerV1):
-            self.quantizer.module_name_config = value
+            self.quantizer.module_name_config = module_name_config
         else:
-            raise NotImplementedError(
-                "Composable quantizer does not allow setting module_name_config directly. Please use set_module_name() instead."
-            )
+            # Remove the existing module-name quantizers, then register one
+            # entry per mapping item through set_module_name().
+            self.quantizer.clear_module_name_config()
+            for module_name, quantization_config in module_name_config.items():
+                self.quantizer.set_module_name(module_name, quantization_config)
 
     def set_global(
         self, quantization_config: Optional[QuantizationConfig]
@@ -1124,6 +1172,59 @@ def quantizers(self, value: List[Quantizer]) -> None:
         """
         self._quantizers = value
 
+    def _remove_quantizers_by_node_finder_type(
+        self, node_finder_types: type[NodeFinder] | tuple[type[NodeFinder], ...]
+    ) -> None:
+        """Drop pattern quantizers whose node finder matches the given types.
+
+        Quantizers that are not ``PatternQuantizer`` instances are always
+        kept.
+
+        Args:
+            node_finder_types (type[NodeFinder] | tuple[type[NodeFinder], ...]):
+                Node finder class, or tuple of classes, passed to
+                ``isinstance`` to select the quantizers to remove.
+
+        """
+        self._quantizers = [
+            quantizer
+            for quantizer in self._quantizers
+            if not (
+                isinstance(quantizer, PatternQuantizer)
+                and isinstance(quantizer.node_finder, node_finder_types)
+            )
+        ]
+
+    def clear_module_type_config(self) -> _TOSAQuantizerV2:
+        """Remove pattern quantizers that use a ``ModuleTypeNodeFinder``.
+
+        Returns:
+            _TOSAQuantizerV2: This quantizer instance.
+
+        """
+        self._remove_quantizers_by_node_finder_type(ModuleTypeNodeFinder)
+        return self
+
+    def clear_module_name_config(self) -> _TOSAQuantizerV2:
+        """Remove pattern quantizers that use a ``ModuleNameNodeFinder``.
+
+        Returns:
+            _TOSAQuantizerV2: This quantizer instance.
+
+        """
+        self._remove_quantizers_by_node_finder_type(ModuleNameNodeFinder)
+        return self
+
+    def clear_io_config(self) -> _TOSAQuantizerV2:
+        """Remove pattern quantizers that use an input or output node finder.
+
+        Returns:
+            _TOSAQuantizerV2: This quantizer instance.
+
+        """
+        self._remove_quantizers_by_node_finder_type((InputNodeFinder, OutputNodeFinder))
+        return self
+
     def annotate(self, model):
         reporter = QuantizerReporter(self.quantizers, "FINAL QUANTIZATION REPORT")
         model = super().annotate(model)
@@ -1277,20 +1349,25 @@ class EthosUQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Ethos-U backend.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     Args:
         compile_spec (EthosUCompileSpec): Backend compile specification for
             Ethos-U targets.
-        use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+        use_composable_quantizer (bool): Whether to use the composable
+            quantizer implementation. Setting this to ``False`` is deprecated
+            and will be removed in two minor releases. See
+            [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+            for details.
 
     """
 
     def __init__(
         self,
         compile_spec: EthosUCompileSpec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         super().__init__(compile_spec, use_composable_quantizer)
 
@@ -1299,19 +1376,24 @@ class VgfQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Vgf backend.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     Args:
         compile_spec (VgfCompileSpec): Backend compile specification for Vgf
             targets.
-        use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+        use_composable_quantizer (bool): Whether to use the composable
+            quantizer implementation. Setting this to ``False`` is deprecated
+            and will be removed in two minor releases. See
+            [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+            for details.
 
     """
 
     def __init__(
         self,
         compile_spec: VgfCompileSpec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         super().__init__(compile_spec, use_composable_quantizer)
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
index a59ccff87b1..4173ada370b 100644
--- a/backends/arm/quantizer/arm_quantizer_utils.py
+++ b/backends/arm/quantizer/arm_quantizer_utils.py
@@ -623,6 +623,12 @@ def _annotate_while_with_additional_inputs(
         self.report_accept([root_node])
         return True
 
+    def _should_skip_while_shared_qspec(self, node: Node) -> bool:
+        """Return whether ``node`` is a ``while_loop`` with ``additional_inputs``."""
+        return node.target == torch.ops.higher_order.while_loop and bool(
+            node.meta.get("additional_inputs")
+        )
+
     def _annotate_shared_cluster(self, root_node: Node) -> None:
         if (
             len(self._get_input_nodes_with_float_output(root_node)) == 0
diff --git a/backends/arm/test/misc/test_quant_custom_meta.py b/backends/arm/test/misc/test_quant_custom_meta.py
index cd9964f4511..f64b8067098 100644
--- a/backends/arm/test/misc/test_quant_custom_meta.py
+++ b/backends/arm/test/misc/test_quant_custom_meta.py
@@ -105,5 +105,6 @@ def test_quantized_to_float_transition_tosa_INT_FP(fp_extension: bool):
         )
     pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None)  # type: ignore
     pipeline.quantizer.set_module_type(torch.nn.Conv1d, None)  # type: ignore
+    pipeline.quantizer.set_io(None)  # type: ignore
 
     pipeline.run()
diff --git a/backends/arm/test/misc/test_shared_qspecs.py b/backends/arm/test/misc/test_shared_qspecs.py
index de07bd5f6c2..93129633418 100644
--- a/backends/arm/test/misc/test_shared_qspecs.py
+++ b/backends/arm/test/misc/test_shared_qspecs.py
@@ -87,8 +87,8 @@ class SharedQspecMulipleClusters(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 8},
         "aten.add.Tensor": {_INT8_QSPEC: 2},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 2,
@@ -122,8 +122,8 @@ class SharedQspecInputForkNonShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 4},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 3,
@@ -149,8 +149,8 @@ class SharedQspecInputForkShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 5},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 2,
@@ -178,8 +178,8 @@ class SharedQspecInputForkXShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 4},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 2,
@@ -206,8 +206,8 @@ class SharedQspecInputForkYShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 5},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 2,
@@ -234,8 +234,8 @@ class SharedQspecInputForkXConstant(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 2},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 3},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 2,
@@ -260,8 +260,8 @@ class SharedQspecInputForkYConstant(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 2},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 3},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 1,
@@ -287,8 +287,8 @@ class SharedQspecOutputForkNonShared(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
         "aten.add.Tensor": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 2}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 3,
@@ -315,8 +315,8 @@ class SharedQspecOutputForkShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 4},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 6},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 3}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 6,
@@ -341,10 +341,10 @@ class SharedQspecManyForks(torch.nn.Module):
     qspecs = {
         "quantized_decomposed.quantize_per_tensor.default": {None: 6},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 9},
-        "aten.t.default": {None: 1},
+        "aten.t.default": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.086232387, 104, -128, 127, torch.int8): 9,
@@ -372,8 +372,8 @@ class SharedQspecSurroundedQuantizedOp(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
         "aten.add.Tensor": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.509554982, 123, -128, 127, torch.int8): 3,
@@ -403,8 +403,8 @@ class SharedQspecSurroundedQuantizedOpConstant(torch.nn.Module):
         "aten.ones.default": {_INT8_QSPEC: 1},
         "aten.add.Tensor": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.003921569, -128, -128, 127, torch.int8): 1,
@@ -429,18 +429,22 @@ class SharedQspecSub(torch.nn.Module):
     """A shared qspec node with float input."""
 
     qspecs = {
-        "quantized_decomposed.quantize_per_tensor.default": {None: 2},
-        "quantized_decomposed.dequantize_per_tensor.default": {None: 2},
+        "quantized_decomposed.quantize_per_tensor.default": {None: 4},
+        "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
         "aten.sub.Tensor": {None: 1},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
+            (0.003919654, -128, -128, 127, torch.int8): 1,
             (0.035276882, -128, -128, 127, torch.int8): 2,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
+            (0.003919654, -128, -128, 127, torch.int8): 1,
             (0.035276882, -128, -128, 127, torch.int8): 2,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
     }
 
@@ -462,8 +466,8 @@ class SharedQspecCompetingQspecs(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
         "aten.conv2d.default": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_channel.default": {
             (0, -2147483647, 2147483647, torch.int32): 1,
@@ -502,20 +506,16 @@ class SharedQspecNoQspecs(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 2},
         "aten.sub.Tensor": {None: 2},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (
-                1.5259e-05,
-                -128,
-                -128,
-                127,
-                torch.int8,
-            ): 2,  # The network always has 0 output -> very small scale.
+            (1.5259e-05, -128, -128, 127, torch.int8): 1,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (1.5259e-05, -128, -128, 127, torch.int8): 2,
+            (1.5259e-05, -128, -128, 127, torch.int8): 1,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
     }
 
@@ -542,21 +542,19 @@ class MixedMaximumInt8Int16(torch.nn.Module):
     """A shared qspec node with int16/int8 inputs."""
 
     qspecs = {
-        "quantized_decomposed.quantize_per_tensor.default": {None: 6},
-        "quantized_decomposed.dequantize_per_tensor.default": {None: 6},
+        "quantized_decomposed.quantize_per_tensor.default": {None: 4},
+        "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
     }
-    input_qspecs = {None: 1}
-    output_qspecs = {None: 1}
+    input_qspecs = {_INT8_QSPEC: 1}
+    output_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.007839307, -128, -128, 127, torch.int8): 2,
-            (0.015678614, 0, -128, 127, torch.int8): 2,
-            (0.000244141, 0, -32767, 32767, torch.int16): 2,
+            (0.007839307, -128, -128, 127, torch.int8): 1,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.007839307, -128, -128, 127, torch.int8): 2,
-            (0.015678614, 0, -128, 127, torch.int8): 2,
-            (0.000244141, 0, -32767, 32767, torch.int16): 2,
+            (0.007839307, -128, -128, 127, torch.int8): 1,
+            (0.015678614, 0, -128, 127, torch.int8): 4,
         },
     }
 
diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py
index 6718fedea04..e0d910bd069 100644
--- a/backends/arm/test/ops/test_to_copy.py
+++ b/backends/arm/test/ops/test_to_copy.py
@@ -330,18 +330,14 @@ def test_to_vgf_quant(test_data: Tuple):
     ),
 }
 
-redundant_xfails_FP = {
+redundant_xfails = {
     "rand_int8_int8": "Tracing graph with quantized input is not supported.",
     "rand_int16_int16": "Tracing graph with quantized input is not supported.",
 }
 
-redundant_xfails_INT = redundant_xfails_FP | {
-    "rand_fp16_fp16": "FP16 is not supported",
-}
-
 
 @common.parametrize(
-    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails_FP
+    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails
 )
 def test_to_tosa_FP_REDUNDANT_CAST(test_data: Tuple):
     test_tensor, new_dtype = test_data()
@@ -356,7 +352,7 @@ def test_to_tosa_FP_REDUNDANT_CAST(test_data: Tuple):
 
 
 @common.parametrize(
-    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails_INT
+    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails
 )
 def test_to_tosa_INT_REDUNDANT_CAST(test_data: Tuple):
     test_tensor, new_dtype = test_data()
diff --git a/backends/arm/test/ops/test_transpose_conv2d.py b/backends/arm/test/ops/test_transpose_conv2d.py
index d34679f4bcb..0c627575162 100644
--- a/backends/arm/test/ops/test_transpose_conv2d.py
+++ b/backends/arm/test/ops/test_transpose_conv2d.py
@@ -7,14 +7,14 @@
 
 import conftest
 import torch
-
-from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
     get_symmetric_a8w4_quantization_config,
     get_symmetric_quantization_config,
     TOSAQuantizer,
 )
+
+from executorch.backends.arm.quantizer.quantization_config import TOSAQuantizationConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -361,7 +361,7 @@ def test_conv_transpose2d_tosa_INT_qat_axis1_uses_non_fused_fake_quant(test_data
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
@@ -400,7 +400,7 @@ def test_conv_transpose2d_tosa_INT_grouped_qat_axis0_keeps_fused_fake_quant(test
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
@@ -439,7 +439,7 @@ def test_conv_transpose2d_tosa_INT_ptq_observer_updates_axis(test_data):
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
@@ -477,7 +477,7 @@ def test_conv_transpose2d_tosa_INT_qat_correct_qspec_wrong_ctor_axis(test_data):
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
diff --git a/backends/arm/test/ops/test_unary_combos.py b/backends/arm/test/ops/test_unary_combos.py
index bc4bb0b39d9..2ecd04b9c79 100644
--- a/backends/arm/test/ops/test_unary_combos.py
+++ b/backends/arm/test/ops/test_unary_combos.py
@@ -104,9 +104,7 @@ def test_add_tensor_tosa_INT_combos(model_cls):
 
 
 @common.XfailIfNoCorstone300
-@common.parametrize(
-    "model_cls", MODEL_DATA, xfails={"NegAdd": "Numerical failure. MLBEDSW-11581"}
-)
+@common.parametrize("model_cls", MODEL_DATA)
 def test_add_tensor_u55_INT_combos(model_cls):
     m, inputs, exir = _build(model_cls)
     p = EthosU55PipelineINT[Tensor1](
diff --git a/backends/arm/test/ops/test_while.py b/backends/arm/test/ops/test_while.py
index b5cab047a50..51b56661b50 100644
--- a/backends/arm/test/ops/test_while.py
+++ b/backends/arm/test/ops/test_while.py
@@ -8,6 +8,8 @@
 import torch
 import torch.fx
 
+from executorch.backends.arm.quantizer import get_symmetric_quantization_config
+from executorch.backends.arm.quantizer.arm_quantizer import _TOSAQuantizerV2
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.backends.arm.test.tester.test_pipeline import (
@@ -228,6 +230,35 @@ def test_while_loop_tosa_INT(case: Callable[[], Tuple[torch.nn.Module, Tuple]]):
     pipeline.run()
 
 
+def test_while_loop_tosa_INT_composable_large_threshold():
+    """Run the ``large_threshold`` while_loop case with a composable quantizer.
+
+    The pipeline's inner quantizer is replaced by a ``_TOSAQuantizerV2``
+    configured with the symmetric quantization config, and a ``check_not``
+    stage for ``torch.ops.higher_order.while_loop`` is added after lowering.
+
+    """
+    module, example_inputs = test_cases["large_threshold"]()
+    pipeline = TosaPipelineINT[tuple](
+        module,
+        example_inputs,
+        "torch.ops.higher_order.while_loop",
+        tosa_extensions=["cf"],
+    )
+
+    composable_quantizer = _TOSAQuantizerV2(pipeline.tester.compile_spec)
+    composable_quantizer.set_global(get_symmetric_quantization_config())
+    pipeline.quantizer.quantizer = composable_quantizer
+
+    pipeline.add_stage_after(
+        "to_edge_transform_and_lower",
+        ArmTester.check_not,
+        pipeline.tester,
+        ["torch.ops.higher_order.while_loop"],
+    )
+    pipeline.run()
+
+
 @common.parametrize(
     "case",
     test_cases,
diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
index 68fe9d160aa..c2f7035c89c 100644
--- a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
+++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
@@ -16,18 +16,23 @@ The Arm Ethos-U delegate supports the following quantization schemes:
 ### Quantization API
 
 ```python
-class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
+class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'
 ```
 Quantizer supported by the Arm Ethos-U backend.
 
 .. warning::
-    Setting ``use_composable_quantizer=True`` enables an experimental API
-    surface that may change without notice.
+    The composable quantizer is now the default implementation. Setting
+    ``use_composable_quantizer=False`` is deprecated and will be removed in
+    two minor releases.
 
 Args:
 - **compile_spec (EthosUCompileSpec)**: Backend compile specification for
         Ethos-U targets.
-- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+- **use_composable_quantizer (bool)**: Whether to use the composable
+        quantizer implementation. Setting this to ``False`` is deprecated
+        and will be removed in two minor releases. See
+        [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+        for details.
 
 ```python
 def EthosUQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
diff --git a/docs/source/backends/arm-vgf/arm-vgf-quantization.md b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
index 49ba41f74e1..2dc5b5631e6 100644
--- a/docs/source/backends/arm-vgf/arm-vgf-quantization.md
+++ b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
@@ -35,18 +35,23 @@ setting using the `set_module_name` or `set_module_type` methods.
 ### Quantization API
 
 ```python
-class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
+class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'
 ```
 Quantizer supported by the Arm Vgf backend.
 
 .. warning::
-    Setting ``use_composable_quantizer=True`` enables an experimental API
-    surface that may change without notice.
+    The composable quantizer is now the default implementation. Setting
+    ``use_composable_quantizer=False`` is deprecated and will be removed in
+    two minor releases.
 
 Args:
 - **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf
         targets.
-- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+- **use_composable_quantizer (bool)**: Whether to use the composable
+        quantizer implementation. Setting this to ``False`` is deprecated
+        and will be removed in two minor releases. See
+        [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+        for details.
 
 ```python
 def VgfQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
diff --git a/examples/arm/quantizer_tutorial.ipynb b/examples/arm/quantizer_tutorial.ipynb
index 76979316002..25b99dbd4b5 100644
--- a/examples/arm/quantizer_tutorial.ipynb
+++ b/examples/arm/quantizer_tutorial.ipynb
@@ -16,13 +16,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# WIP: TOSA/EthosU/VgfQuantizer composable quantizer tutorial\n",
+    "# TOSA/EthosU/VgfQuantizer composable quantizer tutorial\n",
     "\n",
     "This is an in-depth tutorial of the new `TOSA/EthosU/VgfQuantizer` API. While the `TOSAQuantizer` is used in the example, both the\n",
     "`EthosUQuantizer` and `VgfQuantizer` directly inherit from this base class. \n",
     "\n",
-    "Note that the main API and functionality remains largely the same to allow for a drop-in replacement, but the underlying framework is different - as will be explained. **Both the quantizer and this tutorial are currently experimental and may change without prior notice.** Refer to https://github.com/pytorch/executorch/issues/17701 for questions and feedback.\n",
-    "\n",
     "Before you begin:\n",
     "1. (In a clean virtual environment with a compatible Python version) Install executorch using `./install_executorch.sh`\n",
     "2. Install Arm TOSA dependencies using `examples/arm/setup.sh --disable-ethos-u-deps`\n",