From 98dc4bbba5538836af300bbfaf0ae91477a3b8a9 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Fri, 22 May 2026 14:59:58 -0700 Subject: [PATCH 1/3] Add mobius to capture onnx graph cli --- olive/cli/capture_onnx.py | 45 ++++++++++++++++++++- test/cli/test_cli.py | 84 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/olive/cli/capture_onnx.py b/olive/cli/capture_onnx.py index 87a2b92ea9..8d55a22ebf 100644 --- a/olive/cli/capture_onnx.py +++ b/olive/cli/capture_onnx.py @@ -113,6 +113,16 @@ def register_subcommand(parser: ArgumentParser): action="store_true", help="Whether to use Model Builder to capture ONNX model.", ) + mb_group.add_argument( + "--use_mobius_builder", + action="store_true", + help=( + "Whether to use MobiusBuilder (mobius-ai) to capture ONNX model. " + "Supports multi-component multimodal models (VLMs). " + "Requires 'pip install mobius-ai'. " + "Mutually exclusive with --use_model_builder and --use_dynamo_exporter." + ), + ) mb_group.add_argument( "--precision", type=str, @@ -197,8 +207,14 @@ def _get_run_config(self, tempdir: str) -> dict: is_diffusers_model = input_model_config["type"].lower() == "diffusersmodel" # whether model is in fp16 or bf16 (currently not supported by CPU EP) - is_fp16_or_bf16 = (not self.args.use_model_builder and self.args.torch_dtype == "float16") or ( - self.args.use_model_builder and self.args.precision in ("fp16", "bf16") + is_fp16_or_bf16 = ( + ( + not self.args.use_model_builder + and not self.args.use_mobius_builder + and self.args.torch_dtype == "float16" + ) + or (self.args.use_model_builder and self.args.precision in ("fp16", "bf16")) + or (self.args.use_mobius_builder and self.args.precision in ("fp16", "bf16")) ) to_replace = [ ("input_model", input_model_config), @@ -213,6 +229,7 @@ def _get_run_config(self, tempdir: str) -> dict: if is_diffusers_model: del config["passes"]["m"] + del config["passes"]["b"] to_replace.extend( [ ( @@ -223,8 +240,30 @@ def _get_run_config(self, tempdir: str) -> dict: (("passes", "c", "target_opset"), self.args.target_opset), ] ) + elif self.args.use_mobius_builder: + if self.args.use_model_builder or self.args.use_dynamo_exporter: + raise ValueError( + "--use_mobius_builder cannot be combined with --use_model_builder or --use_dynamo_exporter." + ) + if self.args.precision not in ("fp32", "fp16", "bf16"): + raise ValueError( + f"MobiusBuilder supports precisions fp32/fp16/bf16; got '{self.args.precision}'. " + "For INT4, capture in fp32/fp16/bf16 first and run a quantization pass afterwards." + ) + del config["passes"]["c"] + del config["passes"]["m"] + to_replace.extend( + [ + (("passes", "b", "precision"), self.args.precision), + ( + ("passes", "b", "runtime"), + "ort-genai" if self.args.use_ort_genai else "none", + ), + ] + ) elif self.args.use_model_builder: del config["passes"]["c"] + del config["passes"]["b"] to_replace.extend( [ (("passes", "m", "precision"), self.args.precision), @@ -245,6 +284,7 @@ def _get_run_config(self, tempdir: str) -> dict: if self.args.int4_accuracy_level is not None: to_replace.append((("passes", "m", "int4_accuracy_level"), self.args.int4_accuracy_level)) else: + del config["passes"]["b"] to_replace.extend( [ ( @@ -300,6 +340,7 @@ def _get_run_config(self, tempdir: str) -> dict: "type": "OnnxConversion", }, "m": {"type": "ModelBuilder", "metadata_only": False}, + "b": {"type": "MobiusBuilder"}, "f": {"type": "DynamicToFixedShape"}, }, "host": "local_system", diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index a7cb39e244..f329c6f609 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -321,6 +321,90 @@ def test_capture_onnx_command_fix_shape(_, mock_run, use_model_builder, tmp_path assert mock_run.call_count == 1 +@patch("olive.workflows.run") +@patch("huggingface_hub.repo_exists", return_value=True) +@pytest.mark.parametrize( + ("precision", "use_ort_genai"), + [ + ("fp16", True), + ("fp32", False), + ("bf16", True), + ], +) +def test_capture_onnx_command_use_mobius_builder(_, mock_run, precision, use_ort_genai, tmp_path): + # setup + output_dir = tmp_path / "output_dir" + model_id = "dummy-model-id" + command_args = [ + "capture-onnx-graph", + "-m", + model_id, + "-o", + str(output_dir), + "--use_mobius_builder", + "--precision", + precision, + ] + if use_ort_genai: + command_args.append("--use_ort_genai") + + # execute + cli_main(command_args) + + config = mock_run.call_args[0][0] + assert config["input_model"]["model_path"] == model_id + # MobiusBuilder ("b") is the only conversion pass; "c" (OnnxConversion) and "m" (ModelBuilder) are removed. + assert "b" in config["passes"] + assert "c" not in config["passes"] + assert "m" not in config["passes"] + assert config["passes"]["b"]["type"] == "MobiusBuilder" + assert config["passes"]["b"]["precision"] == precision + assert config["passes"]["b"]["runtime"] == ("ort-genai" if use_ort_genai else "none") + assert mock_run.call_count == 1 + + +@patch("olive.workflows.run") +@patch("huggingface_hub.repo_exists", return_value=True) +def test_capture_onnx_command_use_mobius_builder_rejects_int4(_, __, tmp_path): + # setup + output_dir = tmp_path / "output_dir" + command_args = [ + "capture-onnx-graph", + "-m", + "dummy-model-id", + "-o", + str(output_dir), + "--use_mobius_builder", + "--precision", + "int4", + ] + + # execute / verify + with pytest.raises(ValueError, match="MobiusBuilder supports precisions fp32/fp16/bf16"): + cli_main(command_args) + + +@patch("olive.workflows.run") +@patch("huggingface_hub.repo_exists", return_value=True) +@pytest.mark.parametrize("conflicting_flag", ["--use_model_builder", "--use_dynamo_exporter"]) +def test_capture_onnx_command_use_mobius_builder_rejects_conflicts(_, __, conflicting_flag, tmp_path): + # setup + output_dir = tmp_path / "output_dir" + command_args = [ + "capture-onnx-graph", + "-m", + "dummy-model-id", + "-o", + str(output_dir), + "--use_mobius_builder", + conflicting_flag, + ] + + # execute / verify + with pytest.raises(ValueError, match="cannot be combined"): + cli_main(command_args) + + @patch("olive.cli.shared_cache.AzureContainerClientFactory") def test_shared_cache_command(mock_AzureContainerClientFactory): # setup From 09f88746ddaef9de358f4292f737285b427a8a89 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 23:20:05 +0000 Subject: [PATCH 2/3] Enforce mutual exclusion of exporter flags at argparse layer --- olive/cli/capture_onnx.py | 42 +++++++++++++++++++-------------------- test/cli/test_cli.py | 3 ++- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/olive/cli/capture_onnx.py b/olive/cli/capture_onnx.py index 8d55a22ebf..28b9e7e3e3 100644 --- a/olive/cli/capture_onnx.py +++ b/olive/cli/capture_onnx.py @@ -66,13 +66,30 @@ def register_subcommand(parser: ArgumentParser): help="The device used to run the model to capture the ONNX graph.", ) - # PyTorch Exporter options - pte_group = sub_parser.add_argument_group("PyTorch Exporter options") - pte_group.add_argument( + # Mutually exclusive exporter flags + exporter_group = sub_parser.add_mutually_exclusive_group() + exporter_group.add_argument( "--use_dynamo_exporter", action="store_true", help="Whether to use dynamo_export API to export ONNX model.", ) + exporter_group.add_argument( + "--use_model_builder", + action="store_true", + help="Whether to use Model Builder to capture ONNX model.", + ) + exporter_group.add_argument( + "--use_mobius_builder", + action="store_true", + help=( + "Whether to use MobiusBuilder (mobius-ai) to capture ONNX model. " + "Supports multi-component multimodal models (VLMs). " + "Requires 'pip install mobius-ai'." + ), + ) + + # PyTorch Exporter options + pte_group = sub_parser.add_argument_group("PyTorch Exporter options") pte_group.add_argument( "--fixed_param_dict", type=parse_dim_dict, @@ -108,21 +125,6 @@ def register_subcommand(parser: ArgumentParser): # Model Builder options mb_group = sub_parser.add_argument_group("Model Builder options") - mb_group.add_argument( - "--use_model_builder", - action="store_true", - help="Whether to use Model Builder to capture ONNX model.", - ) - mb_group.add_argument( - "--use_mobius_builder", - action="store_true", - help=( - "Whether to use MobiusBuilder (mobius-ai) to capture ONNX model. " - "Supports multi-component multimodal models (VLMs). " - "Requires 'pip install mobius-ai'. " - "Mutually exclusive with --use_model_builder and --use_dynamo_exporter." - ), - ) mb_group.add_argument( "--precision", type=str, @@ -241,10 +243,6 @@ def _get_run_config(self, tempdir: str) -> dict: ] ) elif self.args.use_mobius_builder: - if self.args.use_model_builder or self.args.use_dynamo_exporter: - raise ValueError( - "--use_mobius_builder cannot be combined with --use_model_builder or --use_dynamo_exporter." - ) if self.args.precision not in ("fp32", "fp16", "bf16"): raise ValueError( f"MobiusBuilder supports precisions fp32/fp16/bf16; got '{self.args.precision}'. " diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index f329c6f609..cb49181302 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -401,8 +401,9 @@ def test_capture_onnx_command_use_mobius_builder_rejects_conflicts(_, __, confli ] # execute / verify - with pytest.raises(ValueError, match="cannot be combined"): + with pytest.raises(SystemExit) as exc_info: cli_main(command_args) + assert exc_info.value.code == 2 @patch("olive.cli.shared_cache.AzureContainerClientFactory") From 90773dbde3e89a3251b95e27a42e054152537f24 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Wed, 27 May 2026 14:55:11 -0700 Subject: [PATCH 3/3] Prioritize --use_mobius_builder over diffusers detection in capture-onnx-graph When --use_mobius_builder is set, skip is_valid_diffusers_model() and always build an HfModel input config, so mobius handles diffusion pipelines too instead of being silently overridden by the diffusers branch. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/cli/capture_onnx.py | 42 +++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/olive/cli/capture_onnx.py b/olive/cli/capture_onnx.py index 28b9e7e3e3..25349e4b30 100644 --- a/olive/cli/capture_onnx.py +++ b/olive/cli/capture_onnx.py @@ -194,12 +194,16 @@ def run(self): def _get_run_config(self, tempdir: str) -> dict: config = deepcopy(TEMPLATE) - # Check if diffusers model detection is needed - is_diffusers = is_valid_diffusers_model(self.args.model_name_or_path) if self.args.model_name_or_path else False - if is_diffusers: - input_model_config = get_diffusers_input_model(self.args, self.args.model_name_or_path) - else: + if self.args.use_mobius_builder: input_model_config = get_input_model_config(self.args) + else: + is_diffusers = ( + is_valid_diffusers_model(self.args.model_name_or_path) if self.args.model_name_or_path else False + ) + if is_diffusers: + input_model_config = get_diffusers_input_model(self.args, self.args.model_name_or_path) + else: + input_model_config = get_input_model_config(self.args) assert input_model_config["type"].lower() in { "hfmodel", "pytorchmodel", @@ -229,20 +233,7 @@ def _get_run_config(self, tempdir: str) -> dict: ), ] - if is_diffusers_model: - del config["passes"]["m"] - del config["passes"]["b"] - to_replace.extend( - [ - ( - ("passes", "c", "device"), - self.args.conversion_device if self.args.conversion_device == "cpu" else "cuda", - ), - (("passes", "c", "torch_dtype"), self.args.torch_dtype), - (("passes", "c", "target_opset"), self.args.target_opset), - ] - ) - elif self.args.use_mobius_builder: + if self.args.use_mobius_builder: if self.args.precision not in ("fp32", "fp16", "bf16"): raise ValueError( f"MobiusBuilder supports precisions fp32/fp16/bf16; got '{self.args.precision}'. " @@ -259,6 +250,19 @@ def _get_run_config(self, tempdir: str) -> dict: ), ] ) + elif is_diffusers_model: + del config["passes"]["m"] + del config["passes"]["b"] + to_replace.extend( + [ + ( + ("passes", "c", "device"), + self.args.conversion_device if self.args.conversion_device == "cpu" else "cuda", + ), + (("passes", "c", "torch_dtype"), self.args.torch_dtype), + (("passes", "c", "target_opset"), self.args.target_opset), + ] + ) elif self.args.use_model_builder: del config["passes"]["c"] del config["passes"]["b"]