From 8b0b4569527b4837b6ebd25897fdcf222ff6e750 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Mon, 1 Jun 2026 15:58:56 +0200 Subject: [PATCH 1/7] add fast test for whisper --- test/cli/test_cli_test_model_smoke.py | 115 +++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py index b28130658..b39df7242 100644 --- a/test/cli/test_cli_test_model_smoke.py +++ b/test/cli/test_cli_test_model_smoke.py @@ -12,7 +12,13 @@ from tokenizers import Tokenizer from tokenizers.models import WordLevel from tokenizers.pre_tokenizers import Whitespace -from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast +from transformers import ( + LlamaConfig, + LlamaForCausalLM, + PreTrainedTokenizerFast, + WhisperConfig, + WhisperForConditionalGeneration, +) from olive.cli.base import TEST_OUTPUT_MARKER_FILE from olive.common.hf.utils import TEST_MODEL_MARKER_FILE @@ -25,6 +31,11 @@ "local/tiny-random-llama-a", "local/tiny-random-llama-b", "mistralai/Mistral-7B-Instruct-v0.3", + "microsoft/Phi-3-mini-4k-instruct", +) +DEFAULT_WHISPER_MODEL_IDS = ( + "openai/whisper-tiny", + "microsoft/whisper-base", ) MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024 @@ -60,6 +71,43 @@ def _save_local_tiny_llama(model_path: Path): ).save_pretrained(model_path) +def _save_local_tiny_whisper(model_path: Path): + model = WhisperForConditionalGeneration( + WhisperConfig( + vocab_size=32, + num_mel_bins=80, + encoder_layers=2, + encoder_attention_heads=4, + decoder_layers=2, + decoder_attention_heads=4, + d_model=64, + encoder_ffn_dim=128, + decoder_ffn_dim=128, + max_source_positions=16, + max_target_positions=16, + pad_token_id=0, + bos_token_id=1, + eos_token_id=2, + decoder_start_token_id=1, + ) + ) + model.save_pretrained(model_path) + + tokenizer = Tokenizer( + WordLevel( + vocab={"": 0, "": 1, "": 2, "hello": 3, "world": 4}, + unk_token="", + ) + ) + tokenizer.pre_tokenizer = Whitespace() + PreTrainedTokenizerFast( + tokenizer_object=tokenizer, + bos_token="", + eos_token="", + pad_token="", + ).save_pretrained(model_path) + + def _set_offline_gptq_data_config(config_path: Path): config = json.loads(config_path.read_text()) config["passes"]["gptq"]["data_config"] = { @@ -235,6 +283,70 @@ def _list_relative_files(path: Path): return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()} +def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str): + """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder.""" + model_name = model_id.replace("/", "--") + model_path = tmp_path / "models" / model_name + run_output_dir = tmp_path / f"{model_name}-onnx" + + _save_local_tiny_whisper(model_path) + _run_cli_main( + [ + "capture-onnx-graph", + "-m", + str(model_path), + "--use_model_builder", + "--precision", + "fp32", + "--output_path", + str(run_output_dir), + ] + ) + + return run_output_dir + + +class TestCliWhisperSmoke(unittest.TestCase): + """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph.""" + + model_ids = DEFAULT_WHISPER_MODEL_IDS + workdir = None + + def test_whisper_capture_onnx_graph(self): + """Verify that Whisper encoder and decoder are exported to ONNX successfully.""" + if self.workdir is None: + with tempfile.TemporaryDirectory() as temp_dir: + self._assert_whisper_export(Path(temp_dir)) + else: + workdir = Path(self.workdir) + workdir.mkdir(parents=True, exist_ok=True) + self._assert_whisper_export(workdir) + + def _assert_whisper_export(self, tmp_path: Path): + expected_encoder_file = "encoder.onnx" + expected_decoder_file = "decoder.onnx" + for model_id in self.model_ids: + with self.subTest(model_id=model_id): + run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id) + output_files = self._list_relative_files(run_output_dir) + assert expected_encoder_file in output_files, ( + f"Expected {expected_encoder_file} in output, got: {output_files}" + ) + assert expected_decoder_file in output_files, ( + f"Expected {expected_decoder_file} in output, got: {output_files}" + ) + self._assert_file_size_below_limit(run_output_dir / expected_encoder_file) + self._assert_file_size_below_limit(run_output_dir / expected_decoder_file) + + def _assert_file_size_below_limit(self, path: Path): + assert path.exists() + assert path.stat().st_size < MAX_ARTIFACT_SIZE_BYTES + + @staticmethod + def _list_relative_files(path: Path): + return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()} + + def _parse_args(): parser = argparse.ArgumentParser(add_help=False) parser.add_argument("--workdir") @@ -246,6 +358,7 @@ def _parse_args(): parsed_args, remaining = _parse_args() if parsed_args.workdir: TestCliTestModelSmoke.workdir = Path(parsed_args.workdir) + TestCliWhisperSmoke.workdir = Path(parsed_args.workdir) if parsed_args.model_ids: TestCliTestModelSmoke.model_ids = tuple(parsed_args.model_ids) unittest.main(argv=[__file__, *remaining]) From 8409f5975cf0e2828dd4b0f3c7711787beec8954 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:03:15 +0000 Subject: [PATCH 2/7] move transformers import to where it is needed --- test/cli/test_cli_test_model_smoke.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py index b39df7242..2c731a42b 100644 --- a/test/cli/test_cli_test_model_smoke.py +++ b/test/cli/test_cli_test_model_smoke.py @@ -12,14 +12,6 @@ from tokenizers import Tokenizer from tokenizers.models import WordLevel from tokenizers.pre_tokenizers import Whitespace -from transformers import ( - LlamaConfig, - LlamaForCausalLM, - PreTrainedTokenizerFast, - WhisperConfig, - WhisperForConditionalGeneration, -) - from olive.cli.base import TEST_OUTPUT_MARKER_FILE from olive.common.hf.utils import TEST_MODEL_MARKER_FILE @@ -41,6 +33,8 @@ def _save_local_tiny_llama(model_path: Path): + from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast + model = LlamaForCausalLM( LlamaConfig.from_dict( { @@ -72,6 +66,8 @@ def _save_local_tiny_llama(model_path: Path): def _save_local_tiny_whisper(model_path: Path): + from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration + model = WhisperForConditionalGeneration( WhisperConfig( vocab_size=32, From b8fff906a4e83ff1ed9e27f9ea266e8bbba44365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Tue, 2 Jun 2026 16:35:03 +0200 Subject: [PATCH 3/7] lint --- test/cli/test_cli_test_model_smoke.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py index 2c731a42b..2209b97dd 100644 --- a/test/cli/test_cli_test_model_smoke.py +++ b/test/cli/test_cli_test_model_smoke.py @@ -12,6 +12,7 @@ from tokenizers import Tokenizer from tokenizers.models import WordLevel from tokenizers.pre_tokenizers import Whitespace + from olive.cli.base import TEST_OUTPUT_MARKER_FILE from olive.common.hf.utils import TEST_MODEL_MARKER_FILE @@ -70,7 +71,6 @@ def _save_local_tiny_whisper(model_path: Path): model = WhisperForConditionalGeneration( WhisperConfig( - vocab_size=32, num_mel_bins=80, encoder_layers=2, encoder_attention_heads=4, From a8e0e4c7c2702258308f27f70d12a89bd73428e8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:52:05 +0000 Subject: [PATCH 4/7] fix WhisperConfig kwargs lint and ensure HF hub cache dir exists for ModelBuilder --- olive/passes/onnx/model_builder.py | 5 +++++ test/cli/test_cli_test_model_smoke.py | 32 ++++++++++++++------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index c575f035b..98471fd16 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -280,6 +280,11 @@ def _run_for_config( model_attributes = copy.deepcopy(model.model_attributes or {}) + # onnxruntime-genai's create_model calls os.listdir(cache_dir) unconditionally during + # save_model, which raises FileNotFoundError if the directory does not yet exist (e.g. + # in a fresh CI environment where no HuggingFace model has been downloaded before). + Path(HF_HUB_CACHE).mkdir(parents=True, exist_ok=True) + try: logger.debug("Building model with the following args: %s", extra_args) create_model( diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py index 2209b97dd..9d1ef695f 100644 --- a/test/cli/test_cli_test_model_smoke.py +++ b/test/cli/test_cli_test_model_smoke.py @@ -70,21 +70,23 @@ def _save_local_tiny_whisper(model_path: Path): from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration model = WhisperForConditionalGeneration( - WhisperConfig( - num_mel_bins=80, - encoder_layers=2, - encoder_attention_heads=4, - decoder_layers=2, - decoder_attention_heads=4, - d_model=64, - encoder_ffn_dim=128, - decoder_ffn_dim=128, - max_source_positions=16, - max_target_positions=16, - pad_token_id=0, - bos_token_id=1, - eos_token_id=2, - decoder_start_token_id=1, + WhisperConfig.from_dict( + { + "num_mel_bins": 80, + "encoder_layers": 2, + "encoder_attention_heads": 4, + "decoder_layers": 2, + "decoder_attention_heads": 4, + "d_model": 64, + "encoder_ffn_dim": 128, + "decoder_ffn_dim": 128, + "max_source_positions": 16, + "max_target_positions": 16, + "pad_token_id": 0, + "bos_token_id": 1, + "eos_token_id": 2, + "decoder_start_token_id": 1, + } ) ) model.save_pretrained(model_path) From be34bc8a9519eb73d24c38a4b934a243725038f1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:22:44 +0000 Subject: [PATCH 5/7] fix: use marker file to prevent onnxruntime_genai from deleting cache_dir between Whisper encoder and decoder saves --- olive/passes/onnx/model_builder.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index 98471fd16..54c8eb999 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -8,6 +8,7 @@ import importlib import json import logging +import os from enum import IntEnum from pathlib import Path from typing import Any, ClassVar, Union @@ -280,10 +281,17 @@ def _run_for_config( model_attributes = copy.deepcopy(model.model_attributes or {}) - # onnxruntime-genai's create_model calls os.listdir(cache_dir) unconditionally during - # save_model, which raises FileNotFoundError if the directory does not yet exist (e.g. - # in a fresh CI environment where no HuggingFace model has been downloaded before). - Path(HF_HUB_CACHE).mkdir(parents=True, exist_ok=True) + # onnxruntime-genai's save_model cleanup deletes cache_dir when it is empty + # (base.py: `if not os.listdir(self.cache_dir): os.rmdir(self.cache_dir)`). + # For multi-component models like Whisper the encoder and decoder share the + # same cache_dir; the encoder's save_model removes the empty directory before + # the decoder's save_model can run, raising FileNotFoundError. + # Fix: keep a marker file in the cache_dir for the duration of create_model so + # that the directory is never considered empty and never deleted prematurely. + cache_dir_path = Path(HF_HUB_CACHE) + cache_dir_path.mkdir(parents=True, exist_ok=True) + marker_path = cache_dir_path / f".olive_build_{os.getpid()}" + marker_path.touch() try: logger.debug("Building model with the following args: %s", extra_args) @@ -301,12 +309,13 @@ def _run_for_config( except Exception: # if model building fails, clean up the intermediate files in the cache_dir - cache_dir = Path(HF_HUB_CACHE) - if cache_dir.is_dir(): - for file in cache_dir.iterdir(): + if cache_dir_path.is_dir(): + for file in cache_dir_path.iterdir(): if file.suffix == ".bin": file.unlink() raise + finally: + marker_path.unlink(missing_ok=True) # Override default search options with ones from user config genai_config_filepath = str(output_model_filepath.parent / "genai_config.json") From c5f399819d0cc6e6b105dadeef3a6f55c5c510dd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:20:06 +0000 Subject: [PATCH 6/7] make precision a parameter in _run_whisper_capture_onnx_flow and TestCliWhisperSmoke --- test/cli/test_cli_test_model_smoke.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py index 9d1ef695f..3e25c6964 100644 --- a/test/cli/test_cli_test_model_smoke.py +++ b/test/cli/test_cli_test_model_smoke.py @@ -281,7 +281,7 @@ def _list_relative_files(path: Path): return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()} -def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str): +def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"): """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder.""" model_name = model_id.replace("/", "--") model_path = tmp_path / "models" / model_name @@ -295,7 +295,7 @@ def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str): str(model_path), "--use_model_builder", "--precision", - "fp32", + precision, "--output_path", str(run_output_dir), ] @@ -308,6 +308,7 @@ class TestCliWhisperSmoke(unittest.TestCase): """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph.""" model_ids = DEFAULT_WHISPER_MODEL_IDS + precision = "fp32" workdir = None def test_whisper_capture_onnx_graph(self): @@ -325,7 +326,7 @@ def _assert_whisper_export(self, tmp_path: Path): expected_decoder_file = "decoder.onnx" for model_id in self.model_ids: with self.subTest(model_id=model_id): - run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id) + run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision) output_files = self._list_relative_files(run_output_dir) assert expected_encoder_file in output_files, ( f"Expected {expected_encoder_file} in output, got: {output_files}" From af377c353de7b6c590172edb715bfcc69edb049d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 17:01:47 +0000 Subject: [PATCH 7/7] move Whisper smoke tests to test_cli_whisper_smoke.py --- .github/workflows/test-model-fast.yml | 2 +- test/cli/test_cli_test_model_smoke.py | 110 ------------------- test/cli/test_cli_whisper_smoke.py | 145 ++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 111 deletions(-) create mode 100644 test/cli/test_cli_whisper_smoke.py diff --git a/.github/workflows/test-model-fast.yml b/.github/workflows/test-model-fast.yml index 72c9e8547..20af9b524 100644 --- a/.github/workflows/test-model-fast.yml +++ b/.github/workflows/test-model-fast.yml @@ -36,4 +36,4 @@ jobs: - name: Run fast test run: | - python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py + python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py test/cli/test_cli_whisper_smoke.py diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py index 3e25c6964..c52a771eb 100644 --- a/test/cli/test_cli_test_model_smoke.py +++ b/test/cli/test_cli_test_model_smoke.py @@ -26,10 +26,6 @@ "mistralai/Mistral-7B-Instruct-v0.3", "microsoft/Phi-3-mini-4k-instruct", ) -DEFAULT_WHISPER_MODEL_IDS = ( - "openai/whisper-tiny", - "microsoft/whisper-base", -) MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024 @@ -66,46 +62,6 @@ def _save_local_tiny_llama(model_path: Path): ).save_pretrained(model_path) -def _save_local_tiny_whisper(model_path: Path): - from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration - - model = WhisperForConditionalGeneration( - WhisperConfig.from_dict( - { - "num_mel_bins": 80, - "encoder_layers": 2, - "encoder_attention_heads": 4, - "decoder_layers": 2, - "decoder_attention_heads": 4, - "d_model": 64, - "encoder_ffn_dim": 128, - "decoder_ffn_dim": 128, - "max_source_positions": 16, - "max_target_positions": 16, - "pad_token_id": 0, - "bos_token_id": 1, - "eos_token_id": 2, - "decoder_start_token_id": 1, - } - ) - ) - model.save_pretrained(model_path) - - tokenizer = Tokenizer( - WordLevel( - vocab={"": 0, "": 1, "": 2, "hello": 3, "world": 4}, - unk_token="", - ) - ) - tokenizer.pre_tokenizer = Whitespace() - PreTrainedTokenizerFast( - tokenizer_object=tokenizer, - bos_token="", - eos_token="", - pad_token="", - ).save_pretrained(model_path) - - def _set_offline_gptq_data_config(config_path: Path): config = json.loads(config_path.read_text()) config["passes"]["gptq"]["data_config"] = { @@ -281,71 +237,6 @@ def _list_relative_files(path: Path): return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()} -def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"): - """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder.""" - model_name = model_id.replace("/", "--") - model_path = tmp_path / "models" / model_name - run_output_dir = tmp_path / f"{model_name}-onnx" - - _save_local_tiny_whisper(model_path) - _run_cli_main( - [ - "capture-onnx-graph", - "-m", - str(model_path), - "--use_model_builder", - "--precision", - precision, - "--output_path", - str(run_output_dir), - ] - ) - - return run_output_dir - - -class TestCliWhisperSmoke(unittest.TestCase): - """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph.""" - - model_ids = DEFAULT_WHISPER_MODEL_IDS - precision = "fp32" - workdir = None - - def test_whisper_capture_onnx_graph(self): - """Verify that Whisper encoder and decoder are exported to ONNX successfully.""" - if self.workdir is None: - with tempfile.TemporaryDirectory() as temp_dir: - self._assert_whisper_export(Path(temp_dir)) - else: - workdir = Path(self.workdir) - workdir.mkdir(parents=True, exist_ok=True) - self._assert_whisper_export(workdir) - - def _assert_whisper_export(self, tmp_path: Path): - expected_encoder_file = "encoder.onnx" - expected_decoder_file = "decoder.onnx" - for model_id in self.model_ids: - with self.subTest(model_id=model_id): - run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision) - output_files = self._list_relative_files(run_output_dir) - assert expected_encoder_file in output_files, ( - f"Expected {expected_encoder_file} in output, got: {output_files}" - ) - assert expected_decoder_file in output_files, ( - f"Expected {expected_decoder_file} in output, got: {output_files}" - ) - self._assert_file_size_below_limit(run_output_dir / expected_encoder_file) - self._assert_file_size_below_limit(run_output_dir / expected_decoder_file) - - def _assert_file_size_below_limit(self, path: Path): - assert path.exists() - assert path.stat().st_size < MAX_ARTIFACT_SIZE_BYTES - - @staticmethod - def _list_relative_files(path: Path): - return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()} - - def _parse_args(): parser = argparse.ArgumentParser(add_help=False) parser.add_argument("--workdir") @@ -357,7 +248,6 @@ def _parse_args(): parsed_args, remaining = _parse_args() if parsed_args.workdir: TestCliTestModelSmoke.workdir = Path(parsed_args.workdir) - TestCliWhisperSmoke.workdir = Path(parsed_args.workdir) if parsed_args.model_ids: TestCliTestModelSmoke.model_ids = tuple(parsed_args.model_ids) unittest.main(argv=[__file__, *remaining]) diff --git a/test/cli/test_cli_whisper_smoke.py b/test/cli/test_cli_whisper_smoke.py new file mode 100644 index 000000000..11c5948e9 --- /dev/null +++ b/test/cli/test_cli_whisper_smoke.py @@ -0,0 +1,145 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import argparse +import tempfile +import unittest +from pathlib import Path + +from tokenizers import Tokenizer +from tokenizers.models import WordLevel +from tokenizers.pre_tokenizers import Whitespace + +DEFAULT_WHISPER_MODEL_IDS = ( + "openai/whisper-tiny", + "microsoft/whisper-base", +) +MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024 + + +def _run_cli_main(args): + from olive.cli.launcher import main as cli_main + + cli_main(args) + + +def _save_local_tiny_whisper(model_path: Path): + from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration + + model = WhisperForConditionalGeneration( + WhisperConfig.from_dict( + { + "num_mel_bins": 80, + "encoder_layers": 2, + "encoder_attention_heads": 4, + "decoder_layers": 2, + "decoder_attention_heads": 4, + "d_model": 64, + "encoder_ffn_dim": 128, + "decoder_ffn_dim": 128, + "max_source_positions": 16, + "max_target_positions": 16, + "pad_token_id": 0, + "bos_token_id": 1, + "eos_token_id": 2, + "decoder_start_token_id": 1, + } + ) + ) + model.save_pretrained(model_path) + + tokenizer = Tokenizer( + WordLevel( + vocab={"": 0, "": 1, "": 2, "hello": 3, "world": 4}, + unk_token="", + ) + ) + tokenizer.pre_tokenizer = Whitespace() + PreTrainedTokenizerFast( + tokenizer_object=tokenizer, + bos_token="", + eos_token="", + pad_token="", + ).save_pretrained(model_path) + + +def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"): + """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder.""" + model_name = model_id.replace("/", "--") + model_path = tmp_path / "models" / model_name + run_output_dir = tmp_path / f"{model_name}-onnx" + + _save_local_tiny_whisper(model_path) + _run_cli_main( + [ + "capture-onnx-graph", + "-m", + str(model_path), + "--use_model_builder", + "--precision", + precision, + "--output_path", + str(run_output_dir), + ] + ) + + return run_output_dir + + +class TestCliWhisperSmoke(unittest.TestCase): + """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph.""" + + model_ids = DEFAULT_WHISPER_MODEL_IDS + precision = "fp32" + workdir = None + + def test_whisper_capture_onnx_graph(self): + """Verify that Whisper encoder and decoder are exported to ONNX successfully.""" + if self.workdir is None: + with tempfile.TemporaryDirectory() as temp_dir: + self._assert_whisper_export(Path(temp_dir)) + else: + workdir = Path(self.workdir) + workdir.mkdir(parents=True, exist_ok=True) + self._assert_whisper_export(workdir) + + def _assert_whisper_export(self, tmp_path: Path): + expected_encoder_file = "encoder.onnx" + expected_decoder_file = "decoder.onnx" + for model_id in self.model_ids: + with self.subTest(model_id=model_id): + run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision) + output_files = self._list_relative_files(run_output_dir) + assert expected_encoder_file in output_files, ( + f"Expected {expected_encoder_file} in output, got: {output_files}" + ) + assert expected_decoder_file in output_files, ( + f"Expected {expected_decoder_file} in output, got: {output_files}" + ) + self._assert_file_size_below_limit(run_output_dir / expected_encoder_file) + self._assert_file_size_below_limit(run_output_dir / expected_decoder_file) + + def _assert_file_size_below_limit(self, path: Path): + assert path.exists() + assert path.stat().st_size < MAX_ARTIFACT_SIZE_BYTES + + @staticmethod + def _list_relative_files(path: Path): + return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()} + + +def _parse_args(): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument("--workdir") + parser.add_argument("--model-id", dest="model_ids", action="append") + return parser.parse_known_args() + + +if __name__ == "__main__": + parsed_args, remaining = _parse_args() + if parsed_args.workdir: + TestCliWhisperSmoke.workdir = Path(parsed_args.workdir) + if parsed_args.model_ids: + TestCliWhisperSmoke.model_ids = tuple(parsed_args.model_ids) + unittest.main(argv=[__file__, *remaining])