From 8b0b4569527b4837b6ebd25897fdcf222ff6e750 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@microsoft.com>
Date: Mon, 1 Jun 2026 15:58:56 +0200
Subject: [PATCH 1/7] add fast test for whisper

---
 test/cli/test_cli_test_model_smoke.py | 115 +++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 1 deletion(-)
diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index b28130658..b39df7242 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -12,7 +12,13 @@
 from tokenizers import Tokenizer
 from tokenizers.models import WordLevel
 from tokenizers.pre_tokenizers import Whitespace
-from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast
+from transformers import (
+    LlamaConfig,
+    LlamaForCausalLM,
+    PreTrainedTokenizerFast,
+    WhisperConfig,
+    WhisperForConditionalGeneration,
+)
 
 from olive.cli.base import TEST_OUTPUT_MARKER_FILE
 from olive.common.hf.utils import TEST_MODEL_MARKER_FILE
@@ -25,6 +31,11 @@
     "local/tiny-random-llama-a",
     "local/tiny-random-llama-b",
     "mistralai/Mistral-7B-Instruct-v0.3",
+    "microsoft/Phi-3-mini-4k-instruct",
+)
+DEFAULT_WHISPER_MODEL_IDS = (
+    "openai/whisper-tiny",
+    "microsoft/whisper-base",
 )
 MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024
 
@@ -60,6 +71,43 @@ def _save_local_tiny_llama(model_path: Path):
     ).save_pretrained(model_path)
 
 
+def _save_local_tiny_whisper(model_path: Path):
+    model = WhisperForConditionalGeneration(
+        WhisperConfig(
+            vocab_size=32,
+            num_mel_bins=80,
+            encoder_layers=2,
+            encoder_attention_heads=4,
+            decoder_layers=2,
+            decoder_attention_heads=4,
+            d_model=64,
+            encoder_ffn_dim=128,
+            decoder_ffn_dim=128,
+            max_source_positions=16,
+            max_target_positions=16,
+            pad_token_id=0,
+            bos_token_id=1,
+            eos_token_id=2,
+            decoder_start_token_id=1,
+        )
+    )
+    model.save_pretrained(model_path)
+
+    tokenizer = Tokenizer(
+        WordLevel(
+            vocab={"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4},
+            unk_token="<pad>",
+        )
+    )
+    tokenizer.pre_tokenizer = Whitespace()
+    PreTrainedTokenizerFast(
+        tokenizer_object=tokenizer,
+        bos_token="<bos>",
+        eos_token="<eos>",
+        pad_token="<pad>",
+    ).save_pretrained(model_path)
+
+
 def _set_offline_gptq_data_config(config_path: Path):
     config = json.loads(config_path.read_text())
     config["passes"]["gptq"]["data_config"] = {
@@ -235,6 +283,70 @@ def _list_relative_files(path: Path):
         return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
 
 
+def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str):
+    """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder."""
+    model_name = model_id.replace("/", "--")
+    model_path = tmp_path / "models" / model_name
+    run_output_dir = tmp_path / f"{model_name}-onnx"
+
+    _save_local_tiny_whisper(model_path)
+    _run_cli_main(
+        [
+            "capture-onnx-graph",
+            "-m",
+            str(model_path),
+            "--use_model_builder",
+            "--precision",
+            "fp32",
+            "--output_path",
+            str(run_output_dir),
+        ]
+    )
+
+    return run_output_dir
+
+
+class TestCliWhisperSmoke(unittest.TestCase):
+    """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""
+
+    model_ids = DEFAULT_WHISPER_MODEL_IDS
+    workdir = None
+
+    def test_whisper_capture_onnx_graph(self):
+        """Verify that Whisper encoder and decoder are exported to ONNX successfully."""
+        if self.workdir is None:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                self._assert_whisper_export(Path(temp_dir))
+        else:
+            workdir = Path(self.workdir)
+            workdir.mkdir(parents=True, exist_ok=True)
+            self._assert_whisper_export(workdir)
+
+    def _assert_whisper_export(self, tmp_path: Path):
+        expected_encoder_file = "encoder.onnx"
+        expected_decoder_file = "decoder.onnx"
+        for model_id in self.model_ids:
+            with self.subTest(model_id=model_id):
+                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id)
+                output_files = self._list_relative_files(run_output_dir)
+                assert expected_encoder_file in output_files, (
+                    f"Expected {expected_encoder_file} in output, got: {output_files}"
+                )
+                assert expected_decoder_file in output_files, (
+                    f"Expected {expected_decoder_file} in output, got: {output_files}"
+                )
+                self._assert_file_size_below_limit(run_output_dir / expected_encoder_file)
+                self._assert_file_size_below_limit(run_output_dir / expected_decoder_file)
+
+    def _assert_file_size_below_limit(self, path: Path):
+        assert path.exists()
+        assert path.stat().st_size < MAX_ARTIFACT_SIZE_BYTES
+
+    @staticmethod
+    def _list_relative_files(path: Path):
+        return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
+
+
 def _parse_args():
     parser = argparse.ArgumentParser(add_help=False)
     parser.add_argument("--workdir")
@@ -246,6 +358,7 @@ def _parse_args():
     parsed_args, remaining = _parse_args()
     if parsed_args.workdir:
         TestCliTestModelSmoke.workdir = Path(parsed_args.workdir)
+        TestCliWhisperSmoke.workdir = Path(parsed_args.workdir)
     if parsed_args.model_ids:
         TestCliTestModelSmoke.model_ids = tuple(parsed_args.model_ids)
     unittest.main(argv=[__file__, *remaining])

From 8409f5975cf0e2828dd4b0f3c7711787beec8954 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Jun 2026 14:03:15 +0000
Subject: [PATCH 2/7] move transformers import to where it is needed

---
 test/cli/test_cli_test_model_smoke.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index b39df7242..2c731a42b 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -12,14 +12,6 @@
 from tokenizers import Tokenizer
 from tokenizers.models import WordLevel
 from tokenizers.pre_tokenizers import Whitespace
-from transformers import (
-    LlamaConfig,
-    LlamaForCausalLM,
-    PreTrainedTokenizerFast,
-    WhisperConfig,
-    WhisperForConditionalGeneration,
-)
-
 from olive.cli.base import TEST_OUTPUT_MARKER_FILE
 from olive.common.hf.utils import TEST_MODEL_MARKER_FILE
 
@@ -41,6 +33,8 @@
 
 
 def _save_local_tiny_llama(model_path: Path):
+    from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast
+
     model = LlamaForCausalLM(
         LlamaConfig.from_dict(
             {
@@ -72,6 +66,8 @@ def _save_local_tiny_llama(model_path: Path):
 
 
 def _save_local_tiny_whisper(model_path: Path):
+    from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration
+
     model = WhisperForConditionalGeneration(
         WhisperConfig(
             vocab_size=32,

From b8fff906a4e83ff1ed9e27f9ea266e8bbba44365 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@microsoft.com>
Date: Tue, 2 Jun 2026 16:35:03 +0200
Subject: [PATCH 3/7] lint

---
 test/cli/test_cli_test_model_smoke.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index 2c731a42b..2209b97dd 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -12,6 +12,7 @@
 from tokenizers import Tokenizer
 from tokenizers.models import WordLevel
 from tokenizers.pre_tokenizers import Whitespace
+
 from olive.cli.base import TEST_OUTPUT_MARKER_FILE
 from olive.common.hf.utils import TEST_MODEL_MARKER_FILE
 
@@ -70,7 +71,6 @@ def _save_local_tiny_whisper(model_path: Path):
 
     model = WhisperForConditionalGeneration(
         WhisperConfig(
-            vocab_size=32,
             num_mel_bins=80,
             encoder_layers=2,
             encoder_attention_heads=4,

From a8e0e4c7c2702258308f27f70d12a89bd73428e8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 14:52:05 +0000
Subject: [PATCH 4/7] fix WhisperConfig kwargs lint and ensure HF hub cache dir
 exists for ModelBuilder

---
 olive/passes/onnx/model_builder.py    |  5 +++++
 test/cli/test_cli_test_model_smoke.py | 32 ++++++++++++++-------------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
index c575f035b..98471fd16 100644
--- a/olive/passes/onnx/model_builder.py
+++ b/olive/passes/onnx/model_builder.py
@@ -280,6 +280,11 @@ def _run_for_config(
 
         model_attributes = copy.deepcopy(model.model_attributes or {})
 
+        # onnxruntime-genai's create_model calls os.listdir(cache_dir) unconditionally during
+        # save_model, which raises FileNotFoundError if the directory does not yet exist (e.g.
+        # in a fresh CI environment where no HuggingFace model has been downloaded before).
+        Path(HF_HUB_CACHE).mkdir(parents=True, exist_ok=True)
+
         try:
             logger.debug("Building model with the following args: %s", extra_args)
             create_model(
diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index 2209b97dd..9d1ef695f 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -70,21 +70,23 @@ def _save_local_tiny_whisper(model_path: Path):
     from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration
 
     model = WhisperForConditionalGeneration(
-        WhisperConfig(
-            num_mel_bins=80,
-            encoder_layers=2,
-            encoder_attention_heads=4,
-            decoder_layers=2,
-            decoder_attention_heads=4,
-            d_model=64,
-            encoder_ffn_dim=128,
-            decoder_ffn_dim=128,
-            max_source_positions=16,
-            max_target_positions=16,
-            pad_token_id=0,
-            bos_token_id=1,
-            eos_token_id=2,
-            decoder_start_token_id=1,
+        WhisperConfig.from_dict(
+            {
+                "num_mel_bins": 80,
+                "encoder_layers": 2,
+                "encoder_attention_heads": 4,
+                "decoder_layers": 2,
+                "decoder_attention_heads": 4,
+                "d_model": 64,
+                "encoder_ffn_dim": 128,
+                "decoder_ffn_dim": 128,
+                "max_source_positions": 16,
+                "max_target_positions": 16,
+                "pad_token_id": 0,
+                "bos_token_id": 1,
+                "eos_token_id": 2,
+                "decoder_start_token_id": 1,
+            }
         )
     )
     model.save_pretrained(model_path)

From be34bc8a9519eb73d24c38a4b934a243725038f1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 15:22:44 +0000
Subject: [PATCH 5/7] fix: use marker file to prevent onnxruntime_genai from
 deleting cache_dir between Whisper encoder and decoder saves

---
 olive/passes/onnx/model_builder.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
index 98471fd16..54c8eb999 100644
--- a/olive/passes/onnx/model_builder.py
+++ b/olive/passes/onnx/model_builder.py
@@ -8,6 +8,7 @@
 import importlib
 import json
 import logging
+import os
 from enum import IntEnum
 from pathlib import Path
 from typing import Any, ClassVar, Union
@@ -280,10 +281,17 @@ def _run_for_config(
 
         model_attributes = copy.deepcopy(model.model_attributes or {})
 
-        # onnxruntime-genai's create_model calls os.listdir(cache_dir) unconditionally during
-        # save_model, which raises FileNotFoundError if the directory does not yet exist (e.g.
-        # in a fresh CI environment where no HuggingFace model has been downloaded before).
-        Path(HF_HUB_CACHE).mkdir(parents=True, exist_ok=True)
+        # onnxruntime-genai's save_model cleanup deletes cache_dir when it is empty
+        # (base.py: `if not os.listdir(self.cache_dir): os.rmdir(self.cache_dir)`).
+        # For multi-component models like Whisper the encoder and decoder share the
+        # same cache_dir; the encoder's save_model removes the empty directory before
+        # the decoder's save_model can run, raising FileNotFoundError.
+        # Fix: keep a marker file in the cache_dir for the duration of create_model so
+        # that the directory is never considered empty and never deleted prematurely.
+        cache_dir_path = Path(HF_HUB_CACHE)
+        cache_dir_path.mkdir(parents=True, exist_ok=True)
+        marker_path = cache_dir_path / f".olive_build_{os.getpid()}"
+        marker_path.touch()
 
         try:
             logger.debug("Building model with the following args: %s", extra_args)
@@ -301,12 +309,13 @@ def _run_for_config(
 
         except Exception:
             # if model building fails, clean up the intermediate files in the cache_dir
-            cache_dir = Path(HF_HUB_CACHE)
-            if cache_dir.is_dir():
-                for file in cache_dir.iterdir():
+            if cache_dir_path.is_dir():
+                for file in cache_dir_path.iterdir():
                     if file.suffix == ".bin":
                         file.unlink()
             raise
+        finally:
+            marker_path.unlink(missing_ok=True)
 
         # Override default search options with ones from user config
         genai_config_filepath = str(output_model_filepath.parent / "genai_config.json")

From c5f399819d0cc6e6b105dadeef3a6f55c5c510dd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 16:20:06 +0000
Subject: [PATCH 6/7] make precision a parameter in
 _run_whisper_capture_onnx_flow and TestCliWhisperSmoke

---
 test/cli/test_cli_test_model_smoke.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index 9d1ef695f..3e25c6964 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -281,7 +281,7 @@ def _list_relative_files(path: Path):
         return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
 
 
-def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str):
+def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"):
     """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder."""
     model_name = model_id.replace("/", "--")
     model_path = tmp_path / "models" / model_name
@@ -295,7 +295,7 @@ def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str):
             str(model_path),
             "--use_model_builder",
             "--precision",
-            "fp32",
+            precision,
             "--output_path",
             str(run_output_dir),
         ]
@@ -308,6 +308,7 @@ class TestCliWhisperSmoke(unittest.TestCase):
     """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""
 
     model_ids = DEFAULT_WHISPER_MODEL_IDS
+    precision = "fp32"
     workdir = None
 
     def test_whisper_capture_onnx_graph(self):
@@ -325,7 +326,7 @@ def _assert_whisper_export(self, tmp_path: Path):
         expected_decoder_file = "decoder.onnx"
         for model_id in self.model_ids:
             with self.subTest(model_id=model_id):
-                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id)
+                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision)
                 output_files = self._list_relative_files(run_output_dir)
                 assert expected_encoder_file in output_files, (
                     f"Expected {expected_encoder_file} in output, got: {output_files}"

From af377c353de7b6c590172edb715bfcc69edb049d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 17:01:47 +0000
Subject: [PATCH 7/7] move Whisper smoke tests to test_cli_whisper_smoke.py

---
 .github/workflows/test-model-fast.yml |   2 +-
 test/cli/test_cli_test_model_smoke.py | 110 -------------------
 test/cli/test_cli_whisper_smoke.py    | 145 ++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 111 deletions(-)
 create mode 100644 test/cli/test_cli_whisper_smoke.py

diff --git a/.github/workflows/test-model-fast.yml b/.github/workflows/test-model-fast.yml
index 72c9e8547..20af9b524 100644
--- a/.github/workflows/test-model-fast.yml
+++ b/.github/workflows/test-model-fast.yml
@@ -36,4 +36,4 @@ jobs:
 
       - name: Run fast test
         run: |
-          python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py
+          python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py test/cli/test_cli_whisper_smoke.py
diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index 3e25c6964..c52a771eb 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -26,10 +26,6 @@
     "mistralai/Mistral-7B-Instruct-v0.3",
     "microsoft/Phi-3-mini-4k-instruct",
 )
-DEFAULT_WHISPER_MODEL_IDS = (
-    "openai/whisper-tiny",
-    "microsoft/whisper-base",
-)
 MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024
 
 
@@ -66,46 +62,6 @@ def _save_local_tiny_llama(model_path: Path):
     ).save_pretrained(model_path)
 
 
-def _save_local_tiny_whisper(model_path: Path):
-    from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration
-
-    model = WhisperForConditionalGeneration(
-        WhisperConfig.from_dict(
-            {
-                "num_mel_bins": 80,
-                "encoder_layers": 2,
-                "encoder_attention_heads": 4,
-                "decoder_layers": 2,
-                "decoder_attention_heads": 4,
-                "d_model": 64,
-                "encoder_ffn_dim": 128,
-                "decoder_ffn_dim": 128,
-                "max_source_positions": 16,
-                "max_target_positions": 16,
-                "pad_token_id": 0,
-                "bos_token_id": 1,
-                "eos_token_id": 2,
-                "decoder_start_token_id": 1,
-            }
-        )
-    )
-    model.save_pretrained(model_path)
-
-    tokenizer = Tokenizer(
-        WordLevel(
-            vocab={"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4},
-            unk_token="<pad>",
-        )
-    )
-    tokenizer.pre_tokenizer = Whitespace()
-    PreTrainedTokenizerFast(
-        tokenizer_object=tokenizer,
-        bos_token="<bos>",
-        eos_token="<eos>",
-        pad_token="<pad>",
-    ).save_pretrained(model_path)
-
-
 def _set_offline_gptq_data_config(config_path: Path):
     config = json.loads(config_path.read_text())
     config["passes"]["gptq"]["data_config"] = {
@@ -281,71 +237,6 @@ def _list_relative_files(path: Path):
         return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
 
 
-def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"):
-    """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder."""
-    model_name = model_id.replace("/", "--")
-    model_path = tmp_path / "models" / model_name
-    run_output_dir = tmp_path / f"{model_name}-onnx"
-
-    _save_local_tiny_whisper(model_path)
-    _run_cli_main(
-        [
-            "capture-onnx-graph",
-            "-m",
-            str(model_path),
-            "--use_model_builder",
-            "--precision",
-            precision,
-            "--output_path",
-            str(run_output_dir),
-        ]
-    )
-
-    return run_output_dir
-
-
-class TestCliWhisperSmoke(unittest.TestCase):
-    """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""
-
-    model_ids = DEFAULT_WHISPER_MODEL_IDS
-    precision = "fp32"
-    workdir = None
-
-    def test_whisper_capture_onnx_graph(self):
-        """Verify that Whisper encoder and decoder are exported to ONNX successfully."""
-        if self.workdir is None:
-            with tempfile.TemporaryDirectory() as temp_dir:
-                self._assert_whisper_export(Path(temp_dir))
-        else:
-            workdir = Path(self.workdir)
-            workdir.mkdir(parents=True, exist_ok=True)
-            self._assert_whisper_export(workdir)
-
-    def _assert_whisper_export(self, tmp_path: Path):
-        expected_encoder_file = "encoder.onnx"
-        expected_decoder_file = "decoder.onnx"
-        for model_id in self.model_ids:
-            with self.subTest(model_id=model_id):
-                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision)
-                output_files = self._list_relative_files(run_output_dir)
-                assert expected_encoder_file in output_files, (
-                    f"Expected {expected_encoder_file} in output, got: {output_files}"
-                )
-                assert expected_decoder_file in output_files, (
-                    f"Expected {expected_decoder_file} in output, got: {output_files}"
-                )
-                self._assert_file_size_below_limit(run_output_dir / expected_encoder_file)
-                self._assert_file_size_below_limit(run_output_dir / expected_decoder_file)
-
-    def _assert_file_size_below_limit(self, path: Path):
-        assert path.exists()
-        assert path.stat().st_size < MAX_ARTIFACT_SIZE_BYTES
-
-    @staticmethod
-    def _list_relative_files(path: Path):
-        return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
-
-
 def _parse_args():
     parser = argparse.ArgumentParser(add_help=False)
     parser.add_argument("--workdir")
@@ -357,7 +248,6 @@ def _parse_args():
     parsed_args, remaining = _parse_args()
     if parsed_args.workdir:
         TestCliTestModelSmoke.workdir = Path(parsed_args.workdir)
-        TestCliWhisperSmoke.workdir = Path(parsed_args.workdir)
     if parsed_args.model_ids:
         TestCliTestModelSmoke.model_ids = tuple(parsed_args.model_ids)
     unittest.main(argv=[__file__, *remaining])
diff --git a/test/cli/test_cli_whisper_smoke.py b/test/cli/test_cli_whisper_smoke.py
new file mode 100644
index 000000000..11c5948e9
--- /dev/null
+++ b/test/cli/test_cli_whisper_smoke.py
@@ -0,0 +1,145 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import argparse
+import tempfile
+import unittest
+from pathlib import Path
+
+from tokenizers import Tokenizer
+from tokenizers.models import WordLevel
+from tokenizers.pre_tokenizers import Whitespace
+
+DEFAULT_WHISPER_MODEL_IDS = (
+    "openai/whisper-tiny",
+    "microsoft/whisper-base",
+)
+MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024
+
+
+def _run_cli_main(args):
+    """Invoke the Olive CLI entry point in-process with the given argument list."""
+    from olive.cli import launcher
+    launcher.main(args)
+
+
+def _save_local_tiny_whisper(model_path: Path) -> None:  # saves a small Whisper model and a 5-word tokenizer
+    from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration
+
+    model = WhisperForConditionalGeneration(  # instantiated from the config below, not loaded from a checkpoint
+        WhisperConfig.from_dict(
+            {
+                "num_mel_bins": 80,
+                "encoder_layers": 2,
+                "encoder_attention_heads": 4,
+                "decoder_layers": 2,
+                "decoder_attention_heads": 4,
+                "d_model": 64,
+                "encoder_ffn_dim": 128,
+                "decoder_ffn_dim": 128,
+                "max_source_positions": 16,
+                "max_target_positions": 16,
+                "pad_token_id": 0,
+                "bos_token_id": 1,
+                "eos_token_id": 2,
+                "decoder_start_token_id": 1,
+            }
+        )
+    )
+    model.save_pretrained(model_path)  # the tokenizer below is saved into this same directory
+
+    tokenizer = Tokenizer(
+        WordLevel(
+            vocab={"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4},  # ids 0-2 match the config above
+            unk_token="<pad>",  # out-of-vocabulary words map to the pad token (id 0)
+        )
+    )
+    tokenizer.pre_tokenizer = Whitespace()
+    PreTrainedTokenizerFast(
+        tokenizer_object=tokenizer,
+        bos_token="<bos>",
+        eos_token="<eos>",
+        pad_token="<pad>",
+    ).save_pretrained(model_path)
+
+
+def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32") -> Path:
+    """Save a tiny Whisper model under tmp_path, export it via capture-onnx-graph, and return the output dir."""
+    model_name = model_id.replace("/", "--")  # flatten "org/name" so the id maps to a single directory name
+    model_path = tmp_path / "models" / model_name
+    run_output_dir = tmp_path / f"{model_name}-onnx"
+
+    _save_local_tiny_whisper(model_path)
+    _run_cli_main(
+        [
+            "capture-onnx-graph",
+            "-m",
+            str(model_path),  # the locally saved directory is passed, not the hub id
+            "--use_model_builder",
+            "--precision",
+            precision,
+            "--output_path",
+            str(run_output_dir),
+        ]
+    )
+
+    return run_output_dir
+
+
+class TestCliWhisperSmoke(unittest.TestCase):
+    """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""
+
+    model_ids = DEFAULT_WHISPER_MODEL_IDS  # one export run (and one subTest) per id
+    precision = "fp32"  # handed to the export flow for every model id
+    workdir = None  # None: use a temporary directory; otherwise outputs are written under this path
+
+    def test_whisper_capture_onnx_graph(self):
+        """Verify that Whisper encoder and decoder are exported to ONNX successfully."""
+        if self.workdir is None:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                self._assert_whisper_export(Path(temp_dir))
+        else:
+            workdir = Path(self.workdir)
+            workdir.mkdir(parents=True, exist_ok=True)
+            self._assert_whisper_export(workdir)
+
+    def _assert_whisper_export(self, tmp_path: Path):
+        """Export every configured model and check that both ONNX graphs exist and stay small.
+
+        unittest assertion methods replace bare asserts so the checks still run under ``python -O``.
+        """
+        expected_files = ("encoder.onnx", "decoder.onnx")
+        for model_id in self.model_ids:
+            with self.subTest(model_id=model_id):
+                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision)
+                output_files = self._list_relative_files(run_output_dir)
+                for expected_file in expected_files:
+                    self.assertIn(
+                        expected_file, output_files, f"Expected {expected_file} in output, got: {output_files}"
+                    )
+                    self._assert_file_size_below_limit(run_output_dir / expected_file)
+
+    def _assert_file_size_below_limit(self, path: Path):  # failure messages name the offending path
+        self.assertTrue(path.exists(), f"Expected artifact is missing: {path}")
+        self.assertLess(path.stat().st_size, MAX_ARTIFACT_SIZE_BYTES, f"Artifact is too large: {path}")
+
+    @staticmethod
+    def _list_relative_files(path: Path):
+        return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
+
+
+def _parse_args():  # returns (namespace of the options below, list of unrecognized args)
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument("--workdir")
+    parser.add_argument("--model-id", dest="model_ids", action="append")  # repeatable; values collect in a list
+    return parser.parse_known_args()
+
+
+if __name__ == "__main__":
+    parsed_args, remaining = _parse_args()
+    if parsed_args.workdir:
+        TestCliWhisperSmoke.workdir = Path(parsed_args.workdir)  # overrides the class-level default
+    if parsed_args.model_ids:
+        TestCliWhisperSmoke.model_ids = tuple(parsed_args.model_ids)  # replaces the default model id tuple
+    unittest.main(argv=[__file__, *remaining])  # options not parsed above are passed on to unittest