microsoft · xadupre · Jun 1, 2026 · Jun 1, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/.github/workflows/test-model-fast.yml b/.github/workflows/test-model-fast.yml
@@ -36,4 +36,4 @@ jobs:
 
       - name: Run fast test
         run: |
-          python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py
+          python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py test/cli/test_cli_whisper_smoke.py
diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
@@ -8,6 +8,7 @@
 import importlib
 import json
 import logging
+import os
 from enum import IntEnum
 from pathlib import Path
 from typing import Any, ClassVar, Union
@@ -280,6 +281,18 @@ def _run_for_config(
 
         model_attributes = copy.deepcopy(model.model_attributes or {})
 
+        # onnxruntime-genai's save_model cleanup deletes cache_dir when it is empty
+        # (base.py: `if not os.listdir(self.cache_dir): os.rmdir(self.cache_dir)`).
+        # For multi-component models like Whisper the encoder and decoder share the
+        # same cache_dir; the encoder's save_model removes the empty directory before
+        # the decoder's save_model can run, raising FileNotFoundError.
+        # Fix: keep a marker file in the cache_dir for the duration of create_model so
+        # that the directory is never considered empty and never deleted prematurely.
+        cache_dir_path = Path(HF_HUB_CACHE)
+        cache_dir_path.mkdir(parents=True, exist_ok=True)
+        marker_path = cache_dir_path / f".olive_build_{os.getpid()}"
+        marker_path.touch()
+
         try:
             logger.debug("Building model with the following args: %s", extra_args)
             create_model(
@@ -296,12 +309,13 @@ def _run_for_config(
 
         except Exception:
             # if model building fails, clean up the intermediate files in the cache_dir
-            cache_dir = Path(HF_HUB_CACHE)
-            if cache_dir.is_dir():
-                for file in cache_dir.iterdir():
+            if cache_dir_path.is_dir():
+                for file in cache_dir_path.iterdir():
                     if file.suffix == ".bin":
                         file.unlink()
             raise
+        finally:
+            marker_path.unlink(missing_ok=True)
 
         # Override default search options with ones from user config
         genai_config_filepath = str(output_model_filepath.parent / "genai_config.json")

diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
@@ -12,7 +12,6 @@
 from tokenizers import Tokenizer
 from tokenizers.models import WordLevel
 from tokenizers.pre_tokenizers import Whitespace
-from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast
 
 from olive.cli.base import TEST_OUTPUT_MARKER_FILE
 from olive.common.hf.utils import TEST_MODEL_MARKER_FILE
@@ -25,11 +24,14 @@
     "local/tiny-random-llama-a",
     "local/tiny-random-llama-b",
     "mistralai/Mistral-7B-Instruct-v0.3",
+    "microsoft/Phi-3-mini-4k-instruct",
 )
 MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024
 
 
 def _save_local_tiny_llama(model_path: Path):
+    from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast
+
     model = LlamaForCausalLM(
         LlamaConfig.from_dict(
             {

diff --git a/test/cli/test_cli_whisper_smoke.py b/test/cli/test_cli_whisper_smoke.py
@@ -0,0 +1,145 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import argparse
+import tempfile
+import unittest
+from pathlib import Path
+
+from tokenizers import Tokenizer
+from tokenizers.models import WordLevel
+from tokenizers.pre_tokenizers import Whitespace
+
+DEFAULT_WHISPER_MODEL_IDS = (  # labels only: each id just names the directories of a locally built tiny Whisper
+    "openai/whisper-tiny",
+    "microsoft/whisper-base",  # NOTE(review): upstream repo is presumably "openai/whisper-base" - confirm
+)
+MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024  # 1 MiB; each exported encoder/decoder .onnx must be strictly smaller
+
+
+def _run_cli_main(args):  # runs the Olive CLI launcher with ``args``; the import happens at call time
+    from olive.cli.launcher import main as launch_cli
+
+    launch_cli(args)
+
+
+def _save_local_tiny_whisper(model_path: Path):
+    """Save a tiny Whisper checkpoint and a toy tokenizer under ``model_path``.
+
+    The model is built from the inline config below; no pretrained weights are loaded.
+    """
+    from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration
+
+    tiny_config = WhisperConfig.from_dict(
+        {
+            "num_mel_bins": 80,
+            "encoder_layers": 2,
+            "encoder_attention_heads": 4,
+            "decoder_layers": 2,
+            "decoder_attention_heads": 4,
+            "d_model": 64,
+            "encoder_ffn_dim": 128,
+            "decoder_ffn_dim": 128,
+            "max_source_positions": 16,
+            "max_target_positions": 16,
+            "pad_token_id": 0,
+            "bos_token_id": 1,
+            "eos_token_id": 2,
+            "decoder_start_token_id": 1,
+        }
+    )
+    WhisperForConditionalGeneration(tiny_config).save_pretrained(model_path)
+
+    # Five-entry word-level vocabulary; "<pad>" doubles as the unknown token.
+    vocab = {"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4}
+    word_level = Tokenizer(WordLevel(vocab=vocab, unk_token="<pad>"))
+    word_level.pre_tokenizer = Whitespace()
+    fast_tokenizer = PreTrainedTokenizerFast(
+        tokenizer_object=word_level,
+        bos_token="<bos>",
+        eos_token="<eos>",
+        pad_token="<pad>",
+    )
+    fast_tokenizer.save_pretrained(model_path)
+
+
+def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"):
+    """Build a tiny local Whisper checkpoint and export it with ``capture-onnx-graph``.
+
+    Args:
+        tmp_path: Directory under which the checkpoint and the export are created.
+        model_id: Label used only to name those directories ("/" becomes "--").
+        precision: Value forwarded to the CLI ``--precision`` option.
+
+    Returns:
+        The directory passed to the CLI as ``--output_path``.
+    """
+    safe_name = model_id.replace("/", "--")
+    checkpoint_dir = tmp_path / "models" / safe_name
+    export_dir = tmp_path / f"{safe_name}-onnx"
+
+    _save_local_tiny_whisper(checkpoint_dir)
+    model_args = ["-m", str(checkpoint_dir), "--use_model_builder"]
+    precision_args = ["--precision", precision]
+    output_args = ["--output_path", str(export_dir)]
+    _run_cli_main(["capture-onnx-graph", *model_args, *precision_args, *output_args])
+    return export_dir
+
+
+class TestCliWhisperSmoke(unittest.TestCase):
+    """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""
+
+    # Class-level knobs; the ``__main__`` block overrides model_ids/workdir from the command line.
+    model_ids = DEFAULT_WHISPER_MODEL_IDS
+    precision = "fp32"
+    workdir = None  # None -> export into a throwaway temporary directory
+
+    def test_whisper_capture_onnx_graph(self):
+        """Verify that Whisper encoder and decoder are exported to ONNX successfully."""
+        if self.workdir is None:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                self._assert_whisper_export(Path(temp_dir))
+        else:
+            workdir = Path(self.workdir)
+            workdir.mkdir(parents=True, exist_ok=True)
+            self._assert_whisper_export(workdir)
+
+    def _assert_whisper_export(self, tmp_path: Path):
+        """Export every configured model id and check both ONNX artifacts exist and are small."""
+        expected_files = ("encoder.onnx", "decoder.onnx")
+        for model_id in self.model_ids:
+            with self.subTest(model_id=model_id):
+                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision)
+                output_files = self._list_relative_files(run_output_dir)
+                for expected_file in expected_files:
+                    assert expected_file in output_files, f"Expected {expected_file} in output, got: {output_files}"
+                for expected_file in expected_files:
+                    self._assert_file_size_below_limit(run_output_dir / expected_file)
+
+    def _assert_file_size_below_limit(self, path: Path):
+        """Fail with a descriptive message unless ``path`` exists and is under the size limit."""
+        assert path.exists(), f"Expected {path} to exist"
+        size = path.stat().st_size
+        assert size < MAX_ARTIFACT_SIZE_BYTES, f"{path} is {size} bytes, expected < {MAX_ARTIFACT_SIZE_BYTES}"
+
+    @staticmethod
+    def _list_relative_files(path: Path):
+        """Return the POSIX-style paths of all files under ``path``, relative to ``path``."""
+        return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
+
+
+def _parse_args():  # returns (namespace of this script's options, list of unrecognized argv entries)
+    script_parser = argparse.ArgumentParser(add_help=False)  # add_help=False: -h/--help is not consumed here
+    script_parser.add_argument("--model-id", dest="model_ids", action="append")  # repeatable
+    script_parser.add_argument("--workdir")
+    return script_parser.parse_known_args()
+
+
+if __name__ == "__main__":
+    cli_options, unittest_argv = _parse_args()  # overrides below are set on the class before unittest runs
+    if cli_options.model_ids:
+        TestCliWhisperSmoke.model_ids = tuple(cli_options.model_ids)
+    if cli_options.workdir:
+        TestCliWhisperSmoke.workdir = Path(cli_options.workdir)
+    unittest.main(argv=[__file__, *unittest_argv])