Keep the model-builder cache directory alive during multi-component exports
and add a Whisper CLI smoke test.

  * olive/passes/onnx/model_builder.py: create a per-process marker file in
    the Hugging Face hub cache directory before create_model runs and remove
    it in a finally block, so the directory is never empty while the build
    is in progress.
  * test/cli/test_cli_test_model_smoke.py: add one more default model id and
    import transformers lazily inside _save_local_tiny_llama.
  * test/cli/test_cli_whisper_smoke.py: new smoke test that saves a tiny local
    Whisper model and exports it with `capture-onnx-graph --use_model_builder`.
  * .github/workflows/test-model-fast.yml: run the new smoke test in CI.

diff --git a/.github/workflows/test-model-fast.yml b/.github/workflows/test-model-fast.yml
index 72c9e8547..20af9b524 100644
--- a/.github/workflows/test-model-fast.yml
+++ b/.github/workflows/test-model-fast.yml
@@ -36,4 +36,4 @@ jobs:
 
       - name: Run fast test
         run: |
-          python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py
+          python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py test/cli/test_cli_whisper_smoke.py
diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
index c575f035b..54c8eb999 100644
--- a/olive/passes/onnx/model_builder.py
+++ b/olive/passes/onnx/model_builder.py
@@ -8,6 +8,7 @@ import importlib
 import json
 import logging
+import os
 from enum import IntEnum
 from pathlib import Path
 from typing import Any, ClassVar, Union
@@ -280,6 +281,18 @@ def _run_for_config(
 
         model_attributes = copy.deepcopy(model.model_attributes or {})
 
+        # onnxruntime-genai's save_model cleanup deletes cache_dir when it is empty
+        # (base.py: `if not os.listdir(self.cache_dir): os.rmdir(self.cache_dir)`).
+        # For multi-component models like Whisper the encoder and decoder share the
+        # same cache_dir; the encoder's save_model removes the empty directory before
+        # the decoder's save_model can run, raising FileNotFoundError.
+        # Fix: keep a marker file in the cache_dir for the duration of create_model so
+        # that the directory is never considered empty and never deleted prematurely.
+        cache_dir_path = Path(HF_HUB_CACHE)
+        cache_dir_path.mkdir(parents=True, exist_ok=True)
+        marker_path = cache_dir_path / f".olive_build_{os.getpid()}"
+        marker_path.touch()
+
         try:
             logger.debug("Building model with the following args: %s", extra_args)
             create_model(
@@ -296,12 +309,13 @@ def _run_for_config(
 
         except Exception:
             # if model building fails, clean up the intermediate files in the cache_dir
-            cache_dir = Path(HF_HUB_CACHE)
-            if cache_dir.is_dir():
-                for file in cache_dir.iterdir():
+            if cache_dir_path.is_dir():
+                for file in cache_dir_path.iterdir():
                     if file.suffix == ".bin":
                         file.unlink()
             raise
+        finally:
+            marker_path.unlink(missing_ok=True)
 
         # Override default search options with ones from user config
         genai_config_filepath = str(output_model_filepath.parent / "genai_config.json")
diff --git a/test/cli/test_cli_test_model_smoke.py b/test/cli/test_cli_test_model_smoke.py
index b28130658..c52a771eb 100644
--- a/test/cli/test_cli_test_model_smoke.py
+++ b/test/cli/test_cli_test_model_smoke.py
@@ -12,7 +12,6 @@
 from tokenizers import Tokenizer
 from tokenizers.models import WordLevel
 from tokenizers.pre_tokenizers import Whitespace
-from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast
 
 from olive.cli.base import TEST_OUTPUT_MARKER_FILE
 from olive.common.hf.utils import TEST_MODEL_MARKER_FILE
@@ -25,11 +24,14 @@
     "local/tiny-random-llama-a",
     "local/tiny-random-llama-b",
     "mistralai/Mistral-7B-Instruct-v0.3",
+    "microsoft/Phi-3-mini-4k-instruct",
 )
 MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024
 
 
 def _save_local_tiny_llama(model_path: Path):
+    from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast
+
     model = LlamaForCausalLM(
         LlamaConfig.from_dict(
             {
diff --git a/test/cli/test_cli_whisper_smoke.py b/test/cli/test_cli_whisper_smoke.py
new file mode 100644
index 000000000..11c5948e9
--- /dev/null
+++ b/test/cli/test_cli_whisper_smoke.py
@@ -0,0 +1,145 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import argparse
+import tempfile
+import unittest
+from pathlib import Path
+
+from tokenizers import Tokenizer
+from tokenizers.models import WordLevel
+from tokenizers.pre_tokenizers import Whitespace
+
+DEFAULT_WHISPER_MODEL_IDS = (
+    "openai/whisper-tiny",
+    "microsoft/whisper-base",
+)
+MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024
+
+
+def _run_cli_main(args):
+    from olive.cli.launcher import main as cli_main
+
+    cli_main(args)
+
+
+def _save_local_tiny_whisper(model_path: Path):
+    from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration
+
+    model = WhisperForConditionalGeneration(
+        WhisperConfig.from_dict(
+            {
+                "num_mel_bins": 80,
+                "encoder_layers": 2,
+                "encoder_attention_heads": 4,
+                "decoder_layers": 2,
+                "decoder_attention_heads": 4,
+                "d_model": 64,
+                "encoder_ffn_dim": 128,
+                "decoder_ffn_dim": 128,
+                "max_source_positions": 16,
+                "max_target_positions": 16,
+                "pad_token_id": 0,
+                "bos_token_id": 1,
+                "eos_token_id": 2,
+                "decoder_start_token_id": 1,
+            }
+        )
+    )
+    model.save_pretrained(model_path)
+
+    tokenizer = Tokenizer(
+        WordLevel(
+            vocab={"<unk>": 0, "<s>": 1, "</s>": 2, "hello": 3, "world": 4},
+            unk_token="<unk>",
+        )
+    )
+    tokenizer.pre_tokenizer = Whitespace()
+    PreTrainedTokenizerFast(
+        tokenizer_object=tokenizer,
+        bos_token="<s>",
+        eos_token="</s>",
+        pad_token="<unk>",
+    ).save_pretrained(model_path)
+
+
+def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"):
+    """Export a tiny local Whisper model to ONNX via capture-onnx-graph with Model Builder."""
+    model_name = model_id.replace("/", "--")
+    model_path = tmp_path / "models" / model_name
+    run_output_dir = tmp_path / f"{model_name}-onnx"
+
+    _save_local_tiny_whisper(model_path)
+    _run_cli_main(
+        [
+            "capture-onnx-graph",
+            "-m",
+            str(model_path),
+            "--use_model_builder",
+            "--precision",
+            precision,
+            "--output_path",
+            str(run_output_dir),
+        ]
+    )
+
+    return run_output_dir
+
+
+class TestCliWhisperSmoke(unittest.TestCase):
+    """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""
+
+    model_ids = DEFAULT_WHISPER_MODEL_IDS
+    precision = "fp32"
+    workdir = None
+
+    def test_whisper_capture_onnx_graph(self):
+        """Verify that Whisper encoder and decoder are exported to ONNX successfully."""
+        if self.workdir is None:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                self._assert_whisper_export(Path(temp_dir))
+        else:
+            workdir = Path(self.workdir)
+            workdir.mkdir(parents=True, exist_ok=True)
+            self._assert_whisper_export(workdir)
+
+    def _assert_whisper_export(self, tmp_path: Path):
+        expected_encoder_file = "encoder.onnx"
+        expected_decoder_file = "decoder.onnx"
+        for model_id in self.model_ids:
+            with self.subTest(model_id=model_id):
+                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision)
+                output_files = self._list_relative_files(run_output_dir)
+                assert expected_encoder_file in output_files, (
+                    f"Expected {expected_encoder_file} in output, got: {output_files}"
+                )
+                assert expected_decoder_file in output_files, (
+                    f"Expected {expected_decoder_file} in output, got: {output_files}"
+                )
+                self._assert_file_size_below_limit(run_output_dir / expected_encoder_file)
+                self._assert_file_size_below_limit(run_output_dir / expected_decoder_file)
+
+    def _assert_file_size_below_limit(self, path: Path):
+        assert path.exists()
+        assert path.stat().st_size < MAX_ARTIFACT_SIZE_BYTES
+
+    @staticmethod
+    def _list_relative_files(path: Path):
+        return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}
+
+
+def _parse_args():
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument("--workdir")
+    parser.add_argument("--model-id", dest="model_ids", action="append")
+    return parser.parse_known_args()
+
+
+if __name__ == "__main__":
+    parsed_args, remaining = _parse_args()
+    if parsed_args.workdir:
+        TestCliWhisperSmoke.workdir = Path(parsed_args.workdir)
+    if parsed_args.model_ids:
+        TestCliWhisperSmoke.model_ids = tuple(parsed_args.model_ids)
+    unittest.main(argv=[__file__, *remaining])