Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-model-fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ jobs:

- name: Run fast test
run: |
python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py
python -m pytest -v -s -p no:warnings --disable-warnings --log-cli-level=WARNING test/cli/test_cli_test_model_smoke.py test/cli/test_cli_whisper_smoke.py
20 changes: 17 additions & 3 deletions olive/passes/onnx/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import importlib
import json
import logging
import os
from enum import IntEnum
from pathlib import Path
from typing import Any, ClassVar, Union
Expand Down Expand Up @@ -280,6 +281,18 @@ def _run_for_config(

model_attributes = copy.deepcopy(model.model_attributes or {})

# onnxruntime-genai's save_model cleanup deletes cache_dir when it is empty
# (base.py: `if not os.listdir(self.cache_dir): os.rmdir(self.cache_dir)`).
# For multi-component models like Whisper the encoder and decoder share the
# same cache_dir; the encoder's save_model removes the empty directory before
# the decoder's save_model can run, raising FileNotFoundError.
# Fix: keep a marker file in the cache_dir for the duration of create_model so
# that the directory is never considered empty and never deleted prematurely.
cache_dir_path = Path(HF_HUB_CACHE)
cache_dir_path.mkdir(parents=True, exist_ok=True)
marker_path = cache_dir_path / f".olive_build_{os.getpid()}"
marker_path.touch()

try:
logger.debug("Building model with the following args: %s", extra_args)
create_model(
Expand All @@ -296,12 +309,13 @@ def _run_for_config(

except Exception:
# if model building fails, clean up the intermediate files in the cache_dir
cache_dir = Path(HF_HUB_CACHE)
if cache_dir.is_dir():
for file in cache_dir.iterdir():
if cache_dir_path.is_dir():
for file in cache_dir_path.iterdir():
if file.suffix == ".bin":
file.unlink()
raise
finally:
marker_path.unlink(missing_ok=True)

# Override default search options with ones from user config
genai_config_filepath = str(output_model_filepath.parent / "genai_config.json")
Expand Down
4 changes: 3 additions & 1 deletion test/cli/test_cli_test_model_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast

from olive.cli.base import TEST_OUTPUT_MARKER_FILE
from olive.common.hf.utils import TEST_MODEL_MARKER_FILE
Expand All @@ -25,11 +24,14 @@
"local/tiny-random-llama-a",
"local/tiny-random-llama-b",
"mistralai/Mistral-7B-Instruct-v0.3",
"microsoft/Phi-3-mini-4k-instruct",
)
MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024


def _save_local_tiny_llama(model_path: Path):
from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerFast

model = LlamaForCausalLM(
LlamaConfig.from_dict(
{
Expand Down
145 changes: 145 additions & 0 deletions test/cli/test_cli_whisper_smoke.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import argparse
import tempfile
import unittest
from pathlib import Path

from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace

# Model ids exercised by the smoke test. In this file each id is only turned
# into a directory name (``/`` replaced by ``--``) under which a tiny local
# Whisper model is generated and exported.
DEFAULT_WHISPER_MODEL_IDS = (
"openai/whisper-tiny",
"microsoft/whisper-base",
)
# Upper bound (1 MiB) that each exported encoder/decoder .onnx file must stay below.
MAX_ARTIFACT_SIZE_BYTES = 1024 * 1024


def _run_cli_main(args):
    """Invoke the Olive CLI entry point with the given argument list."""
    # Imported lazily so that importing this test module does not pull in the CLI.
    from olive.cli.launcher import main as olive_cli_entry

    olive_cli_entry(args)


def _save_local_tiny_whisper(model_path: Path):
    """Write a small Whisper model and a five-token word-level tokenizer to ``model_path``."""
    from transformers import PreTrainedTokenizerFast, WhisperConfig, WhisperForConditionalGeneration

    # Deliberately small dimensions: two layers per side, 64-wide hidden state.
    whisper_settings = {
        "num_mel_bins": 80,
        "encoder_layers": 2,
        "encoder_attention_heads": 4,
        "decoder_layers": 2,
        "decoder_attention_heads": 4,
        "d_model": 64,
        "encoder_ffn_dim": 128,
        "decoder_ffn_dim": 128,
        "max_source_positions": 16,
        "max_target_positions": 16,
        "pad_token_id": 0,
        "bos_token_id": 1,
        "eos_token_id": 2,
        "decoder_start_token_id": 1,
    }
    whisper_config = WhisperConfig.from_dict(whisper_settings)
    WhisperForConditionalGeneration(whisper_config).save_pretrained(model_path)

    # Special-token ids in the vocabulary line up with the ids in the model config above.
    vocabulary = {"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4}
    word_level_tokenizer = Tokenizer(WordLevel(vocab=vocabulary, unk_token="<pad>"))
    word_level_tokenizer.pre_tokenizer = Whitespace()

    fast_tokenizer = PreTrainedTokenizerFast(
        tokenizer_object=word_level_tokenizer,
        bos_token="<bos>",
        eos_token="<eos>",
        pad_token="<pad>",
    )
    fast_tokenizer.save_pretrained(model_path)


def _run_whisper_capture_onnx_flow(tmp_path: Path, model_id: str, precision: str = "fp32"):
    """Generate a tiny local Whisper model and export it with ``capture-onnx-graph``.

    The model is saved under ``tmp_path/models/<name>`` and the CLI is asked to
    write its output to ``tmp_path/<name>-onnx``, where ``<name>`` is ``model_id``
    with every ``/`` replaced by ``--``. Returns the output directory path.
    """
    safe_name = model_id.replace("/", "--")
    source_dir = tmp_path / "models" / safe_name
    export_dir = tmp_path / f"{safe_name}-onnx"

    _save_local_tiny_whisper(source_dir)

    cli_argv = ["capture-onnx-graph", "-m", str(source_dir), "--use_model_builder"]
    cli_argv += ["--precision", precision]
    cli_argv += ["--output_path", str(export_dir)]
    _run_cli_main(cli_argv)

    return export_dir


class TestCliWhisperSmoke(unittest.TestCase):
    """Smoke tests for Whisper encoder-decoder models exported via capture-onnx-graph."""

    # Class-level knobs; the ``__main__`` entry point of this file overrides
    # ``model_ids`` and ``workdir`` from command-line options.
    model_ids = DEFAULT_WHISPER_MODEL_IDS
    precision = "fp32"
    # When None, a temporary directory is created and removed per test run.
    workdir = None

    def test_whisper_capture_onnx_graph(self):
        """Verify that Whisper encoder and decoder are exported to ONNX successfully."""
        if self.workdir is None:
            with tempfile.TemporaryDirectory() as temp_dir:
                self._assert_whisper_export(Path(temp_dir))
        else:
            workdir = Path(self.workdir)
            workdir.mkdir(parents=True, exist_ok=True)
            self._assert_whisper_export(workdir)

    def _assert_whisper_export(self, tmp_path: Path):
        """Export every configured model id and check the encoder/decoder artifacts."""
        expected_encoder_file = "encoder.onnx"
        expected_decoder_file = "decoder.onnx"
        for model_id in self.model_ids:
            # subTest keeps the remaining model ids running after one of them fails.
            with self.subTest(model_id=model_id):
                run_output_dir = _run_whisper_capture_onnx_flow(tmp_path, model_id, self.precision)
                output_files = self._list_relative_files(run_output_dir)
                assert expected_encoder_file in output_files, (
                    f"Expected {expected_encoder_file} in output, got: {output_files}"
                )
                assert expected_decoder_file in output_files, (
                    f"Expected {expected_decoder_file} in output, got: {output_files}"
                )
                self._assert_file_size_below_limit(run_output_dir / expected_encoder_file)
                self._assert_file_size_below_limit(run_output_dir / expected_decoder_file)

    def _assert_file_size_below_limit(self, path: Path):
        """Fail unless ``path`` exists and is smaller than MAX_ARTIFACT_SIZE_BYTES.

        Messages are attached because this file can be run through
        ``unittest.main``, where a bare ``assert`` failure reports no details.
        """
        assert path.exists(), f"Expected exported artifact to exist: {path}"
        size_in_bytes = path.stat().st_size
        assert size_in_bytes < MAX_ARTIFACT_SIZE_BYTES, (
            f"{path} is {size_in_bytes} bytes; expected fewer than {MAX_ARTIFACT_SIZE_BYTES}"
        )

    @staticmethod
    def _list_relative_files(path: Path):
        """Return the POSIX-style paths, relative to ``path``, of all files below it."""
        return {file_path.relative_to(path).as_posix() for file_path in path.rglob("*") if file_path.is_file()}


def _parse_args():
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--workdir")
parser.add_argument("--model-id", dest="model_ids", action="append")
return parser.parse_known_args()


if __name__ == "__main__":
    # Script entry point: apply our own options to the test class, then hand
    # every unrecognised argument over to unittest.
    cli_options, passthrough_args = _parse_args()

    chosen_workdir = cli_options.workdir
    if chosen_workdir:
        TestCliWhisperSmoke.workdir = Path(chosen_workdir)

    chosen_model_ids = cli_options.model_ids
    if chosen_model_ids:
        TestCliWhisperSmoke.model_ids = tuple(chosen_model_ids)

    unittest.main(argv=[__file__, *passthrough_args])