diff --git a/examples/run-examples.py b/examples/run-examples.py index 47a6b8f4..d470d59d 100755 --- a/examples/run-examples.py +++ b/examples/run-examples.py @@ -8,6 +8,8 @@ uv run examples/run-examples.py --e2e # also run e2e test scripts uv run examples/run-examples.py --all # run everything uv run examples/run-examples.py --parallel # run in parallel + uv run examples/run-examples.py --model gateway:openai/gpt-5.4-mini + # patch ai.get_model() to use the given model for every sample """ import argparse @@ -20,6 +22,7 @@ REPO = Path(__file__).resolve().parent.parent SAMPLES = REPO / "examples" / "samples" +PATCH_SCRIPT = REPO / "examples" / "run-with-patched-model.py" @dataclasses.dataclass @@ -116,10 +119,10 @@ class Sample: ] -def _sample_cmd(sample: Sample) -> list[str]: +def _sample_cmd(sample: Sample, model: str | None) -> list[str]: if sample.cmd is not None: return sample.cmd - return [ + base = [ "uv", "run", "--frozen", @@ -128,8 +131,10 @@ def _sample_cmd(sample: Sample) -> list[str]: "--with-editable", str(REPO), "python", - str(SAMPLES / sample.name), ] + if model is not None: + return [*base, str(PATCH_SCRIPT), model, str(SAMPLES / sample.name)] + return [*base, str(SAMPLES / sample.name)] _env = {k: v for k, v in os.environ.items() if k != "VIRTUAL_ENV"} @@ -141,11 +146,11 @@ def _sample_env(sample: Sample) -> dict[str, str]: return {**_env, **sample.extra_env} -def run_sample(sample: Sample) -> bool: +def run_sample(sample: Sample, model: str | None) -> bool: print(f"{'=' * 20} {sample.name} {'=' * 20}") sys.stdout.flush() result = subprocess.run( - _sample_cmd(sample), + _sample_cmd(sample, model), env=_sample_env(sample), timeout=sample.timeout, input=sample.stdin, @@ -169,10 +174,10 @@ def print_summary(results: list[tuple[str, bool]]) -> bool: return any_failed -def run_sample_quiet(sample: Sample) -> tuple[str, bool, str]: +def run_sample_quiet(sample: Sample, model: str | None) -> tuple[str, bool, str]: try: result = subprocess.run( - 
_sample_cmd(sample), + _sample_cmd(sample, model), env=_sample_env(sample), timeout=sample.timeout, capture_output=True, @@ -196,6 +201,14 @@ def main() -> None: parser.add_argument( "--parallel", action="store_true", help="run samples in parallel" ) + parser.add_argument( + "--model", + help=( + "run each sample through run-with-patched-model.py with this " + "model id (e.g. 'gateway:openai/gpt-5.4-mini'); ignored for " + "samples with a custom cmd" + ), + ) args = parser.parse_args() has_category = args.text or args.image or args.video or args.broken or args.e2e @@ -217,7 +230,7 @@ def main() -> None: if args.parallel: outputs: dict[str, str] = {} with concurrent.futures.ThreadPoolExecutor() as pool: - futures = {pool.submit(run_sample_quiet, s): s for s in samples} + futures = {pool.submit(run_sample_quiet, s, args.model): s for s in samples} for future in concurrent.futures.as_completed(futures): name, ok, output = future.result() status = "PASS" if ok else "FAIL" @@ -240,7 +253,7 @@ def main() -> None: else: for sample in samples: try: - ok = run_sample(sample) + ok = run_sample(sample, args.model) except subprocess.TimeoutExpired: print(f" TIMEOUT after {sample.timeout:g}s\n") ok = False diff --git a/examples/run-with-patched-model.py b/examples/run-with-patched-model.py new file mode 100644 index 00000000..64449ce1 --- /dev/null +++ b/examples/run-with-patched-model.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +"""Run a Python file with ``ai.get_model()`` patched to always return a fixed model. + +Useful for re-running an example against a different model without +editing it. 
+ +Usage (from repo root): + + uv run examples/run-with-patched-model.py MODEL FILE + +Example: + + uv run examples/run-with-patched-model.py \\ + gateway:openai/gpt-5.4-mini \\ + examples/samples/stream.py +""" + +import argparse +import runpy +import sys +from typing import Any + +import ai +from ai import models +from ai.models import core +from ai.models.core import model as _model + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "model", help="model id, e.g. 'gateway:anthropic/claude-sonnet-4.6'" + ) + parser.add_argument("file", help="path to a python file to execute") + args = parser.parse_args() + + original = _model.get_model + + def patched(*_args: Any, **_kwargs: Any) -> ai.Model: + return original(args.model) + + ai.get_model = patched + models.get_model = patched + core.get_model = patched + _model.get_model = patched + + sys.argv = [args.file] + runpy.run_path(args.file, run_name="__main__") + + +if __name__ == "__main__": + main() diff --git a/examples/samples/agent_hooks_serverless.py b/examples/samples/agent_hooks_serverless.py index c0d7e188..bea92b9d 100644 --- a/examples/samples/agent_hooks_serverless.py +++ b/examples/samples/agent_hooks_serverless.py @@ -76,7 +76,9 @@ async def main() -> None: messages = stream.messages print("\n Run interrupted; approval will be pre-registered for re-entry.\n") - assert AUDIT_LOG == ["Deleted file: /tmp/old_logs.txt"] + assert len(AUDIT_LOG) == 1 and "/tmp/old_logs.txt" in AUDIT_LOG[0], ( + f"Bad audit log: {AUDIT_LOG}" + ) # -- Second run: pre-register resolution, replay from checkpoint -- print("--- Run 2: pre-register approval, resume from checkpoint ---") @@ -96,7 +98,9 @@ async def main() -> None: assert {"/tmp/old_logs.txt"} == FILES_DELETED, ( f"Wrong files deleted: {FILES_DELETED}" ) - assert AUDIT_LOG == ["Deleted file: /tmp/old_logs.txt"] + assert len(AUDIT_LOG) == 1 and "/tmp/old_logs.txt" in AUDIT_LOG[0], ( + f"Bad audit log: {AUDIT_LOG}" + ) if 
__name__ == "__main__":