vercel-labs · msullivan · May 15, 2026 · May 15, 2026
diff --git a/examples/run-examples.py b/examples/run-examples.py
@@ -8,6 +8,8 @@
     uv run examples/run-examples.py --e2e       # also run e2e test scripts
     uv run examples/run-examples.py --all       # run everything
     uv run examples/run-examples.py --parallel  # run in parallel
+    uv run examples/run-examples.py --model gateway:openai/gpt-5.4-mini
+        # patch ai.get_model() to use the given model (ignored for samples with a custom cmd)
 """
 
 import argparse
@@ -20,6 +22,7 @@
 
 REPO = Path(__file__).resolve().parent.parent
 SAMPLES = REPO / "examples" / "samples"
+PATCH_SCRIPT = REPO / "examples" / "run-with-patched-model.py"
 
 
 @dataclasses.dataclass
@@ -116,10 +119,10 @@ class Sample:
 ]
 
 
-def _sample_cmd(sample: Sample) -> list[str]:
+def _sample_cmd(sample: Sample, model: str | None) -> list[str]:
     if sample.cmd is not None:
         return sample.cmd
-    return [
+    base = [
         "uv",
         "run",
         "--frozen",
@@ -128,8 +131,10 @@ def _sample_cmd(sample: Sample) -> list[str]:
         "--with-editable",
         str(REPO),
         "python",
-        str(SAMPLES / sample.name),
     ]
+    if model is not None:
+        return [*base, str(PATCH_SCRIPT), model, str(SAMPLES / sample.name)]
+    return [*base, str(SAMPLES / sample.name)]
 
 
 _env = {k: v for k, v in os.environ.items() if k != "VIRTUAL_ENV"}
@@ -141,11 +146,11 @@ def _sample_env(sample: Sample) -> dict[str, str]:
     return {**_env, **sample.extra_env}
 
 
-def run_sample(sample: Sample) -> bool:
+def run_sample(sample: Sample, model: str | None) -> bool:
     print(f"{'=' * 20} {sample.name} {'=' * 20}")
     sys.stdout.flush()
     result = subprocess.run(
-        _sample_cmd(sample),
+        _sample_cmd(sample, model),
         env=_sample_env(sample),
         timeout=sample.timeout,
         input=sample.stdin,
@@ -169,10 +174,10 @@ def print_summary(results: list[tuple[str, bool]]) -> bool:
     return any_failed
 
 
-def run_sample_quiet(sample: Sample) -> tuple[str, bool, str]:
+def run_sample_quiet(sample: Sample, model: str | None) -> tuple[str, bool, str]:
     try:
         result = subprocess.run(
-            _sample_cmd(sample),
+            _sample_cmd(sample, model),
             env=_sample_env(sample),
             timeout=sample.timeout,
             capture_output=True,
@@ -196,6 +201,14 @@ def main() -> None:
     parser.add_argument(
         "--parallel", action="store_true", help="run samples in parallel"
     )
+    parser.add_argument(
+        "--model",
+        help=(
+            "run each sample through run-with-patched-model.py with this "
+            "model id (e.g. 'gateway:openai/gpt-5.4-mini'); ignored for "
+            "samples with a custom cmd"
+        ),
+    )
     args = parser.parse_args()
 
     has_category = args.text or args.image or args.video or args.broken or args.e2e
@@ -217,7 +230,7 @@ def main() -> None:
     if args.parallel:
         outputs: dict[str, str] = {}
         with concurrent.futures.ThreadPoolExecutor() as pool:
-            futures = {pool.submit(run_sample_quiet, s): s for s in samples}
+            futures = {pool.submit(run_sample_quiet, s, args.model): s for s in samples}
             for future in concurrent.futures.as_completed(futures):
                 name, ok, output = future.result()
                 status = "PASS" if ok else "FAIL"
@@ -240,7 +253,7 @@ def main() -> None:
     else:
         for sample in samples:
             try:
-                ok = run_sample(sample)
+                ok = run_sample(sample, args.model)
             except subprocess.TimeoutExpired:
                 print(f"  TIMEOUT after {sample.timeout:g}s\n")
                 ok = False

diff --git a/examples/run-with-patched-model.py b/examples/run-with-patched-model.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+"""Run a Python file with ``ai.get_model()`` patched to always return a fixed model.
+
+Useful for re-running an example against a different model without
+editing it.
+
+Usage (from repo root):
+
+    uv run examples/run-with-patched-model.py <model> <file.py>
+
+Example:
+
+    uv run examples/run-with-patched-model.py \\
+        gateway:openai/gpt-5.4-mini \\
+        examples/samples/stream.py
+"""
+
+import argparse
+import runpy
+import sys
+from typing import Any
+
+import ai
+from ai import models
+from ai.models import core
+from ai.models.core import model as _model
+
+
+def main() -> None:  # parse <model> <file>, patch get_model, run <file> as a script
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "model", help="model id, e.g. 'gateway:anthropic/claude-sonnet-4.6'"
+    )
+    parser.add_argument("file", help="path to a python file to execute")
+    args = parser.parse_args()
+
+    original = _model.get_model  # real implementation; patched() delegates to it
+
+    def patched(*_args: Any, **_kwargs: Any) -> ai.Model:  # call args are ignored
+        return original(args.model)  # always the model id given on the CLI
+
+    ai.get_model = patched  # rebind on each of the four modules imported above
+    models.get_model = patched
+    core.get_model = patched
+    _model.get_model = patched
+
+    sys.argv = [args.file]  # target sees only its own path in argv
+    runpy.run_path(args.file, run_name="__main__")  # runs with __name__ == "__main__"
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/samples/agent_hooks_serverless.py b/examples/samples/agent_hooks_serverless.py
@@ -76,7 +76,9 @@ async def main() -> None:
         messages = stream.messages
 
     print("\n  Run interrupted; approval will be pre-registered for re-entry.\n")
-    assert AUDIT_LOG == ["Deleted file: /tmp/old_logs.txt"]
+    assert len(AUDIT_LOG) == 1 and "/tmp/old_logs.txt" in AUDIT_LOG[0], (
+        f"Bad audit log: {AUDIT_LOG}"
+    )
 
     # -- Second run: pre-register resolution, replay from checkpoint --
     print("--- Run 2: pre-register approval, resume from checkpoint ---")
@@ -96,7 +98,9 @@ async def main() -> None:
     assert {"/tmp/old_logs.txt"} == FILES_DELETED, (
         f"Wrong files deleted: {FILES_DELETED}"
     )
-    assert AUDIT_LOG == ["Deleted file: /tmp/old_logs.txt"]
+    assert len(AUDIT_LOG) == 1 and "/tmp/old_logs.txt" in AUDIT_LOG[0], (
+        f"Bad audit log: {AUDIT_LOG}"
+    )
 
 
 if __name__ == "__main__":