From 19a0389451b155c024592181438c5f9e828ebfe4 Mon Sep 17 00:00:00 2001 From: Andrey Buzin Date: Mon, 13 Apr 2026 15:04:46 -0700 Subject: [PATCH 1/2] Update the docs with new apis --- README.md | 29 +++++++++++++++++++++-------- skills/ai/SKILL.md | 45 ++++++++++++++++++++++++--------------------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index a1530ec5..573f730d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ async def get_weather(city: str) -> str: async def main() -> None: - model = ai.model("ai-gateway", "anthropic/claude-sonnet-4") + model = ai.ai_gateway("anthropic/claude-sonnet-4") agent = ai.agent(tools=[get_weather]) messages = [ @@ -53,11 +53,14 @@ if __name__ == "__main__": ### Models ``` -ai.model(provider, model_id) model metadata (providers: ai-gateway, anthropic, openai) -ai.stream(model, messages, ...) streaming generation (supports tools=, output_type=, client=) -ai.generate(model, messages) non-streaming / image / video generation +ai.openai(model_id) provider — callable, returns Model +ai.anthropic(model_id) provider — callable, returns Model +ai.ai_gateway(model_id) provider — callable, returns Model +provider.list() list available model IDs from the provider API +ai.stream(model, messages, ...) streaming generation (supports tools=, output_type=) +ai.generate(model, messages, p) non-streaming generation (ImageParams, VideoParams) ai.check_connection(model) verify credentials and model availability -ai.Client(base_url=, api_key=) explicit client when you need a custom endpoint +ai.Client(base_url=, api_key=) explicit client — pass to provider call: ai.openai("gpt-5.4", client=c) ``` ### Agents @@ -78,11 +81,20 @@ ai.system_message ai.user_message ai.assistant_message ai.tool_message ai.tool_result ai.file_part ai.thinking ``` +### Middleware + +``` +ai.Middleware base class — subclass and override wrap methods +agent.run(..., middleware=[m]) run-scoped, first = outermost +``` + +Five wrap surfaces: `wrap_agent_run`, `wrap_model`, `wrap_generate`, `wrap_tool`, `wrap_hook`. +Each receives a context dataclass and a `next` callable. Default implementations pass through. + ### Integrations ``` ai.mcp.get_http_tools(url, ...) expose an MCP server as tools -ai.telemetry.enable/disable() OpenTelemetry-style event hooks ai.ai_sdk_ui AI SDK UI streaming adapter ``` @@ -94,7 +106,7 @@ Override the default loop when you need approval gates, routing, or custom orche @agent.loop async def custom(context: ai.Context): while True: - s = await ai.models.stream( + s = await ai.stream( context.model, context.messages, tools=context.tools ) async for msg in s: @@ -115,4 +127,5 @@ Small focused samples live in `examples/samples/`. End-to-end demos: - `examples/fastapi-vite/` -- FastAPI backend + Vite frontend with hook-based tool approval - `examples/multiagent-textual/` -- Textual TUI with parallel agents and interactive hook resolution - `examples/temporal-direct/` -- durable agent with a custom loop (every I/O call is a Temporal activity) -- `examples/temporal-middleware/` -- durable agent via Middleware (default loop, I/O routed to Temporal activities) +- `examples/temporal-middleware/` -- durable agent via middleware (default loop, I/O routed to Temporal activities) +- `examples/samples/middleware_simple.py` -- middleware that logs all five execution surfaces diff --git a/skills/ai/SKILL.md b/skills/ai/SKILL.md index ab02a30c..bd09cc8a 100644 --- a/skills/ai/SKILL.md +++ b/skills/ai/SKILL.md @@ -1,6 +1,6 @@ --- name: ai -description: Python `ai` module — models, agents, hooks, MCP, structured output +description: Python `ai` module — models, agents, hooks, middleware, MCP, structured output --- # ai @@ -21,7 +21,7 @@ async def get_weather(city: str) -> str: """Get the current weather for a city.""" return f"Sunny, 72F in {city}" -model = ai.model("ai-gateway", "anthropic/claude-sonnet-4") +model = ai.ai_gateway("anthropic/claude-sonnet-4") agent = ai.agent(tools=[get_weather]) messages = [ @@ -33,11 +33,11 @@ async for msg in agent.run(model, messages): print(msg.text_delta, end="") ``` -`ai.model(provider, model_id)` — built-in providers: `ai-gateway`, `anthropic`, `openai`. Default clients auto-created from env vars (`AI_GATEWAY_API_KEY`, `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). Pass `ai.Client(base_url=, api_key=)` for custom endpoints. +`ai.openai(model_id)`, `ai.anthropic(model_id)`, `ai.ai_gateway(model_id)` — provider factories, callable, return `Model`. Clients auto-created from env vars (`AI_GATEWAY_API_KEY`, `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). Pass `ai.Client(base_url=, api_key=)` to the provider call for custom endpoints: `ai.openai("gpt-5.4", client=c)`. `provider.list()` returns available model IDs. `ai.stream(model, messages, ...)` — streaming without an agent loop. Returns `StreamResult` with `.text`, `.tool_calls`, `.output`, `.usage` after iteration. -`ai.generate(model, messages)` — non-streaming / image / video generation. +`ai.generate(model, messages, params)` — non-streaming generation. `params` is `ImageParams` or `VideoParams`. `ai.check_connection(model)` — verify credentials and model availability. @@ -75,7 +75,7 @@ my_agent = ai.agent(tools=[get_weather, get_population]) @my_agent.loop async def custom(context: ai.Context): while True: - s = await ai.models.stream( + s = await ai.stream( context.model, context.messages, tools=context.tools ) async for msg in s: @@ -144,7 +144,7 @@ Hook messages have `role="signal"` with a `HookPart`. **Long-running mode** (`interrupt_loop=False`, default): await blocks until resolved. Use for websocket/interactive UIs. -**Serverless mode** (`interrupt_loop=True`): unresolved hooks cancel the run. Store the checkpoint, pre-register a resolution, rerun with `checkpoint=`. +**Serverless mode** (`interrupt_loop=True`): unresolved hooks cancel the run. Pre-register a resolution with `ai.resolve_hook(...)` before rerunning. Consuming hooks in the iterator: @@ -160,18 +160,6 @@ async for msg in my_agent.run(model, messages): print(msg.text_delta, end="") ``` -## Checkpoints - -`Checkpoint` records completed LLM calls, tool executions, and hook resolutions. On replay, cached results are returned without re-executing. - -```python -data = result.checkpoint.model_dump() -checkpoint = ai.Checkpoint.model_validate(data) -result = my_agent.run(model, messages, checkpoint=checkpoint) -``` - -Primary use case: serverless hook re-entry. - ## Structured output ```python @@ -213,9 +201,24 @@ return StreamingResponse( ) ``` -## Telemetry +## Middleware + +Subclass `ai.Middleware` and override the wrap methods you need. Pass to `agent.run(..., middleware=[...])`. Run-scoped, composable, first in list = outermost. ```python -ai.telemetry.enable() # OTel-based, emits run/step/tool events -ai.telemetry.disable() +class LoggingMiddleware(ai.Middleware): + async def wrap_model(self, call, next): + print(f"calling {call.model.id}") + result = await next(call) + print("stream started") + return result + + async def wrap_tool(self, call, next): + print(f"tool {call.tool_name}({call.kwargs})") + return await next(call) + +async for msg in agent.run(model, messages, middleware=[LoggingMiddleware()]): + ... ``` + +Five surfaces: `wrap_agent_run`, `wrap_model`, `wrap_generate`, `wrap_tool`, `wrap_hook`. Each receives a frozen context dataclass and a `next` callable. Use `dataclasses.replace(call, ...)` to modify before passing to `next`. From 4b85d6f202d030f0e3e5bdee8cb44a5e913e255b Mon Sep 17 00:00:00 2001 From: Andrey Buzin Date: Mon, 13 Apr 2026 15:05:25 -0700 Subject: [PATCH 2/2] Bump the version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e92fc3d3..1ae6b44f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "vercel-ai-sdk" -version = "0.0.1.dev9" +version = "0.0.1.dev10" description = "The AI Toolkit for Python" readme = "README.md" authors = [