diff --git a/README.md b/README.md index ce9659ec..8d79033f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ uv add vercel-ai-sdk ``` ```python -import os import vercel_ai_sdk as ai @ai.tool @@ -27,11 +26,7 @@ async def agent(llm, query): tools=[talk_to_mothership], ) -llm = ai.openai.OpenAIModel( - model="anthropic/claude-opus-4.6", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ["AI_GATEWAY_API_KEY"], -) +llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run(agent, llm, "When will the robots take over?"): print(msg.text_delta, end="") @@ -204,23 +199,31 @@ Three event types are tracked: #### LLM Providers ```python -# OpenAI-compatible (including Vercel AI Gateway) -llm = ai.openai.OpenAIModel( +# Vercel AI Gateway (recommended) +# Uses AI_GATEWAY_API_KEY env var by default +llm = ai.ai_gateway.GatewayModel( model="anthropic/claude-opus-4.6", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ["AI_GATEWAY_API_KEY"], thinking=True, # enable reasoning output budget_tokens=10000, # or reasoning_effort="medium" ) -# Anthropic (native client) +# OpenAI (direct) +llm = ai.openai.OpenAIModel( + model="gpt-4o", + thinking=True, + reasoning_effort="medium", +) + +# Anthropic (direct) llm = ai.anthropic.AnthropicModel( - model="claude-opus-4.6-20250916", + model="claude-opus-4-6-20250916", thinking=True, budget_tokens=10000, ) ``` +The gateway provider automatically routes Anthropic models through the native Anthropic API for full feature support, and falls back to the OpenAI-compatible endpoint for structured output and non-Anthropic models. 
+ #### MCP ```python diff --git a/examples/fastapi-vite/backend/agent.py b/examples/fastapi-vite/backend/agent.py index 2c40df10..94ecd714 100644 --- a/examples/fastapi-vite/backend/agent.py +++ b/examples/fastapi-vite/backend/agent.py @@ -1,6 +1,5 @@ """Agent logic for the chat demo.""" -import os from typing import Any import vercel_ai_sdk as ai @@ -14,11 +13,7 @@ async def talk_to_mothership(question: str) -> str: def get_llm() -> ai.LanguageModel: """Create the LLM instance.""" - return ai.openai.OpenAIModel( - model="anthropic/claude-sonnet-4", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + return ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") TOOLS: list[ai.Tool[..., Any]] = [talk_to_mothership] diff --git a/examples/multiagent-textual/server.py b/examples/multiagent-textual/server.py index 7e8957af..fd87276a 100644 --- a/examples/multiagent-textual/server.py +++ b/examples/multiagent-textual/server.py @@ -12,7 +12,6 @@ import asyncio import json -import os import warnings from typing import Any @@ -181,11 +180,7 @@ async def ws_endpoint(websocket: fastapi.WebSocket) -> None: await websocket.accept() print("Client connected") - llm = ai.anthropic.AnthropicModel( - model="anthropic/claude-haiku-4.5", - base_url="https://ai-gateway.vercel.sh", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") result = ai.run(multiagent, llm, "When will the robots take over?") diff --git a/examples/samples/agent.py b/examples/samples/agent.py index 215105e8..953aa04c 100644 --- a/examples/samples/agent.py +++ b/examples/samples/agent.py @@ -1,18 +1,13 @@ """Coding agent with local filesystem tools.""" import asyncio -import os import vercel_ai_sdk as ai import vercel_ai_sdk.agent as agent async def main() -> None: - llm = ai.openai.OpenAIModel( - model="anthropic/claude-sonnet-4-20250514", - base_url="https://ai-gateway.vercel.sh/v1", - 
api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") coding_agent = agent.Agent( model=llm, diff --git a/examples/samples/custom_loop.py b/examples/samples/custom_loop.py index 4e65d83a..9c6d9622 100644 --- a/examples/samples/custom_loop.py +++ b/examples/samples/custom_loop.py @@ -1,7 +1,6 @@ """Custom agent loop with @ai.stream and manual tool execution.""" import asyncio -import os from collections.abc import AsyncGenerator from typing import Any @@ -64,11 +63,7 @@ async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult: async def main() -> None: - llm = ai.anthropic.AnthropicModel( - model="anthropic/claude-sonnet-4", - base_url="https://ai-gateway.vercel.sh", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run( agent, llm, "What's the weather and population of New York and Los Angeles?" diff --git a/examples/samples/hooks.py b/examples/samples/hooks.py index a1c4c14f..4b11bc3f 100644 --- a/examples/samples/hooks.py +++ b/examples/samples/hooks.py @@ -1,7 +1,6 @@ """Human-in-the-loop approval hooks.""" import asyncio -import os import pydantic @@ -57,11 +56,7 @@ async def graph(llm: ai.LanguageModel, query: str) -> ai.StreamResult: async def main() -> None: - llm = ai.openai.OpenAIModel( - model="anthropic/claude-sonnet-4-20250514", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run(graph, llm, "When will the robots take over?"): # Hook parts arrive as pending, waiting for resolution diff --git a/examples/samples/mcp_tools.py b/examples/samples/mcp_tools.py index cc83a0b0..1a2f58d1 100644 --- a/examples/samples/mcp_tools.py +++ b/examples/samples/mcp_tools.py @@ -30,11 +30,7 @@ async def context7_agent(llm: ai.LanguageModel, user_query: str) -> 
ai.StreamRes async def main() -> None: - llm = ai.openai.OpenAIModel( - model="openai/gpt-4.1", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run( context7_agent, llm, "How do I create middleware in Next.js?" diff --git a/examples/samples/multiagent.py b/examples/samples/multiagent.py index 098b56c7..efb042d3 100644 --- a/examples/samples/multiagent.py +++ b/examples/samples/multiagent.py @@ -1,7 +1,6 @@ """Multi-agent: parallel execution with labels, then summarization.""" import asyncio -import os import vercel_ai_sdk as ai @@ -54,11 +53,7 @@ async def multiagent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult: async def main() -> None: - llm = ai.anthropic.AnthropicModel( - model="anthropic/claude-haiku-4.5", - base_url="https://ai-gateway.vercel.sh", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run(multiagent, llm, "Process the number 5"): if msg.text_delta: diff --git a/examples/samples/simple.py b/examples/samples/simple.py index 69ba4196..ca8f09c4 100644 --- a/examples/samples/simple.py +++ b/examples/samples/simple.py @@ -1,5 +1,4 @@ import asyncio -import os import vercel_ai_sdk as ai @@ -21,11 +20,7 @@ async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult: async def main() -> None: - llm = ai.openai.OpenAIModel( - model="anthropic/claude-sonnet-4", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run(agent, llm, "When will the robots take over?"): if msg.text_delta: diff --git a/examples/samples/streaming_tool.py b/examples/samples/streaming_tool.py index 01eafd0b..bae3930c 100644 --- a/examples/samples/streaming_tool.py +++ 
b/examples/samples/streaming_tool.py @@ -1,7 +1,6 @@ """Streaming from inside a tool via runtime.put_message().""" import asyncio -import os import vercel_ai_sdk as ai @@ -34,11 +33,7 @@ async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult: async def main() -> None: - llm = ai.openai.OpenAIModel( - model="anthropic/claude-sonnet-4", - base_url="https://ai-gateway.vercel.sh/v1", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") async for msg in ai.run(agent, llm, "When will the robots take over?"): if msg.label == "tool_progress": diff --git a/examples/samples/structured_output.py b/examples/samples/structured_output.py index 3392eb6d..b9875b2b 100644 --- a/examples/samples/structured_output.py +++ b/examples/samples/structured_output.py @@ -1,5 +1,4 @@ import asyncio -import os import pydantic @@ -15,18 +14,7 @@ class WeatherForecast(pydantic.BaseModel): async def main() -> None: - # OpenAI-compatible provider - # llm = ai.openai.OpenAIModel( - # model="anthropic/claude-opus-4.6", - # base_url="https://ai-gateway.vercel.sh/v1", - # api_key=os.environ.get("AI_GATEWAY_API_KEY"), - # ) - - # Anthropic provider - llm = ai.anthropic.AnthropicModel( - model="claude-opus-4-6", - api_key=os.environ.get("ANTHROPIC_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") messages = ai.make_messages( system="You are a weather assistant. 
Respond with realistic weather data.", diff --git a/examples/temporal-durable/activities.py b/examples/temporal-durable/activities.py index a7dd1922..22cc864d 100644 --- a/examples/temporal-durable/activities.py +++ b/examples/temporal-durable/activities.py @@ -8,13 +8,11 @@ from __future__ import annotations import dataclasses -import os from typing import Any import temporalio.activity import vercel_ai_sdk as ai -import vercel_ai_sdk.anthropic # ── Tool activities (one per tool, plain functions) ─────────────── @@ -48,11 +46,7 @@ class LLMCallResult: @temporalio.activity.defn(name="llm_call") async def llm_call_activity(params: LLMCallParams) -> LLMCallResult: """Call the LLM, drain the stream, return the final message.""" - llm = ai.anthropic.AnthropicModel( - model="anthropic/claude-sonnet-4", - base_url="https://ai-gateway.vercel.sh", - api_key=os.environ.get("AI_GATEWAY_API_KEY"), - ) + llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6") messages = [ai.Message.model_validate(m) for m in params.messages] tools = [ai.ToolSchema.model_validate(t) for t in params.tool_schemas] diff --git a/src/vercel_ai_sdk/__init__.py b/src/vercel_ai_sdk/__init__.py index 314ce348..f9f02c03 100644 --- a/src/vercel_ai_sdk/__init__.py +++ b/src/vercel_ai_sdk/__init__.py @@ -1,4 +1,4 @@ -from . import ai_sdk_ui, anthropic, mcp, openai +from . 
import ai_gateway, ai_sdk_ui, anthropic, mcp, openai
 from .core.checkpoint import Checkpoint
 from .core.hooks import Hook, hook
 from .core.llm import LanguageModel
@@ -61,6 +61,7 @@
     "make_messages",
     "hook",
     # Submodules
+    "ai_gateway",
     "anthropic",
     "mcp",
     "openai",
diff --git a/src/vercel_ai_sdk/ai_gateway/__init__.py b/src/vercel_ai_sdk/ai_gateway/__init__.py
new file mode 100644
index 00000000..8939f87c
--- /dev/null
+++ b/src/vercel_ai_sdk/ai_gateway/__init__.py
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import os
+from collections.abc import AsyncGenerator, Sequence
+from typing import override
+
+import pydantic
+
+from .. import core
+from ..anthropic import AnthropicModel
+from ..openai import OpenAIModel
+
+_DEFAULT_BASE_URL = "https://ai-gateway.vercel.sh"
+
+
+class GatewayModel(core.llm.LanguageModel):
+    """Vercel AI Gateway provider.
+
+    Pre-configured for the Vercel AI Gateway with automatic routing:
+    Anthropic models use the native Anthropic API through the gateway,
+    except when structured output is requested (which requires the
+    OpenAI-compatible endpoint). All other models use the
+    OpenAI-compatible endpoint.
+
+    Usage::
+
+        import vercel_ai_sdk as ai
+
+        llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-sonnet-4")
+
+    Args:
+        model: Model identifier in ``provider/model`` format
+            (e.g., ``'anthropic/claude-sonnet-4'``, ``'openai/gpt-4.1'``)
+        api_key: API key for the gateway. Falls back to the
+            ``AI_GATEWAY_API_KEY`` environment variable, then to an
+            empty string; the key is not validated here.
+        base_url: Gateway root URL. Defaults to
+            ``https://ai-gateway.vercel.sh``. Trailing slashes and a
+            trailing ``/v1`` segment are stripped, so the
+            OpenAI-compatible form ``https://ai-gateway.vercel.sh/v1``
+            is accepted too.
+        thinking: Enable reasoning/thinking output.
+        budget_tokens: Max tokens for reasoning
+            (mutually exclusive with *reasoning_effort*). On the native
+            Anthropic route an unset or zero value becomes ``10000``.
+        reasoning_effort: Effort level for reasoning — ``'none'``,
+            ``'minimal'``, ``'low'``, ``'medium'``, ``'high'``, ``'xhigh'``
+            (mutually exclusive with *budget_tokens*). Forwarded only
+            to the OpenAI-compatible endpoint; the native Anthropic
+            route does not receive it.
+    """
+
+    def __init__(
+        self,
+        model: str = "anthropic/claude-sonnet-4",
+        api_key: str | None = None,
+        base_url: str = _DEFAULT_BASE_URL,
+        thinking: bool = False,
+        budget_tokens: int | None = None,
+        reasoning_effort: str | None = None,
+    ) -> None:
+        self._model = model
+        self._api_key = api_key or os.environ.get("AI_GATEWAY_API_KEY") or ""
+        # Keep only the gateway root. _make_openai() appends "/v1" itself
+        # and _make_anthropic() passes the root through unchanged, so a
+        # caller-supplied ".../v1" would otherwise become ".../v1/v1".
+        self._base_url = base_url.rstrip("/").removesuffix("/v1").rstrip("/")
+        self._thinking = thinking
+        self._budget_tokens = budget_tokens
+        self._reasoning_effort = reasoning_effort
+
+    def _is_anthropic_model(self) -> bool:
+        """Return True when the model id carries the ``anthropic/`` prefix."""
+        return self._model.startswith("anthropic/")
+
+    def _make_openai(self) -> OpenAIModel:
+        """Build a delegate for the gateway's ``/v1`` OpenAI-compatible API."""
+        return OpenAIModel(
+            model=self._model,
+            base_url=f"{self._base_url}/v1",
+            api_key=self._api_key,
+            thinking=self._thinking,
+            budget_tokens=self._budget_tokens,
+            reasoning_effort=self._reasoning_effort,
+        )
+
+    def _make_anthropic(self) -> AnthropicModel:
+        """Build a delegate for the gateway's native Anthropic API."""
+        return AnthropicModel(
+            model=self._model,
+            base_url=self._base_url,
+            api_key=self._api_key,
+            thinking=self._thinking,
+            # ``or`` also replaces an explicit 0 with the default budget.
+            budget_tokens=self._budget_tokens or 10000,
+        )
+
+    def _resolve(
+        self, output_type: type[pydantic.BaseModel] | None
+    ) -> core.llm.LanguageModel:
+        """Pick delegate based on model and feature requirements.
+
+        - Anthropic models without structured output use the native
+          Anthropic API (richer reasoning support, native tool format).
+        - Anthropic models *with* structured output use OpenAI-compat
+          (structured output via the Anthropic-native gateway endpoint
+          is not currently supported).
+        - All other models use OpenAI-compat.
+        """
+        if self._is_anthropic_model() and output_type is None:
+            return self._make_anthropic()
+        return self._make_openai()
+
+    @override
+    async def stream(
+        self,
+        messages: list[core.messages.Message],
+        tools: Sequence[core.tools.ToolLike] | None = None,
+        output_type: type[pydantic.BaseModel] | None = None,
+    ) -> AsyncGenerator[core.messages.Message]:
+        """Stream messages from the delegate chosen for this call.
+
+        A new delegate is built on every call; the route depends on
+        whether *output_type* is set.
+        """
+        delegate = self._resolve(output_type)
+        async for msg in delegate.stream(messages, tools, output_type):
+            yield msg