Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,5 @@ __marimo__/
.reference
.vercel
.env*.local

.DS_Store
63 changes: 63 additions & 0 deletions examples/samples/media/image_edit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Image editing with a dedicated image model.

Demonstrates sending an input image to be edited/transformed by the
image model. The input image is passed as a FilePart in the user
message, and the model returns the edited version.

Usage:
uv run examples/samples/media/image_edit.py
"""

import asyncio
import base64
import pathlib

import vercel_ai_sdk as ai


async def main() -> None:
    """Ask the gateway image model to restyle a photo and save every result.

    A single user message carrying the text instruction and the input image
    is sent to the model; each image on the reply is written to
    ``catgirl_edit_<i>.png`` and summarised on stdout.
    """
    image_model = ai.ai_gateway.GatewayImageModel(model="openai/gpt-image-1")

    # The input here is fetched by URL. To edit a local file instead, build
    # the part from raw bytes, e.g.:
    #   image_data = pathlib.Path("my_photo.png").read_bytes()
    #   source_photo = ai.FilePart.from_bytes(image_data, media_type="image/png")
    source_photo = ai.FilePart.from_url(
        "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg",
        media_type="image/jpeg",
    )

    # Text instruction describing the requested anime-style transformation.
    instruction = ai.TextPart(
        text=(
            "Transform this photo into a soft watercolor "
            "anime style. Turn the cat into an anime girl "
            "with cat ears and a tail, sitting in the same "
            "pose. Add cherry blossom petals falling gently "
            "in the background."
        )
    )
    request = ai.Message(role="user", parts=[instruction, source_photo])

    msg = await image_model.generate([request], size="1024x1024")

    print(f"Generated {len(msg.images)} edited image(s)")
    for index, image in enumerate(msg.images):
        out_name = f"catgirl_edit_{index}.png"
        payload = image.data
        if not isinstance(payload, bytes):
            # Non-bytes payloads are treated as base64 text.
            payload = base64.b64decode(payload)
        pathlib.Path(out_name).write_bytes(payload)
        print(f" {out_name}: {image.media_type}, {len(payload)} bytes")


if __name__ == "__main__":
asyncio.run(main())
53 changes: 53 additions & 0 deletions examples/samples/media/image_gen_dedicated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Dedicated image generation model (Imagen 4).

Uses the ImageModel interface to generate images via the AI Gateway's
/image-model endpoint. Unlike language models, dedicated image models
are optimized purely for image generation with parameters like size,
aspect ratio, and seed.

Usage:
uv run examples/samples/media/image_gen_dedicated.py
"""

import asyncio
import base64
import pathlib

import vercel_ai_sdk as ai


async def main() -> None:
    """Generate two 16:9 images from a text prompt and save them to disk.

    Each returned image is written to ``neko_girl_<i>.png``. Token usage is
    printed afterwards when the reply carries it.
    """
    image_model = ai.ai_gateway.GatewayImageModel(
        model="google/imagen-4.0-generate-001",
    )

    # Prompt describing the anime character to render.
    prompt = (
        "Anime girl with twin tails and cat ears, wearing a "
        "sailor school uniform, striking a victory pose in front "
        "of a futuristic Tokyo skyline at night, neon lights "
        "reflecting in her eyes, digital art style"
    )
    conversation = ai.make_messages(user=prompt)

    msg = await image_model.generate(conversation, n=2, aspect_ratio="16:9")

    print(f"Generated {len(msg.images)} images")
    for index, image in enumerate(msg.images):
        out_name = f"neko_girl_{index}.png"
        payload = image.data
        if not isinstance(payload, bytes):
            # Non-bytes payloads are treated as base64 text.
            payload = base64.b64decode(payload)
        pathlib.Path(out_name).write_bytes(payload)
        print(f" {out_name}: {image.media_type}, {len(payload)} bytes")

    if not msg.usage:
        return
    print(
        f"Usage: {msg.usage.input_tokens} input, "
        f"{msg.usage.output_tokens} output tokens"
    )


if __name__ == "__main__":
asyncio.run(main())
63 changes: 63 additions & 0 deletions examples/samples/media/image_gen_inline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Inline image generation via a language model (Gemini 3 Pro Image).

Models like Gemini 3 Pro Image and GPT-5 can generate images alongside
text as part of their language model response. The images arrive as
FileParts in the streamed Message.

Usage:
uv run examples/samples/media/image_gen_inline.py
"""

import asyncio
import base64
import pathlib

import vercel_ai_sdk as ai


async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult:
    """Run one stream loop for ``user_query`` under an anime-art system prompt.

    Args:
        llm: Language model handed to ``ai.stream_loop``.
        user_query: Text used as the user message.

    Returns:
        Whatever ``ai.stream_loop`` returns for this conversation.
    """
    system_prompt = (
        "You are an anime art assistant. When asked to draw or create "
        "an image, generate it in a soft pastel anime style with "
        "detailed backgrounds and expressive characters."
    )
    conversation = ai.make_messages(system=system_prompt, user=user_query)
    # No tools are offered to the model in this example.
    return await ai.stream_loop(llm, messages=conversation, tools=[])


async def main() -> None:
    """Stream an image-capable chat response and save any images it contains.

    Text deltas are echoed to stdout as they arrive. Once the stream ends,
    the images attached to the last streamed message are written to
    ``sakura_girl_<i>.png``; if there are none (or the stream produced no
    messages at all), a notice is printed instead.
    """
    # Gemini 3 Pro Image is a language model that can output images inline
    llm = ai.ai_gateway.GatewayModel(model="google/gemini-3-pro-image")

    prompt = (
        "Draw an anime girl with long silver hair and violet eyes, "
        "sitting in a field of cherry blossoms at sunset. "
        "She's wearing a traditional kimono and reading a book."
    )

    # Track the last message explicitly: the target of an ``async for`` is
    # never bound when the stream yields nothing, so reading the loop
    # variable after the loop would raise UnboundLocalError.
    final_msg = None
    async for msg in ai.run(agent, llm, prompt):
        final_msg = msg
        if msg.text_delta:
            print(msg.text_delta, end="", flush=True)

    print()

    # The final message may contain both text and images
    images = final_msg.images if final_msg is not None else None
    if not images:
        print("No images were generated in this response.")
        return

    for i, img in enumerate(images):
        filename = f"sakura_girl_{i}.png"
        # Non-bytes payloads are treated as base64 text.
        data = img.data if isinstance(img.data, bytes) else base64.b64decode(img.data)
        pathlib.Path(filename).write_bytes(data)
        print(f"Saved {filename} ({img.media_type}, {len(data)} bytes)")


if __name__ == "__main__":
asyncio.run(main())
42 changes: 42 additions & 0 deletions examples/samples/media/multimodal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Multimodal input example: send an image URL to the model.

Usage:
uv run examples/samples/media/multimodal.py
"""

import asyncio

import vercel_ai_sdk as ai

IMAGE_URL = (
"https://4kwallpapers.com/images/wallpapers/hatsune-miku-3840x2160-15479.jpg"
)


async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult:
    """Run one stream loop asking ``user_query`` about the image at ``IMAGE_URL``.

    Args:
        llm: Language model handed to ``ai.stream_loop``.
        user_query: Text placed alongside the image in the user message.

    Returns:
        Whatever ``ai.stream_loop`` returns for this conversation.
    """
    # One user message carrying both the question and the image reference.
    question = ai.TextPart(text=user_query)
    picture = ai.FilePart.from_url(IMAGE_URL)
    user_message = ai.Message(role="user", parts=[question, picture])
    # No tools are offered to the model in this example.
    return await ai.stream_loop(llm, messages=[user_message], tools=[])


async def main() -> None:
    """Ask the model to describe the image and echo the streamed answer."""
    llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6")
    question = "What's in this image? Be concise."

    async for msg in ai.run(agent, llm, question):
        delta = msg.text_delta
        if not delta:
            # Skip messages that carry no new text.
            continue
        print(delta, end="", flush=True)

    # Finish the streamed output with a newline.
    print()


if __name__ == "__main__":
asyncio.run(main())
50 changes: 50 additions & 0 deletions examples/samples/media/video_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Video generation with Veo 3.

Uses the VideoModel interface to generate videos via the AI Gateway's
/video-model endpoint. The gateway handles the long-running generation
process (which can take minutes) and returns the result via SSE.

Usage:
uv run examples/samples/media/video_gen.py
"""

import asyncio
import base64
import pathlib

import vercel_ai_sdk as ai


async def main() -> None:
    """Generate an 8-second 16:9 video from a text prompt and save the results.

    Each returned video is written to ``lantern_girl_<i>.mp4`` when its media
    type mentions "mp4", otherwise to ``lantern_girl_<i>.webm``.
    """
    video_model = ai.ai_gateway.GatewayVideoModel(
        model="google/veo-3.0-generate-001",
    )

    # Tell the user up front that the request is slow.
    print("Generating video (this may take a minute or two)...")

    # Prompt describing the short anime-style clip.
    prompt = (
        "An anime girl with long pink hair and a flowing white "
        "dress stands on a hilltop at golden hour. A warm breeze "
        "lifts her hair as she releases a paper lantern into the "
        "sunset sky. The camera slowly pulls back to reveal dozens "
        "of lanterns rising over a countryside village below. "
        "Soft cel-shaded anime art style, warm palette."
    )
    conversation = ai.make_messages(user=prompt)

    msg = await video_model.generate(
        conversation,
        aspect_ratio="16:9",
        duration=8,
    )

    print(f"Generated {len(msg.videos)} video(s)")
    for index, clip in enumerate(msg.videos):
        # Pick the file extension from the reported media type.
        if "mp4" in clip.media_type:
            suffix = "mp4"
        else:
            suffix = "webm"
        out_name = f"lantern_girl_{index}.{suffix}"

        payload = clip.data
        if not isinstance(payload, bytes):
            # Non-bytes payloads are treated as base64 text.
            payload = base64.b64decode(payload)
        pathlib.Path(out_name).write_bytes(payload)
        print(f" {out_name}: {clip.media_type}, {len(payload)} bytes")


if __name__ == "__main__":
asyncio.run(main())
7 changes: 7 additions & 0 deletions src/vercel_ai_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
from .core.checkpoint import Checkpoint, PendingHookInfo
from .core.hooks import Hook, ToolApproval, hook
from .core.llm import LanguageModel
from .core.media import ImageModel, MediaModel, MediaResult, VideoModel

# Re-export core types
from .core.messages import (
FilePart,
HookPart,
Message,
Part,
Expand Down Expand Up @@ -40,11 +42,16 @@
"ToolPart",
"ToolDelta",
"ReasoningPart",
"FilePart",
"ToolLike",
"ToolSchema",
"Tool",
"Usage",
"LanguageModel",
"MediaModel",
"MediaResult",
"ImageModel",
"VideoModel",
"Runtime",
"RunResult",
"HookInfo",
Expand Down
Loading
Loading