vercel-labs · anbuzin · Mar 12, 2026 · Mar 6, 2026 · Mar 7, 2026 · Mar 9, 2026
diff --git a/.gitignore b/.gitignore
@@ -210,3 +210,5 @@ __marimo__/
 .reference
 .vercel
 .env*.local
+
+.DS_Store
diff --git a/examples/samples/media/image_edit.py b/examples/samples/media/image_edit.py
@@ -0,0 +1,63 @@
+"""Image editing with a dedicated image model.
+
+Demonstrates sending an input image to be edited/transformed by the
+image model. The input image is passed as a FilePart in the user
+message, and the model returns the edited version.
+
+Usage:
+    uv run examples/samples/media/image_edit.py
+"""
+
+import asyncio
+import base64
+import pathlib
+
+import vercel_ai_sdk as ai
+
+
+async def main() -> None:
+    """Edit a sample photo with ``openai/gpt-image-1`` and save the results.
+
+    One user message carrying the instruction text and the source image is
+    sent to the model; each returned image is written to the working directory.
+    """
+    editor = ai.ai_gateway.GatewayImageModel(model="openai/gpt-image-1")
+
+    # The input image is referenced by URL here. To edit a local file instead:
+    #   image_data = pathlib.Path("my_photo.png").read_bytes()
+    #   source_image = ai.FilePart.from_bytes(image_data, media_type="image/png")
+    source_image = ai.FilePart.from_url(
+        "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg",
+        media_type="image/jpeg",
+    )
+
+    # Ask the model to transform the photo into anime style
+    instructions = (
+        "Transform this photo into a soft watercolor "
+        "anime style. Turn the cat into an anime girl "
+        "with cat ears and a tail, sitting in the same "
+        "pose. Add cherry blossom petals falling gently "
+        "in the background."
+    )
+
+    request = ai.Message(
+        role="user",
+        parts=[ai.TextPart(text=instructions), source_image],
+    )
+    # The text part comes first and the image part second, as listed above.
+    msg = await editor.generate([request], size="1024x1024")
+
+    edited = msg.images
+    print(f"Generated {len(edited)} edited image(s)")
+    for index, image in enumerate(edited):
+        filename = f"catgirl_edit_{index}.png"
+        payload = image.data
+        if not isinstance(payload, bytes):
+            # Non-bytes data is run through base64 decoding before writing.
+            payload = base64.b64decode(payload)
+        pathlib.Path(filename).write_bytes(payload)
+        print(f"  {filename}: {image.media_type}, {len(payload)} bytes")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/samples/media/image_gen_dedicated.py b/examples/samples/media/image_gen_dedicated.py
@@ -0,0 +1,53 @@
+"""Dedicated image generation model (Imagen 4).
+
+Uses the ImageModel interface to generate images via the AI Gateway's
+/image-model endpoint. Unlike language models, dedicated image models
+are optimized purely for image generation with parameters like size,
+aspect ratio, and seed.
+
+Usage:
+    uv run examples/samples/media/image_gen_dedicated.py
+"""
+
+import asyncio
+import base64
+import pathlib
+
+import vercel_ai_sdk as ai
+
+
+async def main() -> None:
+    """Generate two 16:9 images with Imagen 4, save them, and print any usage."""
+    imagen = ai.ai_gateway.GatewayImageModel(model="google/imagen-4.0-generate-001")
+
+    prompt = (
+        "Anime girl with twin tails and cat ears, wearing a "
+        "sailor school uniform, striking a victory pose in front "
+        "of a futuristic Tokyo skyline at night, neon lights "
+        "reflecting in her eyes, digital art style"
+    )
+    msg = await imagen.generate(
+        ai.make_messages(user=prompt),
+        n=2,
+        aspect_ratio="16:9",
+    )
+
+    images = msg.images
+    print(f"Generated {len(images)} images")
+    for index, image in enumerate(images):
+        filename = f"neko_girl_{index}.png"
+        payload = image.data
+        if not isinstance(payload, bytes):
+            payload = base64.b64decode(payload)
+        pathlib.Path(filename).write_bytes(payload)
+        print(f"  {filename}: {image.media_type}, {len(payload)} bytes")
+
+    if msg.usage:
+        print(
+            f"Usage: {msg.usage.input_tokens} input, "
+            f"{msg.usage.output_tokens} output tokens"
+        )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/samples/media/image_gen_inline.py b/examples/samples/media/image_gen_inline.py
@@ -0,0 +1,63 @@
+"""Inline image generation via a language model (Gemini 3 Pro Image).
+
+Models like Gemini 3 Pro Image and GPT-5 can generate images alongside
+text as part of their language model response. The images arrive as
+FileParts in the streamed Message.
+
+Usage:
+    uv run examples/samples/media/image_gen_inline.py
+"""
+
+import asyncio
+import base64
+import pathlib
+
+import vercel_ai_sdk as ai
+
+
+async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult:
+    """Run one stream loop for ``user_query`` under an anime-art system prompt."""
+    system_prompt = (
+        "You are an anime art assistant. When asked to draw or create "
+        "an image, generate it in a soft pastel anime style with "
+        "detailed backgrounds and expressive characters."
+    )
+
+    # The conversation is just the system prompt plus the caller's query;
+    # the loop is started with an empty tool list.
+    conversation = ai.make_messages(system=system_prompt, user=user_query)
+
+    return await ai.stream_loop(llm, messages=conversation, tools=[])
+
+
+async def main() -> None:
+    """Stream a drawing request and save any images in the final message."""
+    # Gemini 3 Pro Image is a language model that can output images inline
+    llm = ai.ai_gateway.GatewayModel(model="google/gemini-3-pro-image")
+    prompt = (
+        "Draw an anime girl with long silver hair and violet eyes, "
+        "sitting in a field of cherry blossoms at sunset. "
+        "She's wearing a traditional kimono and reading a book."
+    )
+
+    # Pre-bind the result so an empty stream cannot leave it undefined below.
+    final_msg = None
+    async for msg in ai.run(agent, llm, prompt):
+        final_msg = msg
+        if msg.text_delta:
+            print(msg.text_delta, end="", flush=True)
+    print()
+
+    # The final message may contain both text and images
+    if final_msg is None or not final_msg.images:
+        print("No images were generated in this response.")
+        return
+    for i, img in enumerate(final_msg.images):
+        filename = f"sakura_girl_{i}.png"
+        data = img.data if isinstance(img.data, bytes) else base64.b64decode(img.data)
+        pathlib.Path(filename).write_bytes(data)
+        print(f"Saved {filename} ({img.media_type}, {len(data)} bytes)")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/samples/media/multimodal.py b/examples/samples/media/multimodal.py
@@ -0,0 +1,42 @@
+"""Multimodal input example: send an image URL to the model.
+
+Usage:
+    uv run examples/samples/media/multimodal.py
+"""
+
+import asyncio
+
+import vercel_ai_sdk as ai
+
+IMAGE_URL = (
+    "https://4kwallpapers.com/images/wallpapers/hatsune-miku-3840x2160-15479.jpg"
+)
+
+
+async def agent(llm: ai.LanguageModel, user_query: str) -> ai.StreamResult:
+    """Ask the model ``user_query`` about the image at ``IMAGE_URL``.
+
+    Returns whatever ``ai.stream_loop`` produces for the single-message
+    conversation; the loop is started with an empty tool list.
+    """
+    # The question and the image reference are two parts of one user message.
+    question = ai.TextPart(text=user_query)
+    picture = ai.FilePart.from_url(IMAGE_URL)
+    user_turn = ai.Message(
+        role="user",
+        parts=[question, picture],
+    )
+    return await ai.stream_loop(llm, messages=[user_turn], tools=[])
+
+
+async def main() -> None:
+    """Stream the model's description of the sample image to stdout."""
+    llm = ai.ai_gateway.GatewayModel(model="anthropic/claude-opus-4.6")
+    async for update in ai.run(agent, llm, "What's in this image? Be concise."):
+        if delta := update.text_delta:
+            print(delta, end="", flush=True)
+    print()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/samples/media/video_gen.py b/examples/samples/media/video_gen.py
@@ -0,0 +1,50 @@
+"""Video generation with Veo 3.
+
+Uses the VideoModel interface to generate videos via the AI Gateway's
+/video-model endpoint. The gateway handles the long-running generation
+process (which can take minutes) and returns the result via SSE.
+
+Usage:
+    uv run examples/samples/media/video_gen.py
+"""
+
+import asyncio
+import base64
+import pathlib
+
+import vercel_ai_sdk as ai
+
+
+async def main() -> None:
+    """Request one 16:9 clip (``duration=8``) from Veo 3 and save each video."""
+    veo = ai.ai_gateway.GatewayVideoModel(model="google/veo-3.0-generate-001")
+    prompt = (
+        "An anime girl with long pink hair and a flowing white "
+        "dress stands on a hilltop at golden hour. A warm breeze "
+        "lifts her hair as she releases a paper lantern into the "
+        "sunset sky. The camera slowly pulls back to reveal dozens "
+        "of lanterns rising over a countryside village below. "
+        "Soft cel-shaded anime art style, warm palette."
+    )
+
+    print("Generating video (this may take a minute or two)...")
+    msg = await veo.generate(
+        ai.make_messages(user=prompt),
+        aspect_ratio="16:9",
+        duration=8,
+    )
+
+    print(f"Generated {len(msg.videos)} video(s)")
+    for index, clip in enumerate(msg.videos):
+        # Anything whose media type does not mention mp4 is saved as .webm.
+        suffix = "mp4" if "mp4" in clip.media_type else "webm"
+        filename = f"lantern_girl_{index}.{suffix}"
+        payload = clip.data
+        if not isinstance(payload, bytes):
+            payload = base64.b64decode(payload)
+        pathlib.Path(filename).write_bytes(payload)
+        print(f"  {filename}: {clip.media_type}, {len(payload)} bytes")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/vercel_ai_sdk/__init__.py b/src/vercel_ai_sdk/__init__.py
@@ -3,9 +3,11 @@
 from .core.checkpoint import Checkpoint, PendingHookInfo
 from .core.hooks import Hook, ToolApproval, hook
 from .core.llm import LanguageModel
+from .core.media import ImageModel, MediaModel, MediaResult, VideoModel
 
 # Re-export core types
 from .core.messages import (
+    FilePart,
     HookPart,
     Message,
     Part,
@@ -40,11 +42,16 @@
     "ToolPart",
     "ToolDelta",
     "ReasoningPart",
+    "FilePart",
     "ToolLike",
     "ToolSchema",
     "Tool",
     "Usage",
     "LanguageModel",
+    "MediaModel",
+    "MediaResult",
+    "ImageModel",
+    "VideoModel",
     "Runtime",
     "RunResult",
     "HookInfo",