diff --git a/README.md b/README.md index 8ea0486..8f9fdb2 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ This repo contains a modern fullstack cookbook app showcasing the agentic AI cap ## Requirements -- **Python** 3.11 or higher; we recommend [uv 0.7+](https://github.com/astral-sh/uv) for working with Python -- **Node.js** 22.x or higher; we recommend [pnpm 10.17+](https://pnpm.io/installation) for working with Node.js +- **Python** 3.11 or higher; we recommend [uv 0.7+](https://github.com/astral-sh/uv) for working with Python +- **Node.js** 22.x or higher; we recommend [pnpm 10.17+](https://pnpm.io/installation) for working with Node.js ## Quick Start @@ -44,6 +44,7 @@ COOKBOOK_ENDPOINTS='[ cd backend && uv sync cd .. cd frontend && npm install +cd .. ``` ### Run the app @@ -131,21 +132,21 @@ backend/ #### Backend Features & Technologies -- FastAPI - Modern Python web framework -- uvicorn - ASGI server -- uv - Fast Python package manager -- openai - OpenAI Python client for LLM proxying +- FastAPI - Modern Python web framework +- uvicorn - ASGI server +- uv - Fast Python package manager +- openai - OpenAI Python client for LLM proxying #### Backend Routes -- `GET /api/health` - Health check -- `GET /api/recipes` - List available recipe slugs -- `GET /api/endpoints` - List configured LLM endpoints -- `GET /api/models?endpointId=xxx` - List models for endpoint -- `POST /api/recipes/multiturn-chat` - Multi-turn chat endpoint -- `POST /api/recipes/batch-text-classification` - Text Classification endpoint -- `POST /api/recipes/image-captioning` - Image captioning endpoint -- `GET /api/recipes/{slug}/code` - Get recipe backend source code +- `GET /api/health` - Health check +- `GET /api/recipes` - List available recipe slugs +- `GET /api/endpoints` - List configured LLM endpoints +- `GET /api/models?endpointId=xxx` - List models for endpoint +- `POST /api/recipes/multiturn-chat` - Multi-turn chat endpoint +- `POST /api/recipes/batch-text-classification` - Text 
Classification endpoint +- `POST /api/recipes/image-captioning` - Image captioning endpoint +- `GET /api/recipes/{slug}/code` - Get recipe backend source code ### React Frontend @@ -166,26 +167,26 @@ frontend/ #### Frontend Features & Technologies -- **React 18 + TypeScript** - Type-safe component development -- **Vite** - Lightning-fast dev server and optimized production builds -- **React Router v7** - Auto-generated routing with lazy loading -- **Mantine v7** - Comprehensive UI component library with dark/light themes -- **SWR** - Lightweight data fetching with automatic caching -- **Vercel AI SDK** - Streaming chat UI with token-by-token responses -- **MDX** - Markdown documentation with JSX support -- **Recipe Registry** - Single source of truth for all recipes (pure data + React components) +- **React 18 + TypeScript** - Type-safe component development +- **Vite** - Lightning-fast dev server and optimized production builds +- **React Router v7** - Auto-generated routing with lazy loading +- **Mantine v7** - Comprehensive UI component library with dark/light themes +- **SWR** - Lightweight data fetching with automatic caching +- **Vercel AI SDK** - Streaming chat UI with token-by-token responses +- **MDX** - Markdown documentation with JSX support +- **Recipe Registry** - Single source of truth for all recipes (pure data + React components) #### Frontend Routes -- `/` - Recipe index -- `/:slug` - Recipe demo (interactive UI) -- `/:slug/readme` - Recipe documentation -- `/:slug/code` - Recipe source code view +- `/` - Recipe index +- `/:slug` - Recipe demo (interactive UI) +- `/:slug/readme` - Recipe documentation +- `/:slug/code` - Recipe source code view ## Documentation -- [Contributing Guide](docs/contributing.md) - Architecture, patterns, and how to add recipes -- [Docker Deployment Guide](docs/docker.md) - Container deployment with MAX +- [Contributing Guide](docs/contributing.md) - Architecture, patterns, and how to add recipes +- [Docker Deployment 
Guide](docs/docker.md) - Container deployment with MAX ## License diff --git a/backend/src/main.py b/backend/src/main.py index fb00486..be95e77 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -8,7 +8,7 @@ from fastapi.staticfiles import StaticFiles from src.core import endpoints, models -from src.recipes import batch_text_classification, image_captioning, multiturn_chat +from src.recipes import batch_text_classification, image_captioning, image_generation, multiturn_chat # Load environment variables from .env.local env_path = Path(__file__).parent.parent / ".env.local" @@ -38,6 +38,7 @@ app.include_router(batch_text_classification.router) app.include_router(multiturn_chat.router) app.include_router(image_captioning.router) +app.include_router(image_generation.router) @app.get("/api/health") diff --git a/backend/src/recipes/image_generation.py b/backend/src/recipes/image_generation.py new file mode 100644 index 0000000..82a526b --- /dev/null +++ b/backend/src/recipes/image_generation.py @@ -0,0 +1,225 @@ +""" +Image Generation with Text-to-Image Diffusion Models + +This recipe demonstrates how to generate images from text prompts using +OpenAI-compatible endpoints with Modular MAX's FLUX.2 diffusion models. +Users provide a text description and optional generation parameters, and +receive a generated image with performance metrics. 
+ +Key features: +- Text-to-image generation: Create images from natural language descriptions +- Configurable parameters: Resolution, inference steps, guidance scale +- Performance metrics: Total generation duration tracking +- Negative prompts: Specify content to avoid in generated images +- Open Responses-compatible: Works with any endpoint exposing a /responses route + +Architecture: +- FastAPI endpoint: Receives generation requests with prompt and parameters +- httpx.AsyncClient: POSTs the generation request to {baseUrl}/responses +- MAX-specific parameters: Sent in the JSON body under provider_options.image +- Performance tracking: Measures total generation time in milliseconds + +Request Format: +- endpointId: Which LLM endpoint to use +- modelName: Which model to use (e.g., "flux2-dev-fp4") +- prompt: Text description of the image to generate +- width/height: Output image dimensions (default 1024x1024) +- steps: Number of denoising iterations (default 28) +- guidance_scale: Prompt adherence strength (default 3.5) +- negative_prompt: Content to avoid in the generated image + +Response Format: +- JSON object with base64-encoded image data and generation metrics +- Fields: image_b64, width, height, duration +""" + +import time + +import httpx +from fastapi import APIRouter, HTTPException +from fastapi.responses import Response +from pydantic import BaseModel + +from ..core.endpoints import get_cached_endpoint +from ..core.code_reader import read_source_file + +router = APIRouter(prefix="/api/recipes", tags=["recipes"]) + + +# ============================================================================ +# Types and Models +# ============================================================================ + + +class ImageGenerationRequest(BaseModel): + """ + Request body for image generation. + + The frontend sends the endpoint ID, model name, a text prompt, and + optional generation parameters. 
The backend looks up the actual API + credentials from the endpoint ID and generates the image. + """ + endpointId: str + modelName: str + prompt: str + width: int = 1024 + height: int = 1024 + steps: int = 28 + guidance_scale: float = 3.5 + negative_prompt: str = "" + + +class ImageGenerationResult(BaseModel): + """ + Result of generating an image from a text prompt. + + Contains the base64-encoded image data along with the dimensions + and performance metrics (duration in milliseconds). + """ + image_b64: str + width: int + height: int + duration: int + + +# ============================================================================ +# API Endpoints +# ============================================================================ + + +@router.post("/image-generation") +async def image_generation(request: ImageGenerationRequest) -> ImageGenerationResult: + """ + Image generation endpoint backed by the Open Responses API (/responses). + + Accepts a text prompt and generation parameters, then returns a + base64-encoded image along with performance metrics. + + The endpoint POSTs the request with httpx to the upstream /responses route + (Modular Open Responses standard) instead of going through the OpenAI SDK. + MAX-specific parameters are sent in the JSON body under provider_options.image. + + Args: + request: ImageGenerationRequest with prompt and generation parameters + + Returns: + ImageGenerationResult with base64 image data, dimensions, and duration + + Raises: + HTTPException: 400 if the endpoint is unknown, 500 if its configuration + is incomplete, 502 on upstream failure or missing image data + """ + # Get endpoint configuration from cache. The endpoint ID comes from the + # frontend and maps to a full endpoint configuration (baseUrl, apiKey) + # stored in .env.local. This keeps API keys secure on the server side. 
+ endpoint = get_cached_endpoint(request.endpointId) + if not endpoint: + raise HTTPException( + status_code=400, + detail=f"Endpoint not found: {request.endpointId}" + ) + + base_url = endpoint.get("baseUrl") + api_key = endpoint.get("apiKey") + + if not base_url or not api_key: + raise HTTPException( + status_code=500, + detail="Invalid endpoint configuration: missing baseUrl or apiKey" + ) + + # Build provider_options for MAX-specific generation parameters. + # The Modular MAX API uses the Open Responses standard (/v1/responses) + # with image parameters nested under provider_options.image. + image_options: dict = { + "width": request.width, + "height": request.height, + "steps": request.steps, + "guidance_scale": request.guidance_scale, + } + if request.negative_prompt: + image_options["negative_prompt"] = request.negative_prompt + + payload = { + "model": request.modelName, + "input": request.prompt, + "provider_options": {"image": image_options}, + } + + try: + # Start timing for duration measurement + start_time = time.time() + + # Use httpx directly to avoid OpenAI SDK response parsing incompatibility + # with the Modular Open Responses API (/v1/responses). 
+ async with httpx.AsyncClient() as http_client: + resp = await http_client.post( + f"{base_url.rstrip('/')}/responses", + json=payload, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=300, + ) + + if resp.status_code != 200: + raise HTTPException( + status_code=502, + detail=f"Upstream error {resp.status_code}: {resp.text}", + ) + + # Calculate total generation duration in milliseconds + duration_ms = int((time.time() - start_time) * 1000) + + # Extract base64 image data from output[0].content[0].image_data + data = resp.json() + try: + image_b64 = data["output"][0]["content"][0]["image_data"] + except (KeyError, IndexError, TypeError): + raise HTTPException( + status_code=502, + detail="No image data in response from upstream endpoint" + ) + + if not image_b64: + raise HTTPException( + status_code=502, + detail="No image data in response from upstream endpoint" + ) + + return ImageGenerationResult( + image_b64=image_b64, + width=request.width, + height=request.height, + duration=duration_ms, + ) + + except HTTPException: + # Re-raise our own HTTP exceptions (like the empty response check above) + raise + except Exception as error: + # Catch upstream API errors (connection failures, rate limits, etc.) + # and return a 502 to indicate the upstream service failed. + raise HTTPException( + status_code=502, + detail=f"Image generation failed: {str(error)}" + ) + + +@router.get("/image-generation/code") +async def get_image_generation_code(): + """ + Get the source code for the image generation recipe. + + Returns the Python source code of this file as plain text. + This enables the frontend's "Code" view to display the backend implementation. + """ + try: + # Use __file__ to get the path to this source file, then read it. + # This allows the frontend to display the actual backend code for + # educational purposes. 
+ code_data = read_source_file(__file__) + return Response(content=code_data, media_type="text/plain") + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error reading source code: {str(e)}" + ) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 05b8b81..5fca3d6 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -35,6 +35,7 @@ "@types/react-dom": "^19.1.9", "@vitejs/plugin-react": "^5.0.4", "@vitest/ui": "^4.0.6", + "baseline-browser-mapping": "^2.10.20", "chokidar": "^4.0.3", "concurrently": "^9.2.1", "eslint": "^9.36.0", @@ -244,6 +245,7 @@ "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -637,6 +639,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=18" }, @@ -680,6 +683,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=18" } @@ -1477,6 +1481,7 @@ "resolved": "https://registry.npmjs.org/@mantine/core/-/core-7.17.8.tgz", "integrity": "sha512-42sfdLZSCpsCYmLCjSuntuPcDg3PLbakSmmYfz5Auea8gZYLr+8SS5k647doVu0BRAecqYOytkX2QC5/u/8VHw==", "license": "MIT", + "peer": true, "dependencies": { "@floating-ui/react": "^0.26.28", "clsx": "^2.1.1", @@ -1511,6 +1516,7 @@ "resolved": "https://registry.npmjs.org/@mantine/hooks/-/hooks-7.17.8.tgz", "integrity": "sha512-96qygbkTjRhdkzd5HDU8fMziemN/h758/EwrFu7TlWrEP10Vw076u+Ap/sG6OT4RGPZYYoHrTlT+mkCZblWHuw==", "license": "MIT", + "peer": true, "peerDependencies": { "react": "^18.x || ^19.x" } @@ -2177,8 +2183,7 @@ "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz", "integrity": "sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@types/babel__core": { "version": "7.20.5", @@ 
-2575,6 +2580,7 @@ "integrity": "sha512-MKNwXh3seSK8WurXF7erHPJ2AONmMwkI7zAMrXZDPIru8jRqkk6rGDBVbw4mLwfqA+ZZliiDPg05JQ3uW66tKQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -2584,6 +2590,7 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.2.tgz", "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -2594,6 +2601,7 @@ "integrity": "sha512-9KQPoO6mZCi7jcIStSnlOWn2nEF3mNmyr3rIAsGnAbQKYbRLyqmeSc39EVgtxXVia+LMT8j3knZLAZAh+xLmrw==", "dev": true, "license": "MIT", + "peer": true, "peerDependencies": { "@types/react": "^19.2.0" } @@ -2657,6 +2665,7 @@ "integrity": "sha512-BnOroVl1SgrPLywqxyqdJ4l3S2MsKVLDVxZvjI1Eoe8ev2r3kGDo+PcMihNmDE+6/KjkTubSJnmqGZZjQSBq/g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.2", "@typescript-eslint/types": "8.46.2", @@ -3021,6 +3030,7 @@ "integrity": "sha512-1ekpBsYNUm0Xv/0YsTvoSRmiRkmzz9Pma7qQ3Ui76sg2gwp2/ewSWqx4W/HfaN5dF0E8iBbidFo1wGaeqXYIrQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vitest/utils": "4.0.6", "fflate": "^0.8.2", @@ -3056,6 +3066,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -3197,13 +3208,16 @@ "license": "MIT" }, "node_modules/baseline-browser-mapping": { - "version": "2.8.18", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.18.tgz", - "integrity": "sha512-UYmTpOBwgPScZpS4A+YbapwWuBwasxvO/2IOHArSsAhL/+ZdmATBXTex3t+l2hXwLVYK382ibr/nKoY9GKe86w==", + "version": "2.10.20", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.20.tgz", + 
"integrity": "sha512-1AaXxEPfXT+GvTBJFuy4yXVHWJBXa4OdbIebGN/wX5DlsIkU0+wzGnd2lOzokSk51d5LUmqjgBLRLlypLUqInQ==", "dev": true, "license": "Apache-2.0", "bin": { - "baseline-browser-mapping": "dist/cli.js" + "baseline-browser-mapping": "dist/cli.cjs" + }, + "engines": { + "node": ">=6.0.0" } }, "node_modules/bidi-js": { @@ -3260,6 +3274,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", @@ -3397,6 +3412,7 @@ "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz", "integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@chevrotain/cst-dts-gen": "11.0.3", "@chevrotain/gast": "11.0.3", @@ -3661,6 +3677,7 @@ "resolved": "https://registry.npmjs.org/cytoscape/-/cytoscape-3.33.1.tgz", "integrity": "sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10" } @@ -4070,6 +4087,7 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -4270,8 +4288,7 @@ "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz", "integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/dompurify": { "version": "3.3.0", @@ -4418,6 +4435,7 @@ "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ 
-5756,7 +5774,6 @@ "integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==", "dev": true, "license": "MIT", - "peer": true, "bin": { "lz-string": "bin/bin.js" } @@ -7308,6 +7325,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -7494,7 +7512,6 @@ "integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "ansi-regex": "^5.0.1", "ansi-styles": "^5.0.0", @@ -7510,7 +7527,6 @@ "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=10" }, @@ -7523,8 +7539,7 @@ "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", "integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/pretty-ms": { "version": "9.3.0", @@ -7614,6 +7629,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz", "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -7623,6 +7639,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz", "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -8144,6 +8161,7 @@ "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.52.5.tgz", "integrity": "sha512-3GuObel8h7Kqdjt0gxkEzaifHTqLVW56Y/bjN7PSQtkKr0w3V/QYSdt6QWYtd7A1xUtYQigtdUfgj1RvWVtorw==", "license": "MIT", + "peer": true, "dependencies": { "@types/estree": "1.0.8" }, @@ -8644,6 +8662,7 @@ "integrity": 
"sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -8819,6 +8838,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -9198,6 +9218,7 @@ "integrity": "sha512-uzcxnSDVjAopEUjljkWh8EIrg6tlzrjFUfMcR1EVsRDGwf/ccef0qQPRyOrROwhrTDaApueq+ja+KLPlzR/zdg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -9291,6 +9312,7 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -9304,6 +9326,7 @@ "integrity": "sha512-gR7INfiVRwnEOkCk47faros/9McCZMp5LM+OMNWGLaDBSvJxIzwjgNFufkuePBNaesGRnLmNfW+ddbUJRZn0nQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vitest/expect": "4.0.6", "@vitest/mocker": "4.0.6", diff --git a/frontend/package.json b/frontend/package.json index f89f881..f946dee 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -50,6 +50,7 @@ "@types/react-dom": "^19.1.9", "@vitejs/plugin-react": "^5.0.4", "@vitest/ui": "^4.0.6", + "baseline-browser-mapping": "^2.10.20", "chokidar": "^4.0.3", "concurrently": "^9.2.1", "eslint": "^9.36.0", diff --git a/frontend/src/components/Navbar.tsx b/frontend/src/components/Navbar.tsx index e55ff8e..ebdf514 100644 --- a/frontend/src/components/Navbar.tsx +++ b/frontend/src/components/Navbar.tsx @@ -1,6 +1,5 @@ import { Link, useLocation } from 'react-router-dom' import { - Accordion, Anchor, AppShell, Box, @@ -9,14 +8,13 @@ import { Stack, Text, } from '@mantine/core' -import { IconChevronRight, IconPlus } from '@tabler/icons-react' +import { IconChevronRight } from '@tabler/icons-react' import { 
iconStroke } from '~/lib/theme' import { isRecipeImplemented } from '~/recipes/registry' import { useEndpointFromQuery } from '~/lib/hooks' import { SelectEndpoint } from './SelectEndpoint' import { SelectModel } from './SelectModel' import chapters from '~/lib/chapters' -import classes from './Navbar.module.css' import type { NavItem } from '~/lib/types' interface NavItemProps { @@ -27,8 +25,8 @@ interface NavItemProps { function NavItem({ item, currentRecipe }: NavItemProps) { if (isRecipeImplemented(item.slug)) { return ( - -
  • +
  • + {item.title} @@ -39,13 +37,13 @@ function NavItem({ item, currentRecipe }: NavItemProps) { )} -
  • - {currentRecipe === item.slug && ( - - - - )} -
    + {currentRecipe === item.slug && ( + + + + )} + + ) } return ( @@ -71,31 +69,16 @@ export function Navbar() { - - } - classNames={{ chevron: classes.chevron }} - > - {chapters.sections.map((section) => ( - - {section.title} - -
      - {section.items.map((item) => ( - - ))} -
    -
    -
    + +
      + {chapters.sections[0].items.map((item) => ( + ))} - +
    ) diff --git a/frontend/src/recipes/components.ts b/frontend/src/recipes/components.ts index b374d21..91d0b6c 100644 --- a/frontend/src/recipes/components.ts +++ b/frontend/src/recipes/components.ts @@ -26,6 +26,7 @@ export const recipeComponents: Record< ), 'multiturn-chat': lazyComponentExport(() => import('./multiturn-chat/ui')), 'image-captioning': lazyComponentExport(() => import('./image-captioning/ui')), + 'image-generation': lazyComponentExport(() => import('./image-generation/ui')), } /** @@ -35,6 +36,7 @@ export const readmeComponents: Record 'text-classification': lazy(() => import('./text-classification/README.mdx')), 'multiturn-chat': lazy(() => import('./multiturn-chat/README.mdx')), 'image-captioning': lazy(() => import('./image-captioning/README.mdx')), + 'image-generation': lazy(() => import('./image-generation/README.mdx')), } /** diff --git a/frontend/src/recipes/image-generation/README.mdx b/frontend/src/recipes/image-generation/README.mdx new file mode 100644 index 0000000..a222307 --- /dev/null +++ b/frontend/src/recipes/image-generation/README.mdx @@ -0,0 +1,131 @@ +## Architecture + +**Frontend** (`ui.tsx`): Form-based UI with `useSWRMutation` for non-streaming POST + +- Text prompt input with configurable generation parameters +- Collapsible advanced settings (resolution, steps, guidance scale, negative prompt) +- Displays generated image with performance metrics and download option + +**Backend** (`image_generation.py`): FastAPI proxy to the Open Responses API (`/responses`) + +- Uses `httpx.AsyncClient` to POST the generation request upstream +- Passes MAX-specific parameters (width, height, steps, guidance_scale) via `provider_options.image` +- Returns base64-encoded image data with generation duration + +## Key Implementation Details + +**Image Generation** (`image_generation.py:90-204`): + +- Looks up cached endpoint credentials (`baseUrl`, `apiKey`) from the endpoint ID +- POSTs `model`, `input`, and `provider_options` to `{baseUrl}/responses` +- MAX-specific params (steps, guidance_scale, 
negative_prompt) sent via `provider_options.image` +- Measures total generation duration in milliseconds + +**Frontend State** (`ui.tsx:88-99`): + +- Prompt and parameter state managed with `useState` hooks +- `useSWRMutation` handles the POST request lifecycle (loading, error, result) +- Result contains base64 image data displayed inline + +**Download** (`ui.tsx:119-126`): + +- Creates a data URL from the base64 response +- Triggers browser download as PNG file + +### Why Non-Streaming + +- **Complete result**: Image generation returns a single image, not progressive data +- **Simple protocol**: Standard JSON request/response, no streaming complexity +- **Clear loading state**: Spinner while generating, then full image appears +- **Framework-standard**: Uses `useSWRMutation` like the text classification recipe + +### Generation Parameters + +- **Steps** (default 28): Number of denoising iterations. More steps = higher quality but slower +- **Guidance Scale** (default 3.5): How closely to follow the prompt. Higher = more literal +- **Width/Height** (default 1024x1024): Output dimensions in pixels, multiples of 16 +- **Negative Prompt**: Describes what to avoid in the generated image + +### File References + +- Frontend: `frontend/src/recipes/image-generation/ui.tsx` +- Backend: `backend/src/recipes/image_generation.py` +- API fetcher: `generateImage` function in ui.tsx +- Parameter handling: `provider_options.image` in image_generation.py + +## Protocol Flow + +1. Frontend sends `POST /api/recipes/image-generation` with: + + ```json + { + "endpointId": "max-local", + "modelName": "flux2-dev-fp4", + "prompt": "A serene mountain landscape at sunset", + "width": 1024, + "height": 1024, + "steps": 28, + "guidance_scale": 3.5, + "negative_prompt": "" + } + ``` + +2. 
Backend generates image (`image_generation.py:155-175`): + + ```python + resp = await http_client.post( + f"{base_url.rstrip('/')}/responses", + json=payload, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=300, + ) + data = resp.json() + image_b64 = data["output"][0]["content"][0]["image_data"] + ``` + +3. Backend returns JSON with base64 image and metrics: + + ```json + { + "image_b64": "/9j/4AAQSkZJRg...", + "width": 1024, + "height": 1024, + "duration": 823 + } + ``` + +4. Frontend displays the generated image with duration badge + +## API Reference + +**Endpoint:** `POST /api/recipes/image-generation` + +**Content-Type:** `application/json` + +**Request Body:** + +```json +{ + "endpointId": "max-local", + "modelName": "flux2-dev-fp4", + "prompt": "A photorealistic image of a golden retriever playing in autumn leaves", + "width": 1024, + "height": 1024, + "steps": 28, + "guidance_scale": 3.5, + "negative_prompt": "blurry, low quality" +} +``` + +**Response:** + +JSON object with base64-encoded image data and metrics: + +```json +{ + "image_b64": "...base64-encoded image data...", + "width": 1024, + "height": 1024, + "duration": 823 +} +``` diff --git a/frontend/src/recipes/image-generation/ui.tsx b/frontend/src/recipes/image-generation/ui.tsx new file mode 100644 index 0000000..43cfea7 --- /dev/null +++ b/frontend/src/recipes/image-generation/ui.tsx @@ -0,0 +1,343 @@ +/** + * Image Generation with Text-to-Image Diffusion Models + * + * This recipe demonstrates image generation from text prompts using + * OpenAI-compatible endpoints with Modular MAX's FLUX.2 diffusion models. + * Users provide a text description and optional generation parameters, + * and receive a generated image with performance metrics. 
+ */ + +import { useState } from 'react' +import prettyMilliseconds from 'pretty-ms' +import useSWRMutation from 'swr/mutation' +import { + Stack, + Textarea, + Select, + ScrollArea, + Button, + Group, + Alert, + Divider, + Text, + Image, + Paper, + Badge, + Collapse, + Space, + UnstyledButton, +} from '@mantine/core' +import { IconExclamationCircle, IconDownload, IconSettings } from '@tabler/icons-react' +import type { Endpoint, Model } from '~/lib/types' + +// ============================================================================ +// Types and interfaces +// ============================================================================ + +/** + * Request body for image generation + */ +interface ImageGenerationRequestBody { + endpointId: string + modelName: string + prompt: string + width: number + height: number + steps: number + guidance_scale: number + negative_prompt: string +} + +/** + * Response from the backend after image generation + */ +interface ImageGenerationResult { + image_b64: string + width: number + height: number + duration: number +} + +interface RecipeProps { + endpoint: Endpoint | null + model: Model | null + pathname: string +} + +// ============================================================================ +// API fetchers +// ============================================================================ + +/** + * Fetcher function for image generation mutation + */ +async function generateImage( + url: string, + { arg }: { arg: ImageGenerationRequestBody } +): Promise { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(arg), + }) + + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || 'Image generation failed') + } + + return response.json() +} + +// ============================================================================ +// Presets +// 
============================================================================ + +const SIZE_PRESETS = [ + { value: '512x512', label: '512 × 512 — Small' }, + { value: '768x768', label: '768 × 768 — Medium' }, + { value: '1024x1024', label: '1024 × 1024 — Large (default)' }, + { value: '1024x576', label: '1024 × 576 — Landscape' }, + { value: '576x1024', label: '576 × 1024 — Portrait' }, +] + +const STEPS_PRESETS = [ + { value: '10', label: '10 — Fast' }, + { value: '20', label: '20 — Balanced' }, + { value: '28', label: '28 — Quality (default)' }, +] + +function parseSizePreset(value: string): { width: number; height: number } { + const [w, h] = value.split('x').map(Number) + return { width: w, height: h } +} + +// ============================================================================ +// Main recipe component +// ============================================================================ + +/** + * Main recipe component: handles prompt input, generation parameters, + * image generation, and result display with download. 
+ */ +export function Component({ endpoint, model }: RecipeProps) { + const [prompt, setPrompt] = useState('') + const [sizePreset, setSizePreset] = useState('1024x1024') + const [stepsPreset, setStepsPreset] = useState('28') + const [negativePrompt, setNegativePrompt] = useState('') + const [showAdvanced, setShowAdvanced] = useState(false) + const [result, setResult] = useState(null) + const [error, setError] = useState(null) + + // Use SWR mutation for image generation + const { + trigger, + isMutating, + error: mutationError, + } = useSWRMutation('/api/recipes/image-generation', generateImage) + + const onGenerateClicked = async () => { + if (!endpoint || !model || !prompt.trim()) return + + setError(null) + + try { + const { width, height } = parseSizePreset(sizePreset) + const generationResult = await trigger({ + endpointId: endpoint.id, + modelName: model.id, + prompt: prompt.trim(), + width, + height, + steps: Number(stepsPreset), + guidance_scale: 3.5, + negative_prompt: negativePrompt, + }) + + setResult(generationResult) + } catch (err) { + setError(err as Error) + } + } + + const downloadImage = () => { + if (!result) return + + const link = document.createElement('a') + link.href = `data:image/png;base64,${result.image_b64}` + link.download = `generated-image-${Date.now()}.png` + document.body.appendChild(link) + link.click() + document.body.removeChild(link) + } + + const generateDisabled = !endpoint || !model || !prompt.trim() || isMutating + + return ( + + + + + + {/* Prompt Section */} + + + + Prompt + +