diff --git a/CHANGELOG.md b/CHANGELOG.md index c5de620..e9458eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is intentionally lightweight and human-readable. Group entries by rel ## Unreleased +### Added + +- Added modality-aware metrics and filters so stats, traces, recent requests, and the dashboard can distinguish `chat`, `image_generation`, and `image_editing` +- Added `POST /api/route/image` for dry-run preview of image-generation and image-editing routing decisions + ## v0.5.0 - 2026-03-12 ### Added diff --git a/README.md b/README.md index 194582b..27b983d 100644 --- a/README.md +++ b/README.md @@ -239,6 +239,7 @@ curl -fsS http://127.0.0.1:8090/v1/images/edits \ ### Additional Stable Operational Endpoints - `POST /api/route` +- `POST /api/route/image` - `GET /api/update` - `GET /api/stats` - `GET /api/recent?limit=50` @@ -256,20 +257,30 @@ curl -fsS http://127.0.0.1:8090/api/route \ ] }' +curl -fsS http://127.0.0.1:8090/api/route/image \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "auto", + "capability": "image_editing", + "prompt": "Remove the background and keep the subject centered." + }' + curl -fsS http://127.0.0.1:8090/api/stats curl -fsS http://127.0.0.1:8090/api/update curl -fsS 'http://127.0.0.1:8090/api/recent?limit=10' curl -fsS 'http://127.0.0.1:8090/api/traces?limit=10' -curl -fsS 'http://127.0.0.1:8090/api/stats?provider=local-worker&client_tag=codex' +curl -fsS 'http://127.0.0.1:8090/api/stats?provider=local-worker&client_tag=codex&modality=chat' ``` `POST /api/route` is a dry-run endpoint. It uses the same routing logic as `POST /v1/chat/completions` but does not call an upstream provider. The response includes the resolved client profile, the routing decision, candidate ranking details where applicable, hook errors, and the fallback attempt order. +`POST /api/route/image` is the matching dry-run endpoint for image-generation and image-editing requests. Use `capability: "image_generation"` or `capability: "image_editing"` to preview modality-specific routing without calling an upstream image provider. + If request hooks are enabled, `POST /api/route` also shows the applied hook names and the effective request metadata after hook processing. -`GET /api/stats`, `GET /api/recent`, and `GET /api/traces` also accept optional `provider`, `client_profile`, `client_tag`, `layer`, and `success` filters. The built-in dashboard uses the same filtered endpoints. +`GET /api/stats`, `GET /api/recent`, and `GET /api/traces` also accept optional `provider`, `modality`, `client_profile`, `client_tag`, `layer`, and `success` filters. The built-in dashboard uses the same filtered endpoints. -`GET /api/traces` returns recent enriched routing records from the metrics store, including requested model, resolved client profile, client tag, decision reason, confidence, and attempt order. +`GET /api/traces` returns recent enriched routing records from the metrics store, including requested model, modality, resolved client profile, client tag, decision reason, confidence, and attempt order. `GET /api/update` returns the cached release-check result for the running service, including the current version, latest known tag, update availability, and the release URL when GitHub lookups succeed. diff --git a/foundrygate/main.py b/foundrygate/main.py index dcf0406..9464a0f 100644 --- a/foundrygate/main.py +++ b/foundrygate/main.py @@ -210,6 +210,16 @@ def _estimate_request_dimensions(body: dict[str, Any]) -> dict[str, int | str]: } +def _estimate_image_request_dimensions(body: dict[str, Any], *, capability: str) -> dict[str, Any]: + """Return lightweight image-request details for debugging and routing preview.""" + return { + "prompt_chars": len(str(body.get("prompt") or "")), + "requested_size": body.get("size") or "", + "requested_outputs": body.get("n") if isinstance(body.get("n"), int) else 1, + "capability": capability, + } + + def _collect_request_cache_preference(body: dict[str, Any]) -> str: """Return one request-level cache preference.""" metadata = body.get("metadata") if isinstance(body.get("metadata"), dict) else {} @@ -550,6 +560,7 @@ async def list_models(): @app.get("/api/stats") async def stats( provider: str | None = None, + modality: str | None = None, client_profile: str | None = None, client_tag: str | None = None, layer: str | None = None, @@ -558,6 +569,7 @@ async def stats( """Full statistics: totals, per-provider, routing breakdown, time series.""" filters = { "provider": provider, + "modality": modality, "client_profile": client_profile, "client_tag": client_tag, "layer": layer, @@ -566,6 +578,7 @@ async def stats( return { "totals": _metrics.get_totals(**filters), "providers": _metrics.get_provider_summary(**filters), + "modalities": _metrics.get_modality_breakdown(**filters), "routing": _metrics.get_routing_breakdown(**filters), "clients": _metrics.get_client_breakdown(**filters), "hourly": _metrics.get_hourly_series(24), @@ -577,6 +590,7 @@ async def stats( async def recent( limit: int = 50, provider: str | None = None, + modality: str | None = None, client_profile: str | None = None, client_tag: str | None = None, layer: str | None = None, @@ -587,6 +601,7 @@ async def recent( "requests": _metrics.get_recent( limit, provider=provider, + modality=modality, client_profile=client_profile, client_tag=client_tag, layer=layer, @@ -599,6 +614,7 @@ async def recent( async def traces( limit: int = 50, provider: str | None = None, + modality: str | None = None, client_profile: str | None = None, client_tag: str | None = None, layer: str | None = None, @@ -609,6 +625,7 @@ async def traces( "traces": _metrics.get_recent( limit, provider=provider, + modality=modality, client_profile=client_profile, client_tag=client_tag, layer=layer, @@ -655,6 +672,7 @@ async def preview_route(request: Request): "hook_notes": hook_state.notes, "hook_errors": hook_state.errors, "effective_request": { + "modality": "chat", "model": effective_body.get("model", "auto"), "has_tools": bool(effective_body.get("tools")), **_estimate_request_dimensions(effective_body), @@ -665,6 +683,58 @@ async def preview_route(request: Request): } +@app.post("/api/route/image") +async def preview_image_route(request: Request): + """Dry-run one image routing decision without sending a provider request.""" + try: + body = await request.json() + except Exception: + return JSONResponse({"error": "Invalid JSON body"}, status_code=400) + + capability = str(body.get("capability") or "image_generation").strip().lower() + if capability not in {"image_generation", "image_editing"}: + return _invalid_request_response( + "Invalid image route preview request", + exc=ValueError("Unsupported capability"), + ) + + headers = _collect_routing_headers(request) + preview_body = dict(body) + preview_body.pop("capability", None) + try: + ( + decision, + client_profile, + client_tag, + attempt_order, + model_requested, + hook_state, + effective_body, + ) = await _resolve_image_route_preview(preview_body, headers, capability=capability) + except HookExecutionError as exc: + return _request_hook_error_response(exc) + except ValueError as exc: + return _invalid_request_response("Invalid image route preview request", exc=exc) + + return { + "requested_model": model_requested, + "resolved_profile": client_profile, + "client_tag": client_tag, + "routing_headers": headers, + "applied_hooks": hook_state.applied_hooks, + "hook_notes": hook_state.notes, + "hook_errors": hook_state.errors, + "effective_request": { + "modality": capability, + "model": effective_body.get("model", "auto"), + **_estimate_image_request_dimensions(effective_body, capability=capability), + }, + "decision": decision.to_dict(), + "selected_provider": _serialize_provider(decision.provider_name), + "attempt_order": [_serialize_provider(name) for name in attempt_order], + } + + @app.post("/v1/images/generations") async def image_generations(request: Request): """OpenAI-compatible image generation endpoint.""" @@ -713,6 +783,7 @@ async def image_generations(request: Request): rule_name=decision.rule_name, latency_ms=(result.get("_foundrygate") or {}).get("latency_ms", 0), requested_model=model_requested, + modality="image_generation", client_profile=client_profile, client_tag=client_tag, decision_reason=decision.reason, @@ -744,6 +815,7 @@ async def image_generations(request: Request): success=False, error=exc.detail[:500], requested_model=model_requested, + modality="image_generation", client_profile=client_profile, client_tag=client_tag, decision_reason=decision.reason, @@ -822,6 +894,7 @@ async def image_edits(request: Request): rule_name=decision.rule_name, latency_ms=(result.get("_foundrygate") or {}).get("latency_ms", 0), requested_model=model_requested, + modality="image_editing", client_profile=client_profile, client_tag=client_tag, decision_reason=decision.reason, @@ -853,6 +926,7 @@ async def image_edits(request: Request): success=False, error=exc.detail[:500], requested_model=model_requested, + modality="image_editing", client_profile=client_profile, client_tag=client_tag, decision_reason=decision.reason, @@ -964,6 +1038,7 @@ async def chat_completions(request: Request): cost_usd=cost, latency_ms=cg.get("latency_ms", 0), requested_model=model_requested, + modality="chat", client_profile=client_profile, client_tag=client_tag, decision_reason=decision.reason, @@ -1005,6 +1080,7 @@ async def chat_completions(request: Request): success=False, error=e.detail[:500], requested_model=model_requested, + modality="chat", client_profile=client_profile, client_tag=client_tag, decision_reason=decision.reason, @@ -1115,6 +1191,14 @@ def main():

Filters

+