diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5de620..e9458eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is intentionally lightweight and human-readable. Group entries by rel
## Unreleased
+### Added
+
+- Added modality-aware metrics and filters so stats, traces, recent requests, and the dashboard can distinguish `chat`, `image_generation`, and `image_editing`
+- Added `POST /api/route/image` for dry-run preview of image-generation and image-editing routing decisions
+
## v0.5.0 - 2026-03-12
### Added
diff --git a/README.md b/README.md
index 194582b..27b983d 100644
--- a/README.md
+++ b/README.md
@@ -239,6 +239,7 @@ curl -fsS http://127.0.0.1:8090/v1/images/edits \
### Additional Stable Operational Endpoints
- `POST /api/route`
+- `POST /api/route/image`
- `GET /api/update`
- `GET /api/stats`
- `GET /api/recent?limit=50`
@@ -256,20 +257,30 @@ curl -fsS http://127.0.0.1:8090/api/route \
]
}'
+curl -fsS http://127.0.0.1:8090/api/route/image \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "model": "auto",
+ "capability": "image_editing",
+ "prompt": "Remove the background and keep the subject centered."
+ }'
+
curl -fsS http://127.0.0.1:8090/api/stats
curl -fsS http://127.0.0.1:8090/api/update
curl -fsS 'http://127.0.0.1:8090/api/recent?limit=10'
curl -fsS 'http://127.0.0.1:8090/api/traces?limit=10'
-curl -fsS 'http://127.0.0.1:8090/api/stats?provider=local-worker&client_tag=codex'
+curl -fsS 'http://127.0.0.1:8090/api/stats?provider=local-worker&client_tag=codex&modality=chat'
```
`POST /api/route` is a dry-run endpoint. It uses the same routing logic as `POST /v1/chat/completions` but does not call an upstream provider. The response includes the resolved client profile, the routing decision, candidate ranking details where applicable, hook errors, and the fallback attempt order.
+`POST /api/route/image` is the matching dry-run endpoint for image-generation and image-editing requests. Use `capability: "image_generation"` or `capability: "image_editing"` to preview modality-specific routing without calling an upstream image provider; when `capability` is omitted, it defaults to `image_generation`.
+
If request hooks are enabled, `POST /api/route` also shows the applied hook names and the effective request metadata after hook processing.
-`GET /api/stats`, `GET /api/recent`, and `GET /api/traces` also accept optional `provider`, `client_profile`, `client_tag`, `layer`, and `success` filters. The built-in dashboard uses the same filtered endpoints.
+`GET /api/stats`, `GET /api/recent`, and `GET /api/traces` also accept optional `provider`, `modality`, `client_profile`, `client_tag`, `layer`, and `success` filters. The built-in dashboard uses the same filtered endpoints.
-`GET /api/traces` returns recent enriched routing records from the metrics store, including requested model, resolved client profile, client tag, decision reason, confidence, and attempt order.
+`GET /api/traces` returns recent enriched routing records from the metrics store, including requested model, modality, resolved client profile, client tag, decision reason, confidence, and attempt order.
`GET /api/update` returns the cached release-check result for the running service, including the current version, latest known tag, update availability, and the release URL when GitHub lookups succeed.
diff --git a/foundrygate/main.py b/foundrygate/main.py
index dcf0406..9464a0f 100644
--- a/foundrygate/main.py
+++ b/foundrygate/main.py
@@ -210,6 +210,16 @@ def _estimate_request_dimensions(body: dict[str, Any]) -> dict[str, int | str]:
}
+def _estimate_image_request_dimensions(body: dict[str, Any], *, capability: str) -> dict[str, Any]:
+    """Return lightweight image-request details for debugging and routing preview.
+
+    Args:
+        body: Image request payload; only ``prompt``, ``size`` and ``n`` are read.
+        capability: Image modality label, echoed back unchanged in the result.
+
+    Returns:
+        A dict with ``prompt_chars``, ``requested_size``, ``requested_outputs`` and
+        ``capability`` keys. ``requested_outputs`` falls back to ``1`` whenever ``n``
+        is missing or is not a real integer.
+    """
+    outputs = body.get("n")
+    # bool is a subclass of int, so a JSON ``true``/``false`` would otherwise leak
+    # through as the output count; treat it like any other non-integer value.
+    if isinstance(outputs, bool) or not isinstance(outputs, int):
+        outputs = 1
+    return {
+        "prompt_chars": len(str(body.get("prompt") or "")),
+        "requested_size": body.get("size") or "",
+        "requested_outputs": outputs,
+        "capability": capability,
+    }
+
+
def _collect_request_cache_preference(body: dict[str, Any]) -> str:
"""Return one request-level cache preference."""
metadata = body.get("metadata") if isinstance(body.get("metadata"), dict) else {}
@@ -550,6 +560,7 @@ async def list_models():
@app.get("/api/stats")
async def stats(
provider: str | None = None,
+ modality: str | None = None,
client_profile: str | None = None,
client_tag: str | None = None,
layer: str | None = None,
@@ -558,6 +569,7 @@ async def stats(
"""Full statistics: totals, per-provider, routing breakdown, time series."""
filters = {
"provider": provider,
+ "modality": modality,
"client_profile": client_profile,
"client_tag": client_tag,
"layer": layer,
@@ -566,6 +578,7 @@ async def stats(
return {
"totals": _metrics.get_totals(**filters),
"providers": _metrics.get_provider_summary(**filters),
+ "modalities": _metrics.get_modality_breakdown(**filters),
"routing": _metrics.get_routing_breakdown(**filters),
"clients": _metrics.get_client_breakdown(**filters),
"hourly": _metrics.get_hourly_series(24),
@@ -577,6 +590,7 @@ async def stats(
async def recent(
limit: int = 50,
provider: str | None = None,
+ modality: str | None = None,
client_profile: str | None = None,
client_tag: str | None = None,
layer: str | None = None,
@@ -587,6 +601,7 @@ async def recent(
"requests": _metrics.get_recent(
limit,
provider=provider,
+ modality=modality,
client_profile=client_profile,
client_tag=client_tag,
layer=layer,
@@ -599,6 +614,7 @@ async def recent(
async def traces(
limit: int = 50,
provider: str | None = None,
+ modality: str | None = None,
client_profile: str | None = None,
client_tag: str | None = None,
layer: str | None = None,
@@ -609,6 +625,7 @@ async def traces(
"traces": _metrics.get_recent(
limit,
provider=provider,
+ modality=modality,
client_profile=client_profile,
client_tag=client_tag,
layer=layer,
@@ -655,6 +672,7 @@ async def preview_route(request: Request):
"hook_notes": hook_state.notes,
"hook_errors": hook_state.errors,
"effective_request": {
+ "modality": "chat",
"model": effective_body.get("model", "auto"),
"has_tools": bool(effective_body.get("tools")),
**_estimate_request_dimensions(effective_body),
@@ -665,6 +683,58 @@ async def preview_route(request: Request):
}
+@app.post("/api/route/image")
+async def preview_image_route(request: Request):
+    """Dry-run one image routing decision without sending a provider request.
+
+    The JSON body may carry a ``capability`` field (``image_generation`` or
+    ``image_editing``; defaults to ``image_generation``). It is stripped before the
+    remaining fields are handed to ``_resolve_image_route_preview``.
+
+    Returns:
+        A dict describing the requested model, resolved client profile, hook state,
+        effective request, routing decision, and attempt order; or an error response
+        when the body is not a JSON object, the capability is unsupported, a request
+        hook fails, or the resolver raises ``ValueError``.
+    """
+    try:
+        body = await request.json()
+    except Exception:
+        return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
+
+    # Valid JSON can still be a list, string, or number; without this guard the
+    # ``body.get`` call below would raise AttributeError instead of rejecting it.
+    if not isinstance(body, dict):
+        return _invalid_request_response(
+            "Invalid image route preview request",
+            exc=ValueError("Request body must be a JSON object"),
+        )
+
+    capability = str(body.get("capability") or "image_generation").strip().lower()
+    if capability not in {"image_generation", "image_editing"}:
+        return _invalid_request_response(
+            "Invalid image route preview request",
+            exc=ValueError("Unsupported capability"),
+        )
+
+    headers = _collect_routing_headers(request)
+    # ``capability`` is a preview-only selector, so it is removed from the copy that
+    # is passed on as the image request body.
+    preview_body = dict(body)
+    preview_body.pop("capability", None)
+    try:
+        (
+            decision,
+            client_profile,
+            client_tag,
+            attempt_order,
+            model_requested,
+            hook_state,
+            effective_body,
+        ) = await _resolve_image_route_preview(preview_body, headers, capability=capability)
+    except HookExecutionError as exc:
+        return _request_hook_error_response(exc)
+    except ValueError as exc:
+        return _invalid_request_response("Invalid image route preview request", exc=exc)
+
+    return {
+        "requested_model": model_requested,
+        "resolved_profile": client_profile,
+        "client_tag": client_tag,
+        "routing_headers": headers,
+        "applied_hooks": hook_state.applied_hooks,
+        "hook_notes": hook_state.notes,
+        "hook_errors": hook_state.errors,
+        "effective_request": {
+            "modality": capability,
+            "model": effective_body.get("model", "auto"),
+            **_estimate_image_request_dimensions(effective_body, capability=capability),
+        },
+        "decision": decision.to_dict(),
+        "selected_provider": _serialize_provider(decision.provider_name),
+        "attempt_order": [_serialize_provider(name) for name in attempt_order],
+    }
+
+
@app.post("/v1/images/generations")
async def image_generations(request: Request):
"""OpenAI-compatible image generation endpoint."""
@@ -713,6 +783,7 @@ async def image_generations(request: Request):
rule_name=decision.rule_name,
latency_ms=(result.get("_foundrygate") or {}).get("latency_ms", 0),
requested_model=model_requested,
+ modality="image_generation",
client_profile=client_profile,
client_tag=client_tag,
decision_reason=decision.reason,
@@ -744,6 +815,7 @@ async def image_generations(request: Request):
success=False,
error=exc.detail[:500],
requested_model=model_requested,
+ modality="image_generation",
client_profile=client_profile,
client_tag=client_tag,
decision_reason=decision.reason,
@@ -822,6 +894,7 @@ async def image_edits(request: Request):
rule_name=decision.rule_name,
latency_ms=(result.get("_foundrygate") or {}).get("latency_ms", 0),
requested_model=model_requested,
+ modality="image_editing",
client_profile=client_profile,
client_tag=client_tag,
decision_reason=decision.reason,
@@ -853,6 +926,7 @@ async def image_edits(request: Request):
success=False,
error=exc.detail[:500],
requested_model=model_requested,
+ modality="image_editing",
client_profile=client_profile,
client_tag=client_tag,
decision_reason=decision.reason,
@@ -964,6 +1038,7 @@ async def chat_completions(request: Request):
cost_usd=cost,
latency_ms=cg.get("latency_ms", 0),
requested_model=model_requested,
+ modality="chat",
client_profile=client_profile,
client_tag=client_tag,
decision_reason=decision.reason,
@@ -1005,6 +1080,7 @@ async def chat_completions(request: Request):
success=False,
error=e.detail[:500],
requested_model=model_requested,
+ modality="chat",
client_profile=client_profile,
client_tag=client_tag,
decision_reason=decision.reason,
@@ -1115,6 +1191,14 @@ def main():
Filters
+