From cf0c4ac055f8e0f17408ae9f838b634e17a95578 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Thu, 12 Mar 2026 16:06:42 +0100 Subject: [PATCH] feat(obs): add provider inventory and capability coverage --- CHANGELOG.md | 1 + README.md | 20 +++- docs/ARCHITECTURE.md | 5 + docs/FOUNDRYGATE-ROADMAP.md | 4 +- foundrygate/main.py | 154 ++++++++++++++++++++++++++---- foundrygate/providers.py | 1 + tests/test_route_introspection.py | 39 ++++++++ 7 files changed, 199 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bcae09..37847d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel - Added modality-aware metrics and filters so stats, traces, recent requests, and the dashboard can distinguish `chat`, `image_generation`, and `image_editing` - Added `POST /api/route/image` for dry-run preview of image-generation and image-editing routing decisions - Added optional `image` provider metadata (`max_outputs`, `max_side_px`, `supported_sizes`) so image-capable providers can be ranked against `n` and `size` +- Added top-level capability coverage to `GET /health` plus `GET /api/providers` for filtered provider inventory and dashboard coverage views ## v0.5.0 - 2026-03-12 diff --git a/README.md b/README.md index 8efd544..485a748 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ FoundryGate is a local OpenAI-compatible router/proxy for OpenClaw and other cli - Multi-provider routing: use `auto` for routing or target a provider directly by model id. - Multi-dimensional routing: score providers across locality, context headroom, token limits, cache metadata, latency, and recent failure state during provider selection. - Robust fallback behavior: provider errors, timeouts, and connection failures fall through the configured fallback chain. -- Useful observability: `/health` reports provider status, consecutive failures, last error, and average latency. +- Useful observability: `/health` reports provider status, capability coverage, consecutive failures, last error, and average latency. - Hardened extension seam: request hooks are sanitized, can fail closed, and expose hook errors in dry-run and completion responses. - Safe database path handling: metrics use `FOUNDRYGATE_DB_PATH`, so the SQLite database does not need to live in the repo checkout. @@ -153,7 +153,7 @@ These endpoints are implemented today in [foundrygate/main.py](./foundrygate/mai ### `GET /health` -Returns overall service status plus one object per loaded provider. Each provider entry includes: +Returns overall service status, provider summary, capability coverage, and one object per loaded provider. Each provider entry includes: - `healthy` - `consecutive_failures` @@ -163,11 +163,23 @@ Returns overall service status plus one object per loaded provider. Each provide - `backend` - `tier` - `capabilities` +- `image` ```bash curl -fsS http://127.0.0.1:8090/health ``` +### `GET /api/providers` + +Returns the loaded provider inventory plus the same capability-coverage summary used by the dashboard. + +- optional `capability=` filter +- optional `healthy=true|false` filter + +```bash +curl -fsS 'http://127.0.0.1:8090/api/providers?capability=image_generation' +``` + ### `GET /v1/models` Returns an OpenAI-compatible model list. It always includes the virtual `auto` model, plus one entry for every provider that actually loaded at startup. @@ -240,6 +252,7 @@ curl -fsS http://127.0.0.1:8090/v1/images/edits \ - `POST /api/route` - `POST /api/route/image` +- `GET /api/providers` - `GET /api/update` - `GET /api/stats` - `GET /api/recent?limit=50` @@ -267,6 +280,7 @@ curl -fsS http://127.0.0.1:8090/api/route/image \ curl -fsS http://127.0.0.1:8090/api/stats curl -fsS http://127.0.0.1:8090/api/update +curl -fsS 'http://127.0.0.1:8090/api/providers?healthy=true' curl -fsS 'http://127.0.0.1:8090/api/recent?limit=10' curl -fsS 'http://127.0.0.1:8090/api/traces?limit=10' curl -fsS 'http://127.0.0.1:8090/api/stats?provider=local-worker&client_tag=codex&modality=chat' @@ -278,6 +292,8 @@ curl -fsS 'http://127.0.0.1:8090/api/stats?provider=local-worker&client_tag=code If request hooks are enabled, `POST /api/route` also shows the applied hook names and the effective request metadata after hook processing. +`GET /api/providers` returns the current provider inventory, including capability flags and optional image metadata such as `max_outputs`, `max_side_px`, and `supported_sizes`. + `GET /api/stats`, `GET /api/recent`, and `GET /api/traces` also accept optional `provider`, `modality`, `client_profile`, `client_tag`, `layer`, and `success` filters. The built-in dashboard uses the same filtered endpoints. `GET /api/traces` returns recent enriched routing records from the metrics store, including requested model, modality, resolved client profile, client tag, decision reason, confidence, and attempt order. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 28df513..945b644 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -91,6 +91,7 @@ Request hooks sit beside these caller-aware signals as a narrow extension seam. The main operational endpoints are: - `GET /health` +- `GET /api/providers` - `GET /v1/models` - `POST /v1/chat/completions` - `POST /v1/images/generations` @@ -101,6 +102,10 @@ The main operational endpoints are: - `GET /api/traces` - `GET /dashboard` +`/health` now exposes both provider-level health and top-level capability coverage, so operators can quickly see whether the gateway currently has healthy support for `chat`, `image_generation`, `image_editing`, or other boolean capabilities exposed by loaded providers. + +`/api/providers` exposes the normalized provider inventory with optional `capability` and `healthy` filters. This is the inventory surface the dashboard should use when it needs provider metadata beyond raw request metrics. + `/api/stats`, `/api/recent`, and `/api/traces` can now be filtered by provider, client profile, client tag, layer, and success state. The dashboard is a thin UI over those same filtered endpoints and persists its active filters in the URL so operators can share one filtered view. ## Design target diff --git a/docs/FOUNDRYGATE-ROADMAP.md b/docs/FOUNDRYGATE-ROADMAP.md index 722a911..81904e1 100644 --- a/docs/FOUNDRYGATE-ROADMAP.md +++ b/docs/FOUNDRYGATE-ROADMAP.md @@ -15,7 +15,7 @@ The foundation that used to be the near-term buildout is largely in place: - route introspection - routing traces and client/profile metrics - local worker probing -- a hardened simple dashboard with filtered traces, client/provider views, URL-persisted filters, and operator summary cards +- a hardened simple dashboard with filtered traces, client/provider views, URL-persisted filters, operator summary cards, and modality/capability coverage This roadmap now shifts from "rename and foundation" to "deepen the gateway plane without bloating it". @@ -195,7 +195,7 @@ Primary goals: - add modality-aware provider contracts, starting with image generation - extend that contract toward image editing where the provider surface supports it - keep chat and image paths explicit instead of mixing modality-specific behavior into one opaque route -- expose modality-aware health and routing visibility in the dashboard and operational endpoints +- expose modality-aware health, provider inventory, and routing visibility in the dashboard and operational endpoints This should borrow the useful parts of image-router patterns without copying another gateway's product shape. diff --git a/foundrygate/main.py b/foundrygate/main.py index 4ecb6a2..07157a4 100644 --- a/foundrygate/main.py +++ b/foundrygate/main.py @@ -178,9 +178,69 @@ def _serialize_provider(name: str) -> dict[str, Any] | None: "context_window": provider.context_window, "limits": provider.limits, "cache": provider.cache, + "image": getattr(provider, "image", {}), } +def _build_provider_inventory( + *, + capability: str | None = None, + healthy: bool | None = None, +) -> list[dict[str, Any]]: + """Return a normalized provider inventory with optional filters.""" + rows: list[dict[str, Any]] = [] + for name, provider in _providers.items(): + if capability and not provider.capabilities.get(capability): + continue + if healthy is not None and bool(provider.health.healthy) != bool(healthy): + continue + + rows.append( + { + "name": name, + "model": provider.model, + "backend": provider.backend_type, + "contract": provider.contract, + "tier": provider.tier, + "healthy": provider.health.healthy, + "capabilities": provider.capabilities, + "context_window": provider.context_window, + "limits": provider.limits, + "cache": provider.cache, + "image": getattr(provider, "image", {}), + "last_error": getattr(provider.health, "last_error", ""), + "avg_latency_ms": getattr(provider.health, "avg_latency_ms", 0.0), + } + ) + + return sorted(rows, key=lambda row: (row["healthy"] is False, row["name"])) + + +def _build_capability_coverage() -> dict[str, dict[str, Any]]: + """Return operator-facing capability coverage across loaded providers.""" + coverage: dict[str, dict[str, Any]] = {} + for name, provider in _providers.items(): + for capability, value in provider.capabilities.items(): + if value is not True: + continue + bucket = coverage.setdefault( + capability, + { + "total": 0, + "healthy": 0, + "providers": [], + "healthy_providers": [], + }, + ) + bucket["total"] += 1 + bucket["providers"].append(name) + if provider.health.healthy: + bucket["healthy"] += 1 + bucket["healthy_providers"].append(name) + + return dict(sorted(coverage.items())) + + def _estimate_request_dimensions(body: dict[str, Any]) -> dict[str, int | str]: """Return lightweight request-dimension estimates for debugging and routing preview.""" messages = body.get("messages", []) @@ -511,21 +571,45 @@ async def lifespan(app: FastAPI): @app.get("/health") async def health(): await _refresh_local_worker_probes() + providers = { + name: { + **p.health.to_dict(), + "contract": p.contract, + "backend": p.backend_type, + "tier": p.tier, + "capabilities": p.capabilities, + "context_window": p.context_window, + "limits": p.limits, + "cache": p.cache, + "image": getattr(p, "image", {}), + } + for name, p in _providers.items() + } return { "status": "ok", - "providers": { - name: { - **p.health.to_dict(), - "contract": p.contract, - "backend": p.backend_type, - "tier": p.tier, - "capabilities": p.capabilities, - "context_window": p.context_window, - "limits": p.limits, - "cache": p.cache, - } - for name, p in _providers.items() + "summary": { + "providers_total": len(providers), + "providers_healthy": sum(1 for provider in providers.values() if provider["healthy"]), + "providers_unhealthy": sum( + 1 for provider in providers.values() if not provider["healthy"] + ), }, + "coverage": _build_capability_coverage(), + "providers": providers, + } + + +@app.get("/api/providers") +async def provider_inventory( + capability: str | None = None, + healthy: bool | None = None, +): + """Return the loaded provider inventory with optional capability/health filters.""" + await _refresh_local_worker_probes() + rows = _build_provider_inventory(capability=capability, healthy=healthy) + return { + "providers": rows, + "coverage": _build_capability_coverage(), } @@ -1235,7 +1319,14 @@ def main():

Provider Health

- + +
ProviderStatusContractTierContextLimitsCacheLatencyLast ErrorProviderStatusContractTierCapabilitiesContextLimitsCacheLatencyLast Error
+
+ +
+

Capability Coverage

+ +
CapabilityHealthyTotalHealthy ProvidersAll Providers
@@ -1349,26 +1440,36 @@ def main(): return parts.length ? esc(parts.join(' / ')) : '—'; } +function formatCapabilities(provider){ + const capabilities = Object.entries(provider?.capabilities || {}) + .filter(([, value]) => value === true) + .map(([name]) => `${esc(name)}`); + return capabilities.length ? capabilities.join(' ') : '—'; +} + async function load(){ try{ const query = currentFilters(); persistFilters(query); const queryStr = query.toString(); const suffix = queryStr ? `?${queryStr}` : ''; - const [health, stats, traces, rec, update] = await Promise.all([ + const [health, stats, traces, rec, update, inventory] = await Promise.all([ fetch('/health').then(r=>r.json()), fetch(`/api/stats${suffix}`).then(r=>r.json()), fetch(`/api/traces${suffix}${suffix ? '&' : '?'}limit=20`).then(r=>r.json()), fetch(`/api/recent${suffix}${suffix ? '&' : '?'}limit=20`).then(r=>r.json()), - fetch('/api/update').then(r=>r.json()).catch(() => ({enabled:false,status:'unavailable'})) + fetch('/api/update').then(r=>r.json()).catch(() => ({enabled:false,status:'unavailable'})), + fetch('/api/providers').then(r=>r.json()), ]); const totals = stats.totals || {}; - const providers = Object.values(health.providers || {}); - const healthyProviders = providers.filter(provider => provider.healthy).length; - const unhealthyProviders = providers.length - healthyProviders; + const providers = inventory.providers || Object.values(health.providers || {}); + const healthyProviders = (health.summary && health.summary.providers_healthy) || providers.filter(provider => provider.healthy).length; + const unhealthyProviders = (health.summary && health.summary.providers_unhealthy) || (providers.length - healthyProviders); const modalityRows = stats.modalities || []; const topModality = modalityRows.length ? modalityRows[0].modality : '—'; + const capabilityCoverage = inventory.coverage || health.coverage || {}; + const coverageEntries = Object.entries(capabilityCoverage); $('#status').style.background = '#5e5'; $('#ago').textContent = ago(totals.last_request); @@ -1380,22 +1481,33 @@ def main():
Cache Hit Rate
${fmt(totals.cache_hit_pct || 0,1)}%
${fmtTok(totals.total_cache_hit || 0)} hit / ${fmtTok(totals.total_cache_miss || 0)} miss
Failures
${totals.total_failures || 0}
Healthy Providers
${healthyProviders}/${providers.length}
${unhealthyProviders} unhealthy
+
Capability Coverage
${coverageEntries.length}
${coverageEntries.map(([name]) => name).slice(0,3).join(', ') || 'none'}
Top Modality
${esc(topModality)}
${modalityRows.length} modality groups
Release Status
${esc(update.latest_version || update.current_version || 'n/a')}
${update.enabled ? (update.update_available ? 'Update available' : update.status === 'ok' ? 'Up to date' : 'Update check unavailable') : 'Update checks disabled'}
`; - const providerRows = Object.entries(health.providers || {}).map(([name, provider]) => ` - ${esc(name)} + const providerRows = providers.map(provider => ` + ${esc(provider.name)} ${statusTag(provider.healthy)} ${esc(provider.contract || 'generic')} ${esc(provider.tier || 'default')} + ${formatCapabilities(provider)} ${provider.context_window ? fmtTok(provider.context_window) : '—'} ${formatLimits(provider)} ${esc((provider.cache && provider.cache.mode) || 'none')} ${fmtMs(provider.avg_latency_ms)} ${esc(provider.last_error || '—')} `); - $('#health tbody').innerHTML = providerRows.length ? providerRows.join('') : emptyRow(9, 'No provider health data'); + $('#health tbody').innerHTML = providerRows.length ? providerRows.join('') : emptyRow(10, 'No provider health data'); + + const coverageRows = coverageEntries.map(([capability, data]) => ` + ${esc(capability)} + ${data.healthy || 0} + ${data.total || 0} + ${esc((data.healthy_providers || []).join(', ') || '—')} + ${esc((data.providers || []).join(', ') || '—')} + `); + $('#coverage tbody').innerHTML = coverageRows.length ? coverageRows.join('') : emptyRow(5, 'No capability coverage data'); const clientRows = (stats.clients || []).map(row => ` ${esc(row.modality || 'chat')} diff --git a/foundrygate/providers.py b/foundrygate/providers.py index 16331e1..337799c 100644 --- a/foundrygate/providers.py +++ b/foundrygate/providers.py @@ -74,6 +74,7 @@ def __init__(self, name: str, cfg: dict): self.context_window = cfg.get("context_window") self.limits = dict(cfg.get("limits", {})) self.cache = dict(cfg.get("cache", {})) + self.image = dict(cfg.get("image", {})) self.health = ProviderHealth(name=name) self._client = httpx.AsyncClient( diff --git a/tests/test_route_introspection.py b/tests/test_route_introspection.py index 87c2cd0..9f96dd2 100644 --- a/tests/test_route_introspection.py +++ b/tests/test_route_introspection.py @@ -46,7 +46,9 @@ async def aclose(self): _refresh_local_worker_probes, _resolve_image_route_preview, _resolve_route_preview, + health, preview_image_route, + provider_inventory, ) from foundrygate.router import Router @@ -98,6 +100,7 @@ def __init__( tier: str = "default", healthy: bool = True, capabilities: dict | None = None, + image: dict | None = None, ): self.name = name self.model = model @@ -108,6 +111,7 @@ def __init__( self.context_window = 0 self.limits = {} self.cache = {"mode": "none", "read_discount": False} + self.image = image or {} self.health = types.SimpleNamespace( healthy=healthy, last_check=0.0, @@ -227,6 +231,11 @@ def preview_config(tmp_path, monkeypatch): "image_generation": True, "image_editing": True, }, + image={ + "max_outputs": 1, + "max_side_px": 1024, + "supported_sizes": ["1024x1024"], + }, ), "image-large": _ProviderStub( name="image-large", @@ -240,6 +249,11 @@ def preview_config(tmp_path, monkeypatch): "image_generation": True, "image_editing": True, }, + image={ + "max_outputs": 4, + "max_side_px": 2048, + "supported_sizes": ["1024x1024", "2048x2048"], + }, ), }, raising=False, @@ -433,3 +447,28 @@ async def test_refresh_only_probes_local_worker_contracts(self, preview_config): assert local_worker.probe_calls == 1 assert cloud_default.probe_calls == 0 + + +class TestProviderCoverage: + @pytest.mark.asyncio + async def test_health_reports_capability_coverage(self, preview_config): + response = await health() + + assert response["summary"]["providers_total"] == 4 + assert response["summary"]["providers_healthy"] == 4 + assert response["coverage"]["image_generation"]["total"] == 2 + assert response["coverage"]["image_generation"]["healthy"] == 2 + assert response["coverage"]["image_editing"]["providers"] == [ + "image-cloud", + "image-large", + ] + assert response["providers"]["image-cloud"]["image"]["max_outputs"] == 1 + + @pytest.mark.asyncio + async def test_provider_inventory_filters_by_capability(self, preview_config): + response = await provider_inventory(capability="image_editing") + + provider_names = [provider["name"] for provider in response["providers"]] + assert provider_names == ["image-cloud", "image-large"] + assert response["coverage"]["image_editing"]["total"] == 2 + assert response["providers"][0]["contract"] == "image-provider"