From 059275b7c33328affb424fe536b234f7c6c7a71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Thu, 12 Mar 2026 17:50:02 +0100 Subject: [PATCH] feat(obs): add operator dashboard views --- CHANGELOG.md | 1 + README.md | 2 ++ docs/ARCHITECTURE.md | 3 ++- foundrygate/main.py | 24 +++++++++++++++++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da9ad59..d127194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel - Added stronger update-alert metadata to `GET /api/update`, including update type, alert level, and recommended action for operators and dashboard consumers - Added an opt-in `auto_update` policy block plus `foundrygate-auto-update` so controlled deployments can gate helper-driven updates without enabling silent self-updates - Added `GET /api/operator-events` plus operator-event metrics for update checks and helper-driven auto-update attempts +- Added dashboard cards and tables for operator-side update checks and apply attempts ## v0.6.0 - 2026-03-12 diff --git a/README.md b/README.md index 0263ab9..e76dd6d 100644 --- a/README.md +++ b/README.md @@ -304,6 +304,8 @@ For image-capable providers, `image.policy_tags` can be used as lightweight pres `GET /api/stats`, `GET /api/recent`, and `GET /api/traces` also accept optional `provider`, `modality`, `client_profile`, `client_tag`, `layer`, and `success` filters. The built-in dashboard uses the same filtered endpoints. +`GET /api/operator-events` returns recent operator-side update checks and apply attempts. The built-in dashboard now shows both a recent operator-action summary card and an operator-action breakdown table. + `GET /api/traces` returns recent enriched routing records from the metrics store, including requested model, modality, resolved client profile, client tag, decision reason, confidence, and attempt order. 
`GET /api/update` returns the cached release-check result for the running service, including the current version, latest known tag, update availability, update type (`patch`, `minor`, `major`), alert level, recommended action, and the release URL when GitHub lookups succeed. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 945b644..afaa2ce 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -100,13 +100,14 @@ The main operational endpoints are: - `GET /api/stats` - `GET /api/recent` - `GET /api/traces` +- `GET /api/operator-events` - `GET /dashboard` `/health` now exposes both provider-level health and top-level capability coverage, so operators can quickly see whether the gateway currently has healthy support for `chat`, `image_generation`, `image_editing`, or other boolean capabilities exposed by loaded providers. `/api/providers` exposes the normalized provider inventory with optional `capability` and `healthy` filters. This is the inventory surface the dashboard should use when it needs provider metadata beyond raw request metrics. -`/api/stats`, `/api/recent`, and `/api/traces` can now be filtered by provider, client profile, client tag, layer, and success state. The dashboard is a thin UI over those same filtered endpoints and persists its active filters in the URL so operators can share one filtered view. +`/api/stats`, `/api/recent`, and `/api/traces` can now be filtered by provider, modality, client profile, client tag, layer, and success state. `/api/operator-events` captures operator-side update checks and helper-driven apply attempts. The dashboard is a thin UI over these endpoints and persists its active filters in the URL so operators can share one filtered view. ## Design target diff --git a/foundrygate/main.py b/foundrygate/main.py index 2f6071b..80093b2 100644 --- a/foundrygate/main.py +++ b/foundrygate/main.py @@ -1487,6 +1487,13 @@ def main(): +
+

Operator Actions

+ + +
EventActionClientStatusUpdate TypeEligibleEvents
+
+

Route Traces

@@ -1589,13 +1596,14 @@ def main(): persistFilters(query); const queryStr = query.toString(); const suffix = queryStr ? `?${queryStr}` : ''; - const [health, stats, traces, rec, update, inventory] = await Promise.all([ + const [health, stats, traces, rec, update, inventory, operatorEvents] = await Promise.all([ fetch('/health').then(r=>r.json()), fetch(`/api/stats${suffix}`).then(r=>r.json()), fetch(`/api/traces${suffix}${suffix ? '&' : '?'}limit=20`).then(r=>r.json()), fetch(`/api/recent${suffix}${suffix ? '&' : '?'}limit=20`).then(r=>r.json()), fetch('/api/update').then(r=>r.json()).catch(() => ({enabled:false,status:'unavailable'})), fetch('/api/providers').then(r=>r.json()), + fetch('/api/operator-events?limit=20').then(r=>r.json()).catch(() => ({events: []})), ]); const totals = stats.totals || {}; @@ -1609,6 +1617,8 @@ def main(): $('#status').style.background = '#5e5'; $('#ago').textContent = ago(totals.last_request); + const operatorRows = stats.operator_actions || []; + const latestOperatorEvent = (operatorEvents.events || [])[0] || null; $('#cards').innerHTML = `
Requests
${fmtTok(totals.total_requests || 0)}
Cost
${fmtUsd(totals.total_cost_usd || 0)}
@@ -1620,6 +1630,7 @@ def main():
Capability Coverage
${coverageEntries.length}
${coverageEntries.map(([name]) => name).slice(0,3).join(', ') || 'none'}
Top Modality
${esc(topModality)}
${modalityRows.length} modality groups
Release Status
${esc(update.latest_version || update.current_version || 'n/a')}
${update.enabled ? (update.status === 'ok' ? `${esc(update.update_type || 'current')} / ${esc(update.recommended_action || (update.update_available ? 'Upgrade recommended' : 'No action needed'))}${update.auto_update && update.auto_update.enabled ? ` / auto: ${esc(update.auto_update.eligible ? 'eligible' : (update.auto_update.blocked_reason || 'blocked'))}` : ''}` : esc(update.recommended_action || 'Update check unavailable')) : 'Update checks disabled'}
+
Operator Actions
${fmtTok((operatorEvents.events || []).length)}
${latestOperatorEvent ? `${esc(latestOperatorEvent.action || 'update-check')} / ${esc(latestOperatorEvent.status || 'unknown')}` : 'No recent operator events'}
`; const providerRows = providers.map(provider => ` @@ -1677,6 +1688,17 @@ def main(): `); $('#routing tbody').innerHTML = routingRows.length ? routingRows.join('') : emptyRow(6, 'No routing rows for the current filter set'); + const operatorBreakdownRows = operatorRows.map(row => ` + + + + + + + + `); + $('#operators tbody').innerHTML = operatorBreakdownRows.length ? operatorBreakdownRows.join('') : emptyRow(7, 'No operator events recorded yet'); + const traceRows = (traces.traces || []).map(row => `
${esc(row.event_type || 'update')}${esc(row.action || 'update-check')}${esc(row.client_tag || 'operator')}${esc(row.status || 'unknown')}${esc(row.update_type || '—')}${row.eligible ? 'yes' : 'no'}${row.events}
${ago(row.timestamp)} ${esc(row.provider)}