div0rce · div0rce · May 30, 2026 · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/PROGRESS.md b/PROGRESS.md
@@ -27,6 +27,7 @@
 
 - **#13** — record real-provider eval numbers (M9 follow-up). Stays open until keys are wired and `make eval` is run for real.
 - **Backlog (MILESTONES.md):** multi-tenant + RBAC, eval set expansion, OTel traces, Multi-AZ + private subnets + ACM TLS + S3/DynamoDB Terraform backend.
+- **Design system** — dual-theme (dark by default, light optional) audit-grade visual layer for the frontend, plus a real `GET /dashboard/kpis` endpoint, on branch `claude/serene-maxwell-54yMC` (draft PR). Net-new work beyond the M0–M11 roadmap; `make check` is green (201 backend pytest tests, 7 frontend Vitest tests; ruff/mypy/tsc/build clean).
 
 ---
 
@@ -92,6 +93,8 @@ Status key: ☐ not started · ◐ in progress · ☑ merged
 - 2026-05-28 (M4) — Extraction failures (`parse_error`, `schema_invalid`, `invalid_citation`, `document_not_found`, `no_chunks`, `unknown_schema`) **never persist** an `extractions` row. Surfacing the typed reason to the caller is enough for M5 guardrails / M7 audit / M9 eval to bucket failures without polluting the success-only table.
 - 2026-05-28 (M4) — Citation validation reuses the M3 posture: a `source_chunk_id` not in the supplied chunk set is a hard failure (`invalid_citation`), not a silent drop. Same invariant the M3 RAG layer enforces with `[chunk:N]` markers.
 - 2026-05-28 (M4) — Invoice `issue_date` remains persisted as a string but is schema-constrained to a real ISO `YYYY-MM-DD` date; non-ISO or impossible dates fail schema validation before persistence.
+- 2026-05-29 (design system) — Applied the dual-theme "audit-grade" design system to the frontend: lifted the token layer into `styles.css` (`:root` dark default + `[data-theme="light"]`), self-hosted IBM Plex Sans/Mono via `@fontsource` (latin subset, offline-safe), added `lucide-react`, and a persisted `localStorage["sentinel-theme"]` toggle set before first paint (inline script in `index.html`, no FOUC). Real IA/routes/API client/Recharts unchanged; charts restyled with token `var()` fills + per-bar `<Cell>` colors so they re-theme live with the toggle.
+- 2026-05-29 (design system) — Dashboard KPIs are powered by a new real endpoint `GET /dashboard/kpis` (docs ingested, auto-approved rate, avg confidence, SLA-at-risk). Every figure derives from real rows; 24h-vs-prior deltas are emitted only when both comparison windows have data (otherwise `null`/flat) — no fabricated numbers or deltas. The Review row renders only fields the `/review` payload returns; the prototype's `schema.field`/`value`/`confidence` are extraction-level details absent from the queue API, so they are not invented.
 
 ---
 

diff --git a/backend/app/routers/dashboard.py b/backend/app/routers/dashboard.py
@@ -18,7 +18,7 @@
 from sqlalchemy.orm import Session
 
 from backend.app.db import get_session
-from backend.app.models import Extraction, WorkflowItem, WorkflowStatus
+from backend.app.models import Document, Extraction, WorkflowItem, WorkflowStatus
 
 router = APIRouter(prefix="/dashboard", tags=["dashboard"])
 
@@ -77,13 +77,71 @@ class SlaResponse(BaseModel):
     buckets: list[SlaBucket]
 
 
+class Kpi(BaseModel):
+    """One operational KPI tile for the dashboard header.
+
+    ``value``/``delta`` are the raw numbers (stable to assert on and to re-format);
+    ``display``/``delta_display`` are the server-rendered strings the UI shows verbatim.
+    ``direction`` drives the up/down/flat color (up=success, down=danger, flat=muted) and
+    is keyed purely off the sign of ``delta`` — matching the design system's KPI semantics.
+
+    ``delta_display`` is not always a formatted ``delta``: the SLA tile sends ``delta=None``
+    together with a ``"threshold <N>h"`` caption, so consumers must treat the two fields as
+    independently optional.
+    """
+
+    # Stable machine identifier for the tile, e.g. "docs_ingested" or "sla_at_risk".
+    key: str
+    # Human-readable caption shown on the tile.
+    label: str
+    # Raw headline number (a count, a 0..1 rate, or a mean, depending on the tile).
+    value: float
+    # Pre-formatted headline string; "—" when the tile has no data to average.
+    display: str
+    # Raw change figure, or None when no comparison is available.
+    delta: float | None = None
+    # Pre-formatted secondary line; may be a delta or a plain caption (see class docstring).
+    delta_display: str | None = None
+    # Color hint for the secondary line.
+    direction: Literal["up", "down", "flat"]
+
+
+class KpiResponse(BaseModel):
+    """Payload of ``GET /dashboard/kpis``: the KPI tiles plus request/refresh metadata."""
+
+    # Tiles in display order, as assembled by ``get_kpis``.
+    kpis: list[Kpi]
+    # Echo of the SLA threshold (in hours) the request was evaluated with.
+    threshold_hours: int = Field(ge=1)
+    generated_at: str  # ISO-8601 UTC; lets the UI footnote show a real refresh time
+
+
 # --- helpers ------------------------------------------------------------------------
 
 
 def _utcnow() -> datetime:
     return datetime.now(UTC)
 
 
+def _direction(delta: float | None, *, eps: float = 1e-9) -> Literal["up", "down", "flat"]:
+    """Color direction from the sign of a delta.
+
+    ``None``, a magnitude at or below ``eps``, and NaN all read as ``"flat"``; otherwise a
+    positive delta is ``"up"`` and a negative one is ``"down"``.
+    """
+    # Written as ``not abs(delta) > eps`` rather than ``abs(delta) <= eps`` on purpose: every
+    # comparison against NaN is False, so a NaN delta is caught here instead of falling
+    # through to the sign check and being colored as "down".
+    if delta is None or not abs(delta) > eps:
+        return "flat"
+    return "up" if delta > 0 else "down"
+
+
+def _mean(values: list[float]) -> float | None:
+    """Average of ``values``; an empty list yields ``None`` so the caller can leave the
+    figure out instead of reporting a made-up one."""
+    if not values:
+        return None
+    return sum(values) / len(values)
+
+
+def _workflow_counts(
+    session: Session, *, since: datetime | None = None, until: datetime | None = None
+) -> tuple[int, int]:
+    """Count workflow items created in the half-open window ``[since, until)``.
+
+    Either bound may be ``None`` to leave that side of the window open. The result is
+    ``(auto_approved, total)``.
+    """
+    window = []
+    if since is not None:
+        window.append(WorkflowItem.created_at >= since)
+    if until is not None:
+        window.append(WorkflowItem.created_at < until)
+
+    def _count(*extra):
+        # One COUNT query restricted by the window plus any extra criteria.
+        stmt = select(func.count(WorkflowItem.id))
+        criteria = (*extra, *window)
+        if criteria:
+            stmt = stmt.where(*criteria)
+        return int(session.scalar(stmt) or 0)
+
+    total = _count()
+    approved = _count(WorkflowItem.status == WorkflowStatus.AUTO_APPROVED)
+    return approved, total
+
+
 # Bucket boundaries for confidence: ten 0.1-wide bins covering [0.0, 1.0]. Values
 # exactly equal to 1.0 land in the last bucket.
 _CONF_BOUNDARIES: list[tuple[float, float]] = [
@@ -234,6 +292,108 @@ def get_sla(
     )
 
 
+def _docs_kpi(session: Session, *, last_24h: datetime) -> Kpi:
+    """Docs-ingested tile: all-time document count, with the last-24h intake as the delta."""
+    total_docs = int(session.scalar(select(func.count(Document.id))) or 0)
+    docs_24h = int(
+        session.scalar(select(func.count(Document.id)).where(Document.created_at >= last_24h)) or 0
+    )
+    return Kpi(
+        key="docs_ingested",
+        label="Docs ingested",
+        value=float(total_docs),
+        display=f"{total_docs:,}",
+        # Not a window-vs-window comparison: the delta is simply the last-24h intake count.
+        delta=float(docs_24h),
+        delta_display=f"+{docs_24h} (24h)",
+        direction=_direction(float(docs_24h)),
+    )
+
+
+def _auto_approved_kpi(session: Session, *, last_24h: datetime, prev_24h: datetime) -> Kpi:
+    """Auto-approved tile: all-time share of workflow items that were auto-approved.
+
+    The delta is the change in that share between the last-24h cohort and the preceding-24h
+    cohort (shown in percentage points), and stays ``None`` unless both cohorts contain at
+    least one item.
+    """
+    approved_all, total_all = _workflow_counts(session)
+    rate_all = approved_all / total_all if total_all else 0.0
+    approved_last, total_last = _workflow_counts(session, since=last_24h)
+    approved_prev, total_prev = _workflow_counts(session, since=prev_24h, until=last_24h)
+    rate_delta: float | None = None
+    if total_last and total_prev:
+        rate_delta = (approved_last / total_last) - (approved_prev / total_prev)
+    return Kpi(
+        key="auto_approved_rate",
+        label="Auto-approved",
+        value=rate_all,
+        display=f"{rate_all * 100:.1f}%",
+        delta=rate_delta,
+        delta_display=(f"{rate_delta * 100:+.1f}pp" if rate_delta is not None else None),
+        direction=_direction(rate_delta),
+    )
+
+
+def _confidence_kpi(session: Session, *, last_24h: datetime, prev_24h: datetime) -> Kpi:
+    """Avg-confidence tile: mean of every usable per-field confidence value.
+
+    Values that cannot be converted to a finite float are skipped. The delta is the mean of
+    the last-24h values minus the mean of the preceding-24h values, and stays ``None`` unless
+    both windows contributed at least one value.
+    """
+    import math  # local import: only this helper needs it
+
+    # NOTE(review): this reads every extraction row into memory on each request; consider
+    # aggregating in SQL if the table grows large.
+    rows = session.execute(select(Extraction.created_at, Extraction.field_confidence)).all()
+    all_vals: list[float] = []
+    last_vals: list[float] = []
+    prev_vals: list[float] = []
+    for created_at, field_confidence in rows:
+        if not isinstance(field_confidence, dict):
+            continue
+        for value in field_confidence.values():
+            try:
+                v = float(value)
+            except (TypeError, ValueError, OverflowError):
+                continue
+            if not math.isfinite(v):
+                # float() accepts "nan"/"inf"; a single such value would turn the mean (and
+                # the rendered display string) into nan/inf.
+                continue
+            all_vals.append(v)
+            if created_at is None:
+                # Still counts toward the all-time mean, but cannot be placed in a window.
+                continue
+            # NOTE(review): assumes created_at is timezone-aware like the window bounds;
+            # comparing a naive datetime here would raise TypeError — confirm column type.
+            if created_at >= last_24h:
+                last_vals.append(v)
+            elif prev_24h <= created_at < last_24h:
+                prev_vals.append(v)
+    mean_all = _mean(all_vals)
+    mean_last = _mean(last_vals)
+    mean_prev = _mean(prev_vals)
+    conf_delta = mean_last - mean_prev if mean_last is not None and mean_prev is not None else None
+    return Kpi(
+        key="avg_confidence",
+        label="Avg confidence",
+        value=mean_all if mean_all is not None else 0.0,
+        display=f"{mean_all:.3f}" if mean_all is not None else "—",
+        delta=conf_delta,
+        delta_display=(f"{conf_delta:+.3f}" if conf_delta is not None else None),
+        direction=_direction(conf_delta),
+    )
+
+
+def _sla_kpi(session: Session, *, threshold_hours: int) -> Kpi:
+    """SLA-at-risk tile: needs-review items past the threshold over the needs-review total."""
+    # Reuses the /sla handler so both endpoints agree on what "over SLA" means.
+    sla = get_sla(session, threshold_hours=threshold_hours)
+    return Kpi(
+        key="sla_at_risk",
+        label="SLA at risk",
+        value=float(sla.over_sla),
+        display=f"{sla.over_sla} / {sla.total_needs_review}",
+        # No numeric delta for this tile; the secondary line is a caption, not a change.
+        delta=None,
+        delta_display=f"threshold {threshold_hours}h",
+        direction="flat",
+    )
+
+
+@router.get("/kpis", response_model=KpiResponse)
+def get_kpis(
+    session: Annotated[Session, Depends(get_session)],
+    threshold_hours: Annotated[int, Query(ge=1, le=720)] = 24,
+) -> KpiResponse:
+    """Four operational KPIs for the dashboard header.
+
+    Every figure is derived from real rows. The auto-approved and avg-confidence deltas
+    compare the last 24h against the preceding 24h and are ``None`` unless both windows have
+    data, so the UI shows nothing fabricated. The docs tile's delta is the last-24h intake
+    count, and the SLA tile has no delta (its secondary line is the threshold caption).
+    ``threshold_hours`` only affects the SLA tile. ``generated_at`` is a real UTC timestamp.
+    """
+    now = _utcnow()
+    last_24h = now - timedelta(hours=24)
+    prev_24h = now - timedelta(hours=48)
+
+    return KpiResponse(
+        kpis=[
+            _docs_kpi(session, last_24h=last_24h),
+            _auto_approved_kpi(session, last_24h=last_24h, prev_24h=prev_24h),
+            _confidence_kpi(session, last_24h=last_24h, prev_24h=prev_24h),
+            _sla_kpi(session, threshold_hours=threshold_hours),
+        ],
+        threshold_hours=threshold_hours,
+        generated_at=now.isoformat(),
+    )
+
+
 # Re-export the literal type used by the frontend's API client so a future schema
 # evolution surfaces in one place.
 SlaThresholdLiteral = Literal[1, 4, 24, 168, 720]
diff --git a/backend/tests/test_dashboard.py b/backend/tests/test_dashboard.py
@@ -230,6 +230,110 @@ def test_sla_rejects_invalid_threshold(client: TestClient) -> None:
     assert client.get("/dashboard/sla?threshold_hours=10000").status_code == 422
 
 
+# --- /dashboard/kpis ----------------------------------------------------------------
+
+
+def test_kpis_empty_corpus(client: TestClient) -> None:
+    """Empty database: all four tiles render with zeros and without invented deltas."""
+    response = client.get("/dashboard/kpis")
+    assert response.status_code == 200
+    by_key = {tile["key"]: tile for tile in response.json()["kpis"]}
+    assert set(by_key) == {"docs_ingested", "auto_approved_rate", "avg_confidence", "sla_at_risk"}
+
+    expected_display = {
+        "docs_ingested": "0",
+        "auto_approved_rate": "0.0%",
+        "avg_confidence": "—",  # em dash: no fields, no fake number
+        "sla_at_risk": "0 / 0",
+    }
+    for key, display in expected_display.items():
+        assert by_key[key]["display"] == display
+
+    # Tiles whose delta needs a comparison window must report None rather than a guess.
+    for key in ("auto_approved_rate", "avg_confidence"):
+        assert by_key[key]["delta"] is None
+    assert all(tile["direction"] == "flat" for tile in by_key.values())
+
+
+def test_kpis_values_and_formatting(client: TestClient, session: Session) -> None:
+    """Seeded rows produce the expected raw values and server-rendered display strings."""
+    created = datetime.now(UTC)
+    # Two extractions (hence two documents), both created now. Their three per-field
+    # confidences (0.80, 0.90, 0.70) average to 0.800.
+    _make_extraction(
+        session, hash_suffix="kv1", field_confidence={"a": 0.80, "b": 0.90}, created_at=created
+    )
+    second = _make_extraction(
+        session, hash_suffix="kv2", field_confidence={"a": 0.70}, created_at=created
+    )
+    # Three auto-approved items and one needs_review item => 75.0% auto-approved.
+    seeded_items = [
+        ("a0", WorkflowStatus.AUTO_APPROVED),
+        ("a1", WorkflowStatus.AUTO_APPROVED),
+        ("a2", WorkflowStatus.AUTO_APPROVED),
+        ("nr", WorkflowStatus.NEEDS_REVIEW),
+    ]
+    for suffix, status in seeded_items:
+        _make_workflow_item(session, extraction_id=second.id, idem_suffix=suffix, status=status)
+
+    payload = client.get("/dashboard/kpis").json()
+    by_key = {tile["key"]: tile for tile in payload["kpis"]}
+
+    docs = by_key["docs_ingested"]
+    assert docs["value"] == 2.0
+    assert docs["display"] == "2"
+    assert docs["delta"] == 2.0  # both landed within the last 24h
+    assert docs["delta_display"] == "+2 (24h)"
+
+    confidence = by_key["avg_confidence"]
+    assert confidence["value"] == pytest.approx(0.8)
+    assert confidence["display"] == "0.800"
+
+    auto = by_key["auto_approved_rate"]
+    assert auto["value"] == pytest.approx(0.75)
+    assert auto["display"] == "75.0%"
+
+    # The lone needs_review item was just created, so it is not past the 24h threshold.
+    assert by_key["sla_at_risk"]["display"] == "0 / 1"
+
+
+def test_kpis_auto_approved_delta(client: TestClient, session: Session) -> None:
+    """The auto-approved delta is the last-24h cohort's rate minus the preceding 24h's."""
+    ext = _make_extraction(session, hash_suffix="kad", field_confidence={"a": 0.9})
+    seeded_items = [
+        # Preceding 24–48h window: 2 items, 1 auto-approved => rate 0.50.
+        ("p1", WorkflowStatus.AUTO_APPROVED, 36),
+        ("p2", WorkflowStatus.NEEDS_REVIEW, 36),
+        # Last-24h window: 4 items, 3 auto-approved => rate 0.75.
+        ("l0", WorkflowStatus.AUTO_APPROVED, 2),
+        ("l1", WorkflowStatus.AUTO_APPROVED, 2),
+        ("l2", WorkflowStatus.AUTO_APPROVED, 2),
+        ("lnr", WorkflowStatus.NEEDS_REVIEW, 2),
+    ]
+    for suffix, status, age in seeded_items:
+        _make_workflow_item(
+            session,
+            extraction_id=ext.id,
+            idem_suffix=suffix,
+            status=status,
+            age_hours=age,
+        )
+
+    tiles = client.get("/dashboard/kpis").json()["kpis"]
+    auto = next(tile for tile in tiles if tile["key"] == "auto_approved_rate")
+    assert auto["delta"] == pytest.approx(0.25)  # 0.75 - 0.50
+    assert auto["delta_display"] == "+25.0pp"
+    assert auto["direction"] == "up"
+
+
+def test_kpis_rejects_invalid_threshold(client: TestClient) -> None:
+    """Thresholds outside the accepted 1..720 hour range are rejected with a 422."""
+    for bad_threshold in (0, -1, 10000):
+        response = client.get(f"/dashboard/kpis?threshold_hours={bad_threshold}")
+        assert response.status_code == 422
+
+
 # --- shape sanity (frontend depends on these keys) ----------------------------------
 
 
@@ -243,6 +347,18 @@ def test_response_keys_match_frontend_contract(client: TestClient) -> None:
     sla = client.get("/dashboard/sla").json()
     assert set(sla.keys()) == {"threshold_hours", "total_needs_review", "over_sla", "buckets"}
     assert set(sla["buckets"][0].keys()) == {"label", "count"}
+    kpis = client.get("/dashboard/kpis").json()
+    assert set(kpis.keys()) == {"kpis", "threshold_hours", "generated_at"}
+    assert len(kpis["kpis"]) == 4
+    assert set(kpis["kpis"][0].keys()) == {
+        "key",
+        "label",
+        "value",
+        "display",
+        "delta",
+        "delta_display",
+        "direction",
+    }
 
 
 # --- silence the unused-import warning on Chunk (used by other test modules) -------

diff --git a/frontend/index.html b/frontend/index.html
@@ -3,8 +3,23 @@
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="color-scheme" content="light dark" />
+    <meta name="color-scheme" content="dark light" />
+    <link rel="icon" href="/brand/sentinel-mark.svg" />
     <title>Sentinel</title>
+    <script>
+      // Apply the persisted theme before first paint so the saved choice never flashes.
+      // Falls back to dark; the useTheme hook in src/theme.ts mirrors this default.
+      (function () {
+        var theme = "dark";
+        try {
+          theme = localStorage.getItem("sentinel-theme") || theme;
+        } catch (e) {
+          // Storage is unavailable: keep the dark default.
+        }
+        document.documentElement.setAttribute("data-theme", theme);
+      })();
+    </script>
   </head>
   <body>
     <div id="root"></div>