PatterAI · nicolotognoni · May 12, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,18 @@
 
 ## 0.6.1 (2026-05-12)
 
+### Changed — Dashboard percentile threshold raised back to 10 turns (with p50 fallback)
+
+The PR-#82 follow-up lowered the percentile sample threshold from 5 → 2 turns to keep the per-call detail pane in sync with the call-list column. In practice that produced misleading headline numbers on short calls: a live test with n=5 turns surfaced `p95=1977 ms` while `p50=309 ms` — the dashboard showed the 1977 ms outlier as "latency" because at n=5 the 95th-percentile collapses to "the single slowest turn" rather than a true tail estimate. Raised the threshold back to 10 (where p95 interpolates between samples 9 and 10 and starts being statistically meaningful), but instead of returning to a blank `—`, every surface now falls back to **p50** below the threshold and labels itself accordingly:
+
+- `LatencyPanel` (detail pane, pipeline calls): p95 boxes become `p50 round-trip (n<10)` / `p50 wait (n<10)` with a hover-tip explaining the fallback.
+- `MetricsPanel` (detail tab, realtime + pipeline): same treatment — `p50 (n<10)`, hover-tip, plus a footer line restating sample size.
+- `CallTable` "Latency" column (renamed from "p95 latency" since it now reports either): per-row fallback shows `<ms> (p50)` for short calls; header tooltip documents the rule.
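+- App-level "Avg latency p95" card: the client-side average now requires `>=10 turns/call AND >=3 qualifying calls` before it yields a number; when that gate is not met the card falls back to the server-side `avg_latency_ms` aggregate if one is present, and otherwise renders `—` rather than the prior "average of polluted per-call p95s", which would swing wildly when a single short call landed in the bucket.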
+- Sparkline tooltips (`bucketHeadline` for `kind='latency'`): show `AVG LATENCY n/a (n<10 turns)` when no call in the bucket qualifies.
+
+The `MIN_TURNS_FOR_PERCENTILES` / `MIN_TURNS_FOR_P95_COLUMN` / `MIN_TURNS_FOR_AVG_P95` constants are kept in lockstep across `LatencyPanel.tsx`, `MetricsPanel.tsx`, `CallTable.tsx`, `Metric.tsx`, and `App.tsx` so every surface applies the same threshold. They are separate per-file declarations rather than one shared constant, so they must be changed together. Files: `dashboard-app/src/components/LatencyPanel.tsx`, `dashboard-app/src/components/MetricsPanel.tsx`, `dashboard-app/src/components/CallTable.tsx`, `dashboard-app/src/components/Metric.tsx`, `dashboard-app/src/App.tsx`, `dashboard-app/src/App.test.ts` (new). Bundle re-synced to `libraries/{typescript,python}/.../dashboard/ui.html` via `dashboard-app/scripts/sync.mjs`.
+
 ### Changed — `StreamHandler` adopt-capability check now uses duck typing
 
 The TS realtime adopt branch in `stream-handler.ts` previously relied on `this.adapter instanceof OpenAIRealtimeAdapter` to gate the prewarm-handoff path. Switched to a duck-type check (`typeof adapter.adoptWebSocket === 'function'`) so the generic stream-handler module stays provider-agnostic on this hot path and matches the Python handler's `getattr(self._adapter, "adopt_websocket", None)` shape. Files: `libraries/typescript/src/stream-handler.ts`.

diff --git a/dashboard-app/src/App.test.ts b/dashboard-app/src/App.test.ts
@@ -0,0 +1,111 @@
+import { describe, expect, it } from 'vitest';
+import { avgP95 } from './App';
+import { bucketHeadline, type MetricBucket } from './components/Metric';
+import type { Call } from './components/CallTable';
+
+function makeCall(id: string, overrides: Partial<Call> = {}): Call {
+  return {
+    id,
+    status: 'ended',
+    direction: 'inbound',
+    from: `from-${id}`,
+    to: `to-${id}`,
+    carrier: 'twilio',
+    cost: {},
+    ...overrides,
+  };
+}
+
+function makeBucket(calls: Call[]): MetricBucket {
+  return {
+    height: 100,
+    calls,
+    fromMs: 0,
+    toMs: 60_000,
+  };
+}
+
+describe('avgP95 — cross-call headline gating', () => {
+  it('returns 0 when no calls have latencyP95', () => {
+    expect(avgP95([])).toBe(0);
+    expect(avgP95([makeCall('a')])).toBe(0);
+  });
+
+  it('returns 0 when no call has >=10 turns (avoids single-outlier headline)', () => {
+    // Three calls, all with latencyP95 but only short turn counts — pre-fix
+    // this would average all three; post-fix none qualify and we return 0
+    // so the UI can fall back to "—".
+    const calls = [
+      makeCall('a', { latencyP95: 1977, turnCount: 5 }),
+      makeCall('b', { latencyP95: 1500, turnCount: 7 }),
+      makeCall('c', { latencyP95: 1200, turnCount: 3 }),
+    ];
+    expect(avgP95(calls)).toBe(0);
+  });
+
+  it('returns 0 when fewer than 3 calls qualify (sample too thin)', () => {
+    // Two qualifying calls — below the 3-call minimum we still return 0
+    // because a 2-call average is too noisy for a headline number.
+    const calls = [
+      makeCall('a', { latencyP95: 400, turnCount: 12 }),
+      makeCall('b', { latencyP95: 500, turnCount: 15 }),
+      makeCall('c', { latencyP95: 1900, turnCount: 4 }), // disqualified
+    ];
+    expect(avgP95(calls)).toBe(0);
+  });
+
+  it('averages only calls with >=10 turns when 3+ qualify', () => {
+    const calls = [
+      makeCall('a', { latencyP95: 400, turnCount: 10 }),
+      makeCall('b', { latencyP95: 500, turnCount: 12 }),
+      makeCall('c', { latencyP95: 600, turnCount: 15 }),
+      makeCall('d', { latencyP95: 1977, turnCount: 5 }), // disqualified outlier
+    ];
+    // avg(400,500,600) = 500. The outlier is excluded.
+    expect(avgP95(calls)).toBe(500);
+  });
+
+  it('rounds the average to an integer', () => {
+    const calls = [
+      makeCall('a', { latencyP95: 400, turnCount: 10 }),
+      makeCall('b', { latencyP95: 500, turnCount: 12 }),
+      makeCall('c', { latencyP95: 602, turnCount: 15 }),
+    ];
+    expect(avgP95(calls)).toBe(501); // 1502 / 3 = 500.67 → Math.round gives 501
+  });
+});
+
+describe('bucketHeadline — latency sparkline tooltip', () => {
+  it('shows "n/a (n<10 turns)" when no call in bucket has enough turns', () => {
+    const bucket = makeBucket([
+      makeCall('a', { latencyP95: 1977, turnCount: 5 }),
+      makeCall('b', { latencyP95: 1500, turnCount: 7 }),
+    ]);
+    expect(bucketHeadline(bucket, 'latency')).toEqual({
+      label: 'AVG LATENCY',
+      value: 'n/a (n<10 turns)',
+    });
+  });
+
+  it('averages only qualifying calls when at least one has >=10 turns', () => {
+    const bucket = makeBucket([
+      makeCall('a', { latencyP95: 400, turnCount: 10 }),
+      makeCall('b', { latencyP95: 1977, turnCount: 4 }), // excluded
+    ]);
+    expect(bucketHeadline(bucket, 'latency')).toEqual({
+      label: 'AVG LATENCY',
+      value: '400 ms',
+    });
+  });
+
+  it('still reports CALLS count headline for kind=count regardless of turns', () => {
+    const bucket = makeBucket([
+      makeCall('a', { latencyP95: 1977, turnCount: 3 }),
+      makeCall('b', { turnCount: 2 }),
+    ]);
+    expect(bucketHeadline(bucket, 'count')).toEqual({
+      label: 'CALLS',
+      value: '2',
+    });
+  });
+});
diff --git a/dashboard-app/src/App.tsx b/dashboard-app/src/App.tsx
@@ -24,11 +24,25 @@ const RANGE_LABEL: Record<RangeKey, string> = {
   All: 'all-time',
 };
 
-function avgP95(calls: readonly Call[]): number {
-  const withLat = calls.filter((c) => typeof c.latencyP95 === 'number');
-  if (withLat.length === 0) return 0;
-  const total = withLat.reduce((s, c) => s + (c.latencyP95 ?? 0), 0);
-  return Math.round(total / withLat.length);
+// Headline "Avg latency p95" is the cross-call mean of each call's own p95.
+// A per-call p95 with <10 turns is statistically dominated by a single slow
+// turn (observed: n=5 call with p95=1977ms vs p50=309ms), so including such
+// calls in the average makes the dashboard headline swing wildly. We require
+// >=10 turns per call AND >=3 qualifying calls in the bucket before showing
+// a number — below that, the caller should look at per-call detail rather
+// than trust an aggregate.
+export const MIN_TURNS_FOR_AVG_P95 = 10;
+export const MIN_CALLS_FOR_AVG_P95 = 3;
+
+export function avgP95(calls: readonly Call[]): number {
+  const qualifying = calls.filter(
+    (c) =>
+      typeof c.latencyP95 === 'number' &&
+      (c.turnCount ?? 0) >= MIN_TURNS_FOR_AVG_P95,
+  );
+  if (qualifying.length < MIN_CALLS_FOR_AVG_P95) return 0;
+  const total = qualifying.reduce((s, c) => s + (c.latencyP95 ?? 0), 0);
+  return Math.round(total / qualifying.length);
 }
 
 function totalSpend(calls: readonly Call[]): number {
@@ -121,9 +135,14 @@ export function App() {
   // Headline counters reflect the active range (Total / Latency / Spend),
   // except "Active now" which is always the current live count.
   const totalCount = filteredCalls.length;
-  const rangeAvgP95 = avgP95(filteredCalls) || aggregates?.avg_latency_ms || 0;
+  // avgP95 returns 0 when too few calls qualify for a stable headline; in
+  // that case prefer the server-side aggregate (if present). Only when both
+  // are missing/zero does the card render "—" instead of "0 ms" — a number
+  // that looks like real data but is just the empty-state.
+  const rangeAvgP95Raw = avgP95(filteredCalls) || aggregates?.avg_latency_ms || 0;
   const rangeSpend = totalSpend(filteredCalls) || aggregates?.total_cost || 0;
   const phoneNumber = pickPhoneNumber(calls);
+  const hasStableAvgP95 = rangeAvgP95Raw > 0;
 
   const sparkTotalCalls = useMemo(
     () => computeSparkline(filteredCalls, 'totalCalls', strategy),
@@ -179,8 +198,8 @@ export function App() {
           />
           <Metric
             label="Avg latency p95"
-            value={rangeAvgP95 || 0}
-            unit="ms"
+            value={hasStableAvgP95 ? rangeAvgP95Raw : '—'}
+            unit={hasStableAvgP95 ? 'ms' : undefined}
             spark={sparkLatency.heights}
             buckets={toBuckets(sparkLatency)}
             onSelectCall={setSelectedId}

diff --git a/dashboard-app/src/components/CallTable.tsx b/dashboard-app/src/components/CallTable.tsx
@@ -2,6 +2,11 @@ import { useMemo } from 'react';
 import { fmtDuration, fmtPhone, fmtCostUSD } from './format';
 import { IconArrowDown, IconArrowUp, IconSearch } from './icons';
 
+// Kept in sync with LatencyPanel / MetricsPanel: below 10 turns p95 is
+// noise (one slow turn dominates the headline), so this column shows p50
+// instead and the cell labels which statistic it reports.
+export const MIN_TURNS_FOR_P95_COLUMN = 10;
+
 export interface CallCost {
   telco?: number;
   llm?: number;
@@ -65,8 +70,18 @@ function CallRow({ call, isSelected, onSelect, isNew }: CallRowProps) {
       ? fmtDuration((Date.now() - call.durationStart) / 1000)
       : fmtDuration(call.duration || 0);
 
-  const latPct = call.latencyP95 ? Math.min(100, (call.latencyP95 / 1000) * 100) : 0;
-  const warn = (call.latencyP95 ?? 0) > 600;
+  // Below the sample threshold prefer p50 (a "p95" over 3-4 turns is just the
+  // slowest turn). If p50 is missing, still show p95 but label it as p95.
+  const turns = call.turnCount ?? 0;
+  const usePct95 = turns >= MIN_TURNS_FOR_P95_COLUMN;
+  const showP50 = !usePct95 && call.latencyP50 != null;
+  const latencyValue = showP50 ? call.latencyP50 : call.latencyP95;
+  const latLabel = showP50 ? 'p50' : 'p95';
+  const latPct = latencyValue ? Math.min(100, (latencyValue / 1000) * 100) : 0;
+  const warn = (latencyValue ?? 0) > 600;
+  const latencyTooltip = showP50
+    ? `p95 hidden until ≥${MIN_TURNS_FOR_P95_COLUMN} turns — showing p50 instead (n=${turns})`
+    : undefined;
 
   const totalCost =
     call.cost.total ??
@@ -103,13 +118,18 @@ function CallRow({ call, isSelected, onSelect, isNew }: CallRowProps) {
         </span>
       </td>
       <td className="num-cell">{call.status === 'no-answer' ? '—' : dur}</td>
-      <td>
-        {call.latencyP95 ? (
+      <td title={latencyTooltip}>
+        {latencyValue ? (
           <>
             <span className={'lat-bar' + (warn ? ' warn' : '')}>
               <i style={{ width: latPct + '%' }} />
             </span>
-            <span className="num-cell">{call.latencyP95} ms</span>
+            <span className="num-cell">
+              {latencyValue} ms
+              {!usePct95 && (
+                <span style={{ marginLeft: 4, opacity: 0.55, fontSize: 10 }}>({latLabel})</span>
+              )}
+            </span>
           </>
         ) : (
           '—'
@@ -187,7 +207,11 @@ export function CallTable({
               <th>From → To</th>
               <th>Carrier</th>
               <th>Duration</th>
-              <th>p95 latency</th>
+              <th
+                title={`p95 latency. Calls with <${MIN_TURNS_FOR_P95_COLUMN} turns fall back to p50 (marked) since p95 is dominated by a single outlier turn at low sample counts.`}
+              >
+                Latency
+              </th>
               <th>Cost</th>
             </tr>
           </thead>

diff --git a/dashboard-app/src/components/LatencyPanel.tsx b/dashboard-app/src/components/LatencyPanel.tsx
@@ -4,15 +4,27 @@ export interface LatencyPanelProps {
   call: Call | null;
 }
 
-// 2 turn = almeno 1 turn user genuino oltre al firstMessage. Sotto a 2 i
-// percentili sono privi di senso (un singolo campione). Sopra a 2 sono
-// statisticamente magri ma informativi — meglio mostrarli che lasciare il
-// pannello con "—" quando la tabella sopra mostra già una p95 dal fallback
-// ad avg.
-const MIN_TURNS_FOR_PERCENTILES = 2;
+// With <10 samples p95 is dominated by a single outlier turn — observed on
+// a real n=5 call where p95=1977ms but p50=309ms, making the headline
+// number misleading. 10 turns is the threshold where p95 becomes a stable
+// signal (95th percentile = 9.5th-ranked sample, so at n=10 it interpolates
+// between the two slowest turns rather than reporting the absolute slowest).
+// Below the threshold we show p50 instead — robust, single-sample-resistant,
+// and labelled so the user knows why.
+const MIN_TURNS_FOR_PERCENTILES = 10;
 
 export function LatencyPanel({ call }: LatencyPanelProps) {
-  if (!call || (!call.latencyP95 && !call.agentResponseP95)) return null;
+  if (!call) return null;
+  // Hide the panel entirely when there is no latency signal at all (neither
+  // p50 nor p95 on either metric). Below the percentile threshold we still
+  // render — falling back to p50 — so a 1-2 turn call with measured timings
+  // does not show a blank pane.
+  const hasAnyLatency =
+    call.latencyP50 != null ||
+    call.latencyP95 != null ||
+    call.agentResponseP50 != null ||
+    call.agentResponseP95 != null;
+  if (!hasAnyLatency) return null;
 
   const stt = call.sttAvg ?? 0;
   const llm = call.llmAvg ?? 0;
@@ -24,42 +36,68 @@ export function LatencyPanel({ call }: LatencyPanelProps) {
   const showPercentiles = turns >= MIN_TURNS_FOR_PERCENTILES;
   const dash = '—';
 
+  const lowSampleHint = `p95 hidden until ≥${MIN_TURNS_FOR_PERCENTILES} turns — showing p50 instead (n=${turns})`;
+
   return (
     <div className="rr-card">
       <h3 style={{ marginBottom: 14 }}>Latency · this call</h3>
       <div className="lat-grid">
         <div className="latbox">
           <div className="l">p50 round-trip</div>
           <div className="v">
-            {showPercentiles ? call.latencyP50 ?? dash : dash}
-            {showPercentiles && <span className="u">ms</span>}
+            {call.latencyP50 ?? dash}
+            {call.latencyP50 != null && <span className="u">ms</span>}
           </div>
         </div>
-        <div className={'latbox' + (showPercentiles && (call.latencyP95 ?? 0) > 600 ? ' warn' : '')}>
-          <div className="l">p95 round-trip</div>
+        <div
+          className={
+            'latbox' + (showPercentiles && (call.latencyP95 ?? 0) > 600 ? ' warn' : '')
+          }
+          title={showPercentiles ? undefined : lowSampleHint}
+        >
+          <div className="l">
+            {showPercentiles
+              ? 'p95 round-trip'
+              : `p50 round-trip (n<${MIN_TURNS_FOR_PERCENTILES})`}
+          </div>
           <div className="v">
-            {showPercentiles ? call.latencyP95 ?? dash : dash}
-            {showPercentiles && <span className="u">ms</span>}
+            {showPercentiles ? call.latencyP95 ?? dash : call.latencyP50 ?? dash}
+            {(showPercentiles ? call.latencyP95 : call.latencyP50) != null && (
+              <span className="u">ms</span>
+            )}
           </div>
         </div>
         <div className="latbox">
           <div className="l">p50 wait</div>
           <div className="v">
-            {showPercentiles ? call.agentResponseP50 ?? dash : dash}
-            {showPercentiles && <span className="u">ms</span>}
+            {call.agentResponseP50 ?? dash}
+            {call.agentResponseP50 != null && <span className="u">ms</span>}
           </div>
         </div>
-        <div className={'latbox' + (showPercentiles && (call.agentResponseP95 ?? 0) > 600 ? ' warn' : '')}>
-          <div className="l">p95 wait</div>
+        <div
+          className={
+            'latbox' +
+            (showPercentiles && (call.agentResponseP95 ?? 0) > 600 ? ' warn' : '')
+          }
+          title={showPercentiles ? undefined : lowSampleHint}
+        >
+          <div className="l">
+            {showPercentiles ? 'p95 wait' : `p50 wait (n<${MIN_TURNS_FOR_PERCENTILES})`}
+          </div>
           <div className="v">
-            {showPercentiles ? call.agentResponseP95 ?? dash : dash}
-            {showPercentiles && <span className="u">ms</span>}
+            {showPercentiles
+              ? call.agentResponseP95 ?? dash
+              : call.agentResponseP50 ?? dash}
+            {(showPercentiles ? call.agentResponseP95 : call.agentResponseP50) != null && (
+              <span className="u">ms</span>
+            )}
           </div>
         </div>
       </div>
       {!showPercentiles && (
         <div style={{ marginTop: -6, marginBottom: 10, fontSize: 11, opacity: 0.6 }}>
-          {turns} {turns === 1 ? 'turn' : 'turns'} — percentiles need ≥{MIN_TURNS_FOR_PERCENTILES}
+          {turns} {turns === 1 ? 'turn' : 'turns'} — p95 hidden until ≥
+          {MIN_TURNS_FOR_PERCENTILES}, showing p50
         </div>
       )}