Merged
35 changes: 19 additions & 16 deletions workers/src/index.ts
@@ -444,13 +444,14 @@ Schema:
knowledge_base_url — which knowledge base is being served
document_uri — for get calls, the klappy:// URI requested
worker_version — oddkit version string
cache_tier — which storage tier served the index
count — always 1 (use SUM for aggregation)
duration_ms — request processing time (full wall-clock at worker edge)
bytes_in — UTF-8 byte length of the request body
bytes_out — UTF-8 byte length of the response body (0 for SSE streams)
tokens_in — cl100k_base token count of the request body
tokens_out — cl100k_base token count of the response body
cache_hits — count of fetches in the request that hit a cache tier
cache_lookups — total fetches in the request (denominator for hit rate)
index1 — sampling key (consumer label)

Use SUM(_sample_interval) instead of COUNT(*) to account for Analytics Engine sampling.
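The sampling rule above can be sketched as a query builder. This is a hypothetical sketch: the dataset name, the `hitRateQuery` helper, and the `_sample_interval` weighting of double7/double8 are assumptions, not the worker's actual dashboard query; only the column semantics come from this schema.

```typescript
// Hypothetical sketch of the dashboard hit-rate query implied by the schema
// above. Dataset name and helper are illustrative; double7 = cache_hits and
// double8 = cache_lookups are taken from this PR.
function hitRateQuery(dataset: string): string {
  // SUM(_sample_interval) replaces COUNT(*) because Analytics Engine
  // downsamples: each stored row stands in for _sample_interval raw events,
  // so the per-row doubles are weighted by the same factor.
  return [
    "SELECT",
    "  blob6 AS knowledge_base_url,",
    "  SUM(_sample_interval) AS requests,",
    "  SUM(_sample_interval * double7) / SUM(_sample_interval * double8) AS hit_rate",
    `FROM ${dataset}`,
    "GROUP BY knowledge_base_url",
  ].join("\n");
}

console.log(hitRateQuery("oddkit_telemetry"));
```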
@@ -979,7 +980,9 @@ export default {
const response = await handler(request, env, ctx);

// Phase 1 telemetry — non-blocking, fire-and-forget (E0008)
// Phase 1.5: cache_tier from tracer feeds blob9 (E0008.1)
// Phase 1.5: cache_tier interpreter retired (PR refactor/retire-indexsource-interpreter).
// The tracer now records per-fetch facts; recordTelemetry receives the
// arithmetic (cache_hits, cache_lookups) instead of a single "winner" tier.
// Phase 2: payload shape (bytes_in/out, tokens_in/out) feeds doubles
// 3-6. tokenize_ms was tried and dropped — Workers freezes both
// performance.now() and Date.now() during synchronous CPU work, making
@@ -989,15 +992,15 @@ export default {
// response. The helper handles clone failures safely.
if (telemetryClone) {
const durationMs = Date.now() - startTime;
// NOTE: Do NOT read tracer.indexSource here. The MCP handler returns
// NOTE: Do NOT read tracer.cacheStats here. The MCP handler returns
// a streaming Response — `await handler(...)` resolves with the
// Response object before the tool handler closure has finished
// running, so the tracer has not yet recorded the `index` span at
// this point. Reading here yields "none" for every tool. The tracer
// is only fully populated once the response body has been consumed
// (which forces the streaming tool handler to complete). The read
// therefore happens inside the waitUntil callback below, after
// `await responseClone.text()` resolves.
// running, so the tracer has not yet recorded any fetch records at
// this point. Reading here yields {hits:0,total:0} for every tool.
// The tracer is only fully populated once the response body has been
// consumed (which forces the streaming tool handler to complete).
// The read therefore happens inside the waitUntil callback below,
// after `await responseClone.text()` resolves. (PR #138 fix retained.)
// Clone the response synchronously before returning so the body is
// still available to read inside the deferred waitUntil callback.
const responseClone = response.clone();
@@ -1016,15 +1019,15 @@
} catch {
// Fall through with empty string; bytes_out / tokens_out will be 0.
}
// Read tracer.indexSource AFTER the response body has been
// Read tracer.cacheStats AFTER the response body has been
// consumed. By this point the streaming tool handler has
// completed and any "index" / "index-build" spans have been
// recorded. Reading earlier (e.g. immediately after `await
// handler()` returned) was the streaming-race bug that caused
// every tool call to record cache_tier="none" in production.
const cacheTier = tracer.indexSource;
// completed and any storage-tier fetches have been recorded.
// Reading earlier (e.g. immediately after `await handler()`
// returned) was the streaming-race bug that caused every tool
// call to record cache_tier="none" in production (PR #138).
const stats = tracer.cacheStats;
const shape = await measurePayloadShape(requestText, responseText);
recordTelemetry(request, requestText, env, durationMs, cacheTier, shape);
recordTelemetry(request, requestText, env, durationMs, stats, shape);
} catch {
// Telemetry must never break MCP requests
}
39 changes: 29 additions & 10 deletions workers/src/telemetry.ts
@@ -17,7 +17,10 @@
* (deploy-time injection) with a build-time fallback
* to workers/package.json::version. Never "unknown"
* on a normal deploy.
* blob9: cache_tier — which storage tier served the index (E0008.1)
* blob9: RETIRED — was cache_tier (E0008.1, removed in the
* retire-indexsource-interpreter refactor). Slot
* stays free; cache effectiveness is now derived
* from double7/double8.
* double1: count — always 1 (for SUM aggregation)
* double2: duration_ms — Full MCP request wall-clock, measured at the worker
* edge from request entry through handler return.
@@ -39,6 +42,12 @@
* 0 when tokenization was skipped or failed.
* double6: tokens_out — cl100k_base token count of the response body. 0 for
* streamed responses or tokenizer failure.
* double7: cache_hits — count of per-fetch records in the request whose
* `cached` flag was true. Sourced from
* tracer.cacheStats.hits.
* double8: cache_lookups — total per-fetch records in the request (the
* denominator for hit rate). Sourced from
* tracer.cacheStats.total.
*
* NOTE: a previous iteration shipped a `double7: tokenize_ms` field intended
* to capture the wall-clock cost of tokenization for bench-vs-prod
@@ -236,7 +245,7 @@ export function recordTelemetry(
requestBody: string,
env: Env,
durationMs: number,
cacheTier?: string,
cacheStats?: { hits: number; total: number },
shape?: PayloadShape | null,
): void {
if (!env.ODDKIT_TELEMETRY) return;
@@ -258,6 +267,8 @@
const bytesOut = shape?.bytes_out ?? 0;
const tokensIn = shape?.tokens_in ?? 0;
const tokensOut = shape?.tokens_out ?? 0;
const cacheHits = cacheStats?.hits ?? 0;
const cacheLookups = cacheStats?.total ?? 0;

for (const payload of messages) {
const { label: consumerLabel, source: consumerSource } = parseConsumerLabel(
@@ -286,7 +297,9 @@
toolCall?.knowledgeBaseUrl || env.DEFAULT_KNOWLEDGE_BASE_URL || "",
documentUri,
env.ODDKIT_VERSION || BUILD_VERSION,
cacheTier || "none", // blob9: E0008.1 x-ray cache tier
// blob9 retired (was cache_tier). Slot stays free per the
// "no deprecation, nobody uses them yet" rule. Cache effectiveness
// moved to double7/double8.
],
doubles: [
1, // double1: count
@@ -295,6 +308,8 @@
bytesOut, // double4: bytes_out
tokensIn, // double5: tokens_in
tokensOut, // double6: tokens_out
cacheHits, // double7: cache_hits
cacheLookups, // double8: cache_lookups
],
indexes: [consumerLabel],
});
@@ -339,7 +354,9 @@ const BASELINE_BLOB_SEMANTIC_NAMES = [
"knowledge_base_url", // blob6
"document_uri", // blob7
"worker_version", // blob8
"cache_tier", // blob9
// blob9 retired (was cache_tier). Slot stays free per the
// "no deprecation, nobody uses them yet" rule. Hit-rate moved to
// double7/double8.
] as const;

/**
@@ -348,12 +365,14 @@ const BASELINE_BLOB_SEMANTIC_NAMES = [
* which are parseable at runtime. Baseline is the safety net.
*/
const BASELINE_DOUBLE_SEMANTIC_NAMES = [
"count", // double1
"duration_ms", // double2
"bytes_in", // double3
"bytes_out", // double4
"tokens_in", // double5
"tokens_out", // double6
"count", // double1
"duration_ms", // double2
"bytes_in", // double3
"bytes_out", // double4
"tokens_in", // double5
"tokens_out", // double6
"cache_hits", // double7
"cache_lookups", // double8
] as const;
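The ordered name arrays above admit a simple index-to-column mapping. A sketch only, assuming the ordering shown in this diff; the real buildSchemaMap may have a different shape, and `doubleColumn` is an illustrative name, not an export of telemetry.ts:

```typescript
// Sketch: deriving an Analytics Engine column name from the ordered
// semantic-name array. Ordering (double1..double8) is taken from this diff.
const DOUBLE_NAMES = [
  "count", "duration_ms", "bytes_in", "bytes_out",
  "tokens_in", "tokens_out", "cache_hits", "cache_lookups",
] as const;

function doubleColumn(name: (typeof DOUBLE_NAMES)[number]): string {
  // Arrays are 0-indexed; Analytics Engine columns are 1-indexed.
  return `double${DOUBLE_NAMES.indexOf(name) + 1}`;
}

console.log(doubleColumn("cache_hits")); // "double7"
```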

/** Build a SchemaMap from ordered blob/double name arrays. Exported for unit testing. */
135 changes: 82 additions & 53 deletions workers/src/tracing.ts
@@ -1,12 +1,19 @@
/**
* Lightweight edge-compatible request tracer for X-Ray performance diagnostics.
*
* Adapted from aquifer-mcp's RequestTracer (which was modeled on
* translation-helps-mcp's EdgeXRayTracer). Records every I/O operation
* with timing, source tier, and optional detail.
 * Adapted from translation-helps-mcp's EdgeXRayTracer. Records every fetch
 * (storage tier access or network call) as a per-fetch fact with timing, a
 * cached boolean, and optional status and size. The URL prefix carries the
 * tier (memory://, cf-cache://, r2://, build://, or a real https:// URL);
 * there is no separate `source` field on FetchRecord: `cached` is the
 * primary fact, and the URL is the breadcrumb.
*
* Usage: create one RequestTracer per inbound request, thread it through
* storage reads and tool handlers, then serialize via toHeader() or toJSON().
* Telemetry derives cache_hits and cache_lookups from the per-fetch records
* via the `cacheStats` getter — the dashboard does the aggregation, not the
* tracer. There is no interpretation layer that picks a "winning" tier.
*
* `addSpan` is retained for non-fetcher events (action timing, sha:* SHA
* resolution, anything that is not a storage tier read).
*
* Part of E0008: Observability epoch.
*/
@@ -18,46 +25,65 @@ export interface TraceSpan {
detail?: string;
}

export interface FetchRecord {
url: string;
duration_ms: number;
cached: boolean;
status?: number;
size?: number;
}

export interface CacheStats {
hits: number;
misses: number;
total: number;
}

export class RequestTracer {
private spans: TraceSpan[] = [];
private fetches: FetchRecord[] = [];
private startTime: number;
private _indexSource: TraceSpan["source"] | null = null;

constructor() {
this.startTime = performance.now();
}

/** Record a span with explicit timing, source, and detail. */
/**
* Record a non-fetcher span (action timing, SHA resolution, etc.).
* Storage tier reads should use `recordFetch` instead so they roll into
* the `cacheStats` arithmetic.
*/
addSpan(label: string, duration_ms: number, source?: TraceSpan["source"], detail?: string): void {
this.spans.push({
label,
duration_ms: Math.round(duration_ms),
...(source ? { source } : {}),
...(detail ? { detail } : {}),
});
}

// Track the primary cache tier for telemetry (first span matching a data
// fetch). Three label families count:
// - "index" / "index-build" → navigability index fetch (search/orient/etc.)
// - "file:*" → individual file fetch (oddkit_get fast path)
// First-wins: actions like runSearch call getIndex *before* getFile, so
// the index tier wins for those — file:* spans that fire later are
// ignored. Actions like runGet for klappy:// URIs call getFile only,
// so the file tier wins. file-r2:* (r2 miss with source="miss") is
// excluded because "miss" is not a tier.
if (!this._indexSource && source && source !== "miss") {
if (
label === "index" ||
label === "index-build" ||
label.startsWith("file:")
) {
this._indexSource = source;
}
}
/**
* Record one storage-tier fetch as a per-fetch fact. URL prefix carries
* the tier:
* - memory://path → module-level cache hit (always cached: true)
* - cf-cache://key → Cloudflare Cache API
* - r2://path → R2 durable storage
* - build://path → cold rebuild from ZIP
* - https://... → real network fetch (status, size populated)
*
* `cached: true` for hits, `cached: false` for misses and live fetches.
* `cacheStats` aggregates these into hit/miss/total counts; the dashboard
* does any further per-tier breakdown via debug.trace.fetches[].
*/
recordFetch(record: FetchRecord): void {
this.fetches.push({
...record,
duration_ms: Math.round(record.duration_ms),
});
}

/**
* Time an async operation and record it as a span.
* Time an async operation and record it as a (non-fetch) span.
* Returns the operation's result.
*/
async trace<T>(
@@ -78,22 +104,16 @@ export class RequestTracer {
}

/**
* Which storage tier served the primary data fetch for this request.
* This is the single summary value that feeds telemetry blob9 (cache_tier).
* "memory" = module-level cache hit (0ms, best case)
* "cache" = Cache API edge hit (~1ms)
* "r2" = R2 durable storage read (~40ms)
* "build" = cold build from ZIP (seconds, worst case)
* "github" = GitHub network fetch (when no R2/cache layers exist)
* "none" = no data fetch happened (e.g. version, time actions)
*
* The value reflects the primary fetch — for actions like search/orient
* that load the navigability index first, this is the index tier. For
* oddkit_get with a klappy:// URI (the fast path, no index needed), this
* is the file fetch tier. Either way: where did the work come from?
* Cache-hit arithmetic over the recorded fetches. `total` is the number of
* fetches; `hits` is the count where `cached === true`. This is what
* telemetry uses to populate `cache_hits` and `cache_lookups` doubles.
* Replaces the retired `indexSource` interpretation: no winner is chosen,
* the dashboard computes hit-rate as `SUM(cache_hits) / SUM(cache_lookups)`.
*/
get indexSource(): string {
return this._indexSource ?? "none";
get cacheStats(): CacheStats {
let hits = 0;
for (const f of this.fetches) if (f.cached) hits++;
return { hits, misses: this.fetches.length - hits, total: this.fetches.length };
}

/** Total elapsed time since tracer creation. */
@@ -109,27 +129,36 @@
if (s.detail) val += `[${s.detail}]`;
return val;
});
for (const f of this.fetches) {
const tag = f.cached ? "hit" : "miss";
parts.push(`fetch=${f.duration_ms}ms(${tag})[${f.url}]`);
}
parts.push(`total=${this.elapsed_ms}ms`);
return parts.join(", ");
}

/** Structured JSON for debug envelope inclusion. */
toJSON(): { spans: TraceSpan[]; total_ms: number; index_source: string } {
return { spans: [...this.spans], total_ms: this.elapsed_ms, index_source: this.indexSource };
toJSON(): {
spans: TraceSpan[];
fetches: FetchRecord[];
cacheStats: CacheStats;
total_ms: number;
} {
return {
spans: [...this.spans],
fetches: [...this.fetches],
cacheStats: this.cacheStats,
total_ms: this.elapsed_ms,
};
}

/** Number of recorded spans. */
/** Number of recorded spans (non-fetch). */
get spanCount(): number {
return this.spans.length;
}
}

/** Shorten a cache/storage key for readable trace output. */
export function shortKey(key: string): string {
const parts = key.split("/");
if (parts.length <= 2) return key;
const mid = parts.slice(1, -1).map((p) => (p.length > 10 ? p.slice(0, 10) + "…" : p)).join("/");
const last = parts[parts.length - 1]!;
const shortLast = last.length > 12 ? last.slice(0, 12) + "…" : last;
return `${parts[0]}/${mid}/${shortLast}`;
/** Number of recorded fetches. */
get fetchCount(): number {
return this.fetches.length;
}
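The per-fetch arithmetic above can be exercised in isolation. A standalone sketch using the FetchRecord shape from this diff, with the fold pulled out as a free function rather than the worker's actual class; the example URLs are illustrative:

```typescript
// Standalone sketch of the cacheStats fold: `cached` is the only fact the
// hit/miss arithmetic needs; the URL prefix is just the tier breadcrumb.
interface FetchRecord {
  url: string;
  duration_ms: number;
  cached: boolean;
  status?: number;
  size?: number;
}

function cacheStats(fetches: FetchRecord[]) {
  let hits = 0;
  for (const f of fetches) if (f.cached) hits++;
  return { hits, misses: fetches.length - hits, total: fetches.length };
}

const stats = cacheStats([
  { url: "memory://index.json", duration_ms: 0, cached: true },
  { url: "r2://docs/guide.md", duration_ms: 42, cached: false },
  { url: "cf-cache://docs/intro.md", duration_ms: 1, cached: true },
]);
// → { hits: 2, misses: 1, total: 3 }
```

No tier wins here: the dashboard computes the hit rate from the summed counts, which is exactly why recordTelemetry now takes `{ hits, total }` instead of a single tier string.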
New fetchCount getter is never used anywhere

Low Severity

The newly added fetchCount getter on RequestTracer has zero callers — not in production code, not in tests. Its analog spanCount is exercised in the test file (line 671), but fetchCount is never referenced. The test at line 663 uses json.fetches.length instead, bypassing the getter entirely.


Reviewed by Cursor Bugbot for commit 1635142.

}