diff --git a/workers/src/index.ts b/workers/src/index.ts index a8db22c..ec43520 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -444,13 +444,14 @@ Schema: knowledge_base_url — which knowledge base is being served document_uri — for get calls, the klappy:// URI requested worker_version — oddkit version string - cache_tier — which storage tier served the index count — always 1 (use SUM for aggregation) duration_ms — request processing time (full wall-clock at worker edge) bytes_in — UTF-8 byte length of the request body bytes_out — UTF-8 byte length of the response body (0 for SSE streams) tokens_in — cl100k_base token count of the request body tokens_out — cl100k_base token count of the response body + cache_hits — count of fetches in the request that hit a cache tier + cache_lookups — total fetches in the request (denominator for hit rate) index1 — sampling key (consumer label) Use SUM(_sample_interval) instead of COUNT(*) to account for Analytics Engine sampling. @@ -979,7 +980,9 @@ export default { const response = await handler(request, env, ctx); // Phase 1 telemetry — non-blocking, fire-and-forget (E0008) - // Phase 1.5: cache_tier from tracer feeds blob9 (E0008.1) + // Phase 1.5: cache_tier interpreter retired (PR refactor/retire-indexsource-interpreter). + // The tracer now records per-fetch facts; recordTelemetry receives the + // arithmetic (cache_hits, cache_lookups) instead of a single "winner" tier. // Phase 2: payload shape (bytes_in/out, tokens_in/out) feeds doubles // 3-6. tokenize_ms was tried and dropped — Workers freezes both // performance.now() and Date.now() during synchronous CPU work, making @@ -989,15 +992,15 @@ export default { // response. The helper handles clone failures safely. if (telemetryClone) { const durationMs = Date.now() - startTime; - // NOTE: Do NOT read tracer.indexSource here. The MCP handler returns + // NOTE: Do NOT read tracer.cacheStats here. The MCP handler returns // a streaming Response — `await handler(...)` resolves with the // Response object before the tool handler closure has finished - // running, so the tracer has not yet recorded the `index` span at - // this point. Reading here yields "none" for every tool. The tracer - // is only fully populated once the response body has been consumed - // (which forces the streaming tool handler to complete). The read - // therefore happens inside the waitUntil callback below, after - // `await responseClone.text()` resolves. + // running, so the tracer has not yet recorded any fetch records at + // this point. Reading here yields {hits:0,total:0} for every tool. + // The tracer is only fully populated once the response body has been + // consumed (which forces the streaming tool handler to complete). + // The read therefore happens inside the waitUntil callback below, + // after `await responseClone.text()` resolves. (PR #138 fix retained.) // Clone the response synchronously before returning so the body is // still available to read inside the deferred waitUntil callback. const responseClone = response.clone(); @@ -1016,15 +1019,15 @@ export default { } catch { // Fall through with empty string; bytes_out / tokens_out will be 0. } - // Read tracer.indexSource AFTER the response body has been + // Read tracer.cacheStats AFTER the response body has been // consumed. By this point the streaming tool handler has - // completed and any "index" / "index-build" spans have been - // recorded. Reading earlier (e.g. immediately after `await - // handler()` returned) was the streaming-race bug that caused - // every tool call to record cache_tier="none" in production. - const cacheTier = tracer.indexSource; + // completed and any storage-tier fetches have been recorded. + // Reading earlier (e.g. immediately after `await handler()` + // returned) was the streaming-race bug that caused every tool + // call to record cache_tier="none" in production (PR #138). + const stats = tracer.cacheStats; const shape = await measurePayloadShape(requestText, responseText); - recordTelemetry(request, requestText, env, durationMs, cacheTier, shape); + recordTelemetry(request, requestText, env, durationMs, stats, shape); } catch { // Telemetry must never break MCP requests } diff --git a/workers/src/telemetry.ts b/workers/src/telemetry.ts index 07f5413..6753da5 100644 --- a/workers/src/telemetry.ts +++ b/workers/src/telemetry.ts @@ -17,7 +17,10 @@ * (deploy-time injection) with a build-time fallback * to workers/package.json::version. Never "unknown" * on a normal deploy. - * blob9: cache_tier — which storage tier served the index (E0008.1) + * blob9: RETIRED — was cache_tier (E0008.1, removed in the + * retire-indexsource-interpreter refactor). Slot + * stays free; cache effectiveness is now derived + * from double7/double8. * double1: count — always 1 (for SUM aggregation) * double2: duration_ms — Full MCP request wall-clock, measured at the worker * edge from request entry through handler return. @@ -39,6 +42,12 @@ * 0 when tokenization was skipped or failed. * double6: tokens_out — cl100k_base token count of the response body. 0 for * streamed responses or tokenizer failure. + * double7: cache_hits — count of per-fetch records in the request whose + * `cached` flag was true. Sourced from + * tracer.cacheStats.hits. + * double8: cache_lookups — total per-fetch records in the request (the + * denominator for hit rate). Sourced from + * tracer.cacheStats.total. * * NOTE: a previous iteration shipped a `double7: tokenize_ms` field intended * to capture the wall-clock cost of tokenization for bench-vs-prod @@ -236,7 +245,7 @@ export function recordTelemetry( requestBody: string, env: Env, durationMs: number, - cacheTier?: string, + cacheStats?: { hits: number; total: number }, shape?: PayloadShape | null, ): void { if (!env.ODDKIT_TELEMETRY) return; @@ -258,6 +267,8 @@ export function recordTelemetry( const bytesOut = shape?.bytes_out ?? 0; const tokensIn = shape?.tokens_in ?? 0; const tokensOut = shape?.tokens_out ?? 0; + const cacheHits = cacheStats?.hits ?? 0; + const cacheLookups = cacheStats?.total ?? 0; for (const payload of messages) { const { label: consumerLabel, source: consumerSource } = parseConsumerLabel( @@ -286,7 +297,9 @@ export function recordTelemetry( toolCall?.knowledgeBaseUrl || env.DEFAULT_KNOWLEDGE_BASE_URL || "", documentUri, env.ODDKIT_VERSION || BUILD_VERSION, - cacheTier || "none", // blob9: E0008.1 x-ray cache tier + // blob9 retired (was cache_tier). Slot stays free per the + // "no deprecation, nobody uses them yet" rule. Cache effectiveness + // moved to double7/double8. ], doubles: [ 1, // double1: count @@ -295,6 +308,8 @@ export function recordTelemetry( bytesOut, // double4: bytes_out tokensIn, // double5: tokens_in tokensOut, // double6: tokens_out + cacheHits, // double7: cache_hits + cacheLookups, // double8: cache_lookups ], indexes: [consumerLabel], }); @@ -339,7 +354,9 @@ const BASELINE_BLOB_SEMANTIC_NAMES = [ "knowledge_base_url", // blob6 "document_uri", // blob7 "worker_version", // blob8 - "cache_tier", // blob9 + // blob9 retired (was cache_tier). Slot stays free per the + // "no deprecation, nobody uses them yet" rule. Hit-rate moved to + // double7/double8. ] as const; /** @@ -348,12 +365,14 @@ const BASELINE_BLOB_SEMANTIC_NAMES = [ * which are parseable at runtime. Baseline is the safety net. */ const BASELINE_DOUBLE_SEMANTIC_NAMES = [ - "count", // double1 - "duration_ms", // double2 - "bytes_in", // double3 - "bytes_out", // double4 - "tokens_in", // double5 - "tokens_out", // double6 + "count", // double1 + "duration_ms", // double2 + "bytes_in", // double3 + "bytes_out", // double4 + "tokens_in", // double5 + "tokens_out", // double6 + "cache_hits", // double7 + "cache_lookups", // double8 ] as const; /** Build a SchemaMap from ordered blob/double name arrays. Exported for unit testing. */ diff --git a/workers/src/tracing.ts b/workers/src/tracing.ts index 5a86183..74b07c2 100644 --- a/workers/src/tracing.ts +++ b/workers/src/tracing.ts @@ -1,12 +1,19 @@ /** * Lightweight edge-compatible request tracer for X-Ray performance diagnostics. * - * Adapted from aquifer-mcp's RequestTracer (which was modeled on - * translation-helps-mcp's EdgeXRayTracer). Records every I/O operation - * with timing, source tier, and optional detail. + * Adapted from translation-helps-mcp's EdgeXRayTracer. Records every fetch + * (storage tier access or network call) as a per-fetch fact with timing, + * cached boolean, optional status and size. The URL prefix carries the tier + * (memory:// cf-cache:// r2:// build:// or a real https:// URL); there is no + * separate `source` field on FetchRecord — `cached` is the primary fact, the + * URL is the breadcrumb. * - * Usage: create one RequestTracer per inbound request, thread it through - * storage reads and tool handlers, then serialize via toHeader() or toJSON(). + * Telemetry derives cache_hits and cache_lookups from the per-fetch records + * via the `cacheStats` getter — the dashboard does the aggregation, not the + * tracer. There is no interpretation layer that picks a "winning" tier. + * + * `addSpan` is retained for non-fetcher events (action timing, sha:* SHA + * resolution, anything that is not a storage tier read). * * Part of E0008: Observability epoch. */ @@ -18,16 +25,34 @@ export interface TraceSpan { detail?: string; } +export interface FetchRecord { + url: string; + duration_ms: number; + cached: boolean; + status?: number; + size?: number; +} + +export interface CacheStats { + hits: number; + misses: number; + total: number; +} + export class RequestTracer { private spans: TraceSpan[] = []; + private fetches: FetchRecord[] = []; private startTime: number; - private _indexSource: TraceSpan["source"] | null = null; constructor() { this.startTime = performance.now(); } - /** Record a span with explicit timing, source, and detail. */ + /** + * Record a non-fetcher span (action timing, SHA resolution, etc.). + * Storage tier reads should use `recordFetch` instead so they roll into + * the `cacheStats` arithmetic. + */ addSpan(label: string, duration_ms: number, source?: TraceSpan["source"], detail?: string): void { this.spans.push({ label, @@ -35,29 +60,30 @@ export class RequestTracer { ...(source ? { source } : {}), ...(detail ? { detail } : {}), }); + } - // Track the primary cache tier for telemetry (first span matching a data - // fetch). Three label families count: - // - "index" / "index-build" → navigability index fetch (search/orient/etc.) - // - "file:*" → individual file fetch (oddkit_get fast path) - // First-wins: actions like runSearch call getIndex *before* getFile, so - // the index tier wins for those — file:* spans that fire later are - // ignored. Actions like runGet for klappy:// URIs call getFile only, - // so the file tier wins. file-r2:* (r2 miss with source="miss") is - // excluded because "miss" is not a tier. - if (!this._indexSource && source && source !== "miss") { - if ( - label === "index" || - label === "index-build" || - label.startsWith("file:") - ) { - this._indexSource = source; - } - } + /** + * Record one storage-tier fetch as a per-fetch fact. URL prefix carries + * the tier: + * - memory://path → module-level cache hit (always cached: true) + * - cf-cache://key → Cloudflare Cache API + * - r2://path → R2 durable storage + * - build://path → cold rebuild from ZIP + * - https://... → real network fetch (status, size populated) + * + * `cached: true` for hits, `cached: false` for misses and live fetches. + * `cacheStats` aggregates these into hit/miss/total counts; the dashboard + * does any further per-tier breakdown via debug.trace.fetches[]. + */ + recordFetch(record: FetchRecord): void { + this.fetches.push({ + ...record, + duration_ms: Math.round(record.duration_ms), + }); } /** - * Time an async operation and record it as a span. + * Time an async operation and record it as a (non-fetch) span. * Returns the operation's result. */ async trace( @@ -78,22 +104,16 @@ export class RequestTracer { } /** - * Which storage tier served the primary data fetch for this request. - * This is the single summary value that feeds telemetry blob9 (cache_tier). - * "memory" = module-level cache hit (0ms, best case) - * "cache" = Cache API edge hit (~1ms) - * "r2" = R2 durable storage read (~40ms) - * "build" = cold build from ZIP (seconds, worst case) - * "github" = GitHub network fetch (when no R2/cache layers exist) - * "none" = no data fetch happened (e.g. version, time actions) - * - * The value reflects the primary fetch — for actions like search/orient - * that load the navigability index first, this is the index tier. For - * oddkit_get with a klappy:// URI (the fast path, no index needed), this - * is the file fetch tier. Either way: where did the work come from? + * Cache-hit arithmetic over the recorded fetches. `total` is the number of + * fetches; `hits` is the count where `cached === true`. This is what + * telemetry uses to populate `cache_hits` and `cache_lookups` doubles. + * Replaces the retired `indexSource` interpretation: no winner is chosen, + * the dashboard computes hit-rate as `SUM(cache_hits) / SUM(cache_lookups)`. */ - get indexSource(): string { - return this._indexSource ?? "none"; + get cacheStats(): CacheStats { + let hits = 0; + for (const f of this.fetches) if (f.cached) hits++; + return { hits, misses: this.fetches.length - hits, total: this.fetches.length }; } /** Total elapsed time since tracer creation. */ @@ -109,27 +129,36 @@ export class RequestTracer { if (s.detail) val += `[${s.detail}]`; return val; }); + for (const f of this.fetches) { + const tag = f.cached ? "hit" : "miss"; + parts.push(`fetch=${f.duration_ms}ms(${tag})[${f.url}]`); + } parts.push(`total=${this.elapsed_ms}ms`); return parts.join(", "); } /** Structured JSON for debug envelope inclusion. */ - toJSON(): { spans: TraceSpan[]; total_ms: number; index_source: string } { - return { spans: [...this.spans], total_ms: this.elapsed_ms, index_source: this.indexSource }; + toJSON(): { + spans: TraceSpan[]; + fetches: FetchRecord[]; + cacheStats: CacheStats; + total_ms: number; + } { + return { + spans: [...this.spans], + fetches: [...this.fetches], + cacheStats: this.cacheStats, + total_ms: this.elapsed_ms, + }; } - /** Number of recorded spans. */ + /** Number of recorded spans (non-fetch). */ get spanCount(): number { return this.spans.length; } -} -/** Shorten a cache/storage key for readable trace output. */ -export function shortKey(key: string): string { - const parts = key.split("/"); - if (parts.length <= 2) return key; - const mid = parts.slice(1, -1).map((p) => (p.length > 10 ? p.slice(0, 10) + "…" : p)).join("/"); - const last = parts[parts.length - 1]!; - const shortLast = last.length > 12 ? last.slice(0, 12) + "…" : last; - return `${parts[0]}/${mid}/${shortLast}`; + /** Number of recorded fetches. */ + get fetchCount(): number { + return this.fetches.length; + } } diff --git a/workers/src/zip-baseline-fetcher.ts b/workers/src/zip-baseline-fetcher.ts index e4b007e..6d1a2d6 100644 --- a/workers/src/zip-baseline-fetcher.ts +++ b/workers/src/zip-baseline-fetcher.ts @@ -21,7 +21,7 @@ */ import { unzipSync } from "fflate"; -import { RequestTracer, shortKey } from "./tracing"; +import { RequestTracer } from "./tracing"; // Index schema version — included in cache key so that code changes // to the indexing pipeline (filters, fields, scoring) invalidate stale indexes. @@ -513,12 +513,24 @@ export class KnowledgeBaseFetcher { const hit = await c.match(this.cacheRequest(key)); if (hit) { const text = await hit.text(); - this.tracer?.addSpan(`cache:${shortKey(key)}`, performance.now() - start, "cache"); + this.tracer?.recordFetch({ + url: `cf-cache://${key}`, + duration_ms: performance.now() - start, + cached: true, + }); return JSON.parse(text) as T; } - this.tracer?.addSpan(`cache:${shortKey(key)}`, performance.now() - start, "miss"); + this.tracer?.recordFetch({ + url: `cf-cache://${key}`, + duration_ms: performance.now() - start, + cached: false, + }); } catch { - this.tracer?.addSpan(`cache:${shortKey(key)}`, performance.now() - start, "miss", "error"); + this.tracer?.recordFetch({ + url: `cf-cache://${key}`, + duration_ms: performance.now() - start, + cached: false, + }); } return null; } @@ -670,7 +682,7 @@ export class KnowledgeBaseFetcher { // Check memory cache first (unless skipping cache) if (!skipCache && this.zipCache.has(cacheKey)) { - this.tracer?.addSpan("zip", 0, "memory"); + this.tracer?.recordFetch({ url: `memory://${cacheKey}`, duration_ms: 0, cached: true }); return this.zipCache.get(cacheKey)!; } @@ -680,11 +692,20 @@ export class KnowledgeBaseFetcher { const r2Object = await this.env.BASELINE.get(cacheKey); if (r2Object) { const data = new Uint8Array(await r2Object.arrayBuffer()); - this.tracer?.addSpan("zip", performance.now() - r2Start, "r2"); + this.tracer?.recordFetch({ + url: `r2://${cacheKey}`, + duration_ms: performance.now() - r2Start, + cached: true, + size: data.length, + }); this.zipCache.set(cacheKey, data); return data; } - this.tracer?.addSpan("zip-r2", performance.now() - r2Start, "miss"); + this.tracer?.recordFetch({ + url: `r2://${cacheKey}`, + duration_ms: performance.now() - r2Start, + cached: false, + }); } // Fetch from GitHub @@ -696,13 +717,23 @@ export class KnowledgeBaseFetcher { if (!response.ok) { console.error(`Failed to fetch ZIP: ${response.status} ${url}`); - this.tracer?.addSpan("zip", performance.now() - ghStart, "github", `${response.status}`); + this.tracer?.recordFetch({ + url, + duration_ms: performance.now() - ghStart, + cached: false, + status: response.status, + }); return null; } const data = new Uint8Array(await response.arrayBuffer()); - this.tracer?.addSpan("zip", performance.now() - ghStart, "github", - `${(data.length / 1024).toFixed(0)}KB`); + this.tracer?.recordFetch({ + url, + duration_ms: performance.now() - ghStart, + cached: false, + status: response.status, + size: data.length, + }); // Cache in R2 (no TTL — content-addressed by SHA at the index/file layer) if (this.env.BASELINE) { @@ -856,7 +887,7 @@ export class KnowledgeBaseFetcher { // Step 0: Module-level memory cache (0ms, 5-min TTL) const expectedKey = `v${INDEX_VERSION}/${getCacheKey(knowledgeBaseUrl || "default")}`; if (cachedIndex && cachedIndexKey === expectedKey && Date.now() - indexCachedAt < MODULE_CACHE_TTL_MS) { - this.tracer?.addSpan("index", 0, "memory"); + this.tracer?.recordFetch({ url: `memory://index/${expectedKey}`, duration_ms: 0, cached: true }); return cachedIndex; } @@ -869,8 +900,8 @@ export class KnowledgeBaseFetcher { const shaKey = `${baselineSha || "unknown"}_${canonSha || "none"}`; const cacheKey = `index/v${INDEX_VERSION}/${getCacheKey(knowledgeBaseUrl || "default")}_${shaKey}`; - // Step 2: Cache API (~1ms edge read) - const cacheStart = performance.now(); + // Step 2: Cache API (~1ms edge read) — cacheGet records the cf-cache:// fetch. + // We do NOT add a duplicate "index" span here; one I/O = one fetch record. const cacheHit = await this.cacheGet(cacheKey); if (cacheHit) { // Verify SHAs match (Cache API doesn't have KV's eventual consistency issue, @@ -878,7 +909,6 @@ export class KnowledgeBaseFetcher { const baselineShaMatch = !baselineSha || cacheHit.commit_sha === baselineSha; const canonShaMatch = !canonSha || cacheHit.canon_commit_sha === canonSha; if (baselineShaMatch && canonShaMatch) { - this.tracer?.addSpan("index", performance.now() - cacheStart, "cache"); // Populate module cache cachedIndex = cacheHit; cachedIndexKey = expectedKey; @@ -895,7 +925,12 @@ export class KnowledgeBaseFetcher { try { const text = await r2Object.text(); const r2Index = JSON.parse(text) as BaselineIndex; - this.tracer?.addSpan("index", performance.now() - r2Start, "r2"); + this.tracer?.recordFetch({ + url: `r2://${cacheKey}`, + duration_ms: performance.now() - r2Start, + cached: true, + size: text.length, + }); // Populate upstream caches cachedIndex = r2Index; @@ -905,10 +940,18 @@ export class KnowledgeBaseFetcher { return r2Index; } catch { - this.tracer?.addSpan("index", performance.now() - r2Start, "r2", "parse-error"); + this.tracer?.recordFetch({ + url: `r2://${cacheKey}`, + duration_ms: performance.now() - r2Start, + cached: false, + }); } } else { - this.tracer?.addSpan("index-r2", performance.now() - r2Start, "miss"); + this.tracer?.recordFetch({ + url: `r2://${cacheKey}`, + duration_ms: performance.now() - r2Start, + cached: false, + }); } } @@ -948,8 +991,11 @@ export class KnowledgeBaseFetcher { canon_commit_sha: canonSha || undefined, }; - this.tracer?.addSpan("index-build", performance.now() - buildStart, "build", - `${allEntries.length} entries`); + this.tracer?.recordFetch({ + url: `build://index/${expectedKey}`, + duration_ms: performance.now() - buildStart, + cached: false, + }); // Persist to R2 + Cache API (no KV) if (this.env.BASELINE) { @@ -1028,7 +1074,7 @@ export class KnowledgeBaseFetcher { // Tier 0: Module-level file cache (0ms) const moduleCached = fileCache.get(cacheKey); if (moduleCached && Date.now() - moduleCached.cachedAt < MODULE_CACHE_TTL_MS) { - this.tracer?.addSpan(`file:${path}`, 0, "memory"); + this.tracer?.recordFetch({ url: `memory://${path}`, duration_ms: 0, cached: true }); return moduleCached.content; } @@ -1038,7 +1084,12 @@ export class KnowledgeBaseFetcher { const r2Object = await this.env.BASELINE.get(cacheKey); if (r2Object) { const content = await r2Object.text(); - this.tracer?.addSpan(`file:${path}`, performance.now() - r2Start, "r2"); + this.tracer?.recordFetch({ + url: `r2://${path}`, + duration_ms: performance.now() - r2Start, + cached: true, + size: content.length, + }); // Populate module cache if (fileCache.size >= MAX_FILE_CACHE_ENTRIES) evictExpiredFileCache(); @@ -1047,7 +1098,11 @@ export class KnowledgeBaseFetcher { } return content; } - this.tracer?.addSpan(`file-r2:${path}`, performance.now() - r2Start, "miss"); + this.tracer?.recordFetch({ + url: `r2://${path}`, + duration_ms: performance.now() - r2Start, + cached: false, + }); } // Tier 2: Extract from ZIP @@ -1067,7 +1122,12 @@ export class KnowledgeBaseFetcher { if (repoPath === path) { const content = new TextDecoder().decode(fileData); - this.tracer?.addSpan(`file:${path}`, performance.now() - zipStart, "build", "zip-extract"); + this.tracer?.recordFetch({ + url: `build://${path}`, + duration_ms: performance.now() - zipStart, + cached: false, + size: content.length, + }); // Persist to R2 (no TTL — content-addressed) if (this.env.BASELINE) { diff --git a/workers/test/telemetry-integration.test.mjs b/workers/test/telemetry-integration.test.mjs index 18136c6..cf551f9 100644 --- a/workers/test/telemetry-integration.test.mjs +++ b/workers/test/telemetry-integration.test.mjs @@ -179,14 +179,14 @@ await test("oddkit_time tool call lands a complete telemetry record", async () = }); const shape = await measurePayloadShape(requestBody, responseBody); - recordTelemetry(mockRequest(), requestBody, env, 42, "memory", shape); + recordTelemetry(mockRequest(), requestBody, env, 42, { hits: 1, total: 1 }, shape); assert.equal(env.ODDKIT_TELEMETRY.writes.length, 1, "should write 1 data point"); const point = env.ODDKIT_TELEMETRY.writes[0]; - // Schema shape - assert.equal(point.blobs.length, 9, `blobs should be 9, got ${point.blobs.length}`); - assert.equal(point.doubles.length, 6, `doubles should be 6, got ${point.doubles.length}`); + // Schema shape — blob9 retired, doubles 7 and 8 added + assert.equal(point.blobs.length, 8, `blobs should be 8 (blob9 retired), got ${point.blobs.length}`); + assert.equal(point.doubles.length, 8, `doubles should be 8, got ${point.doubles.length}`); assert.equal(point.indexes.length, 1, "indexes should be 1"); // Blobs @@ -196,7 +196,6 @@ await test("oddkit_time tool call lands a complete telemetry record", async () = assert.equal(point.blobs[3], "integration-test", "blob4 = consumer_label"); assert.equal(point.blobs[4], "query-param", "blob5 = consumer_source"); assert.equal(point.blobs[7], "0.23.1-test", "blob8 = worker_version"); - assert.equal(point.blobs[8], "memory", "blob9 = cache_tier"); // Doubles assert.equal(point.doubles[0], 1, "double1 = count"); @@ -205,6 +204,8 @@ await test("oddkit_time tool call lands a complete telemetry record", async () = assert.equal(point.doubles[3], shape.bytes_out, "double4 = bytes_out"); assert.equal(point.doubles[4], shape.tokens_in, "double5 = tokens_in"); assert.equal(point.doubles[5], shape.tokens_out, "double6 = tokens_out"); + assert.equal(point.doubles[6], 1, "double7 = cache_hits"); + assert.equal(point.doubles[7], 1, "double8 = cache_lookups"); console.log(` bytes_in=${shape.bytes_in} bytes_out=${shape.bytes_out} ` + `tokens_in=${shape.tokens_in} tokens_out=${shape.tokens_out}`); @@ -231,7 +232,7 @@ await test("oddkit_search with realistic ~8KB response — measurements are sane }); const shape = await measurePayloadShape(requestBody, responseBody); - recordTelemetry(mockRequest("realistic-test"), requestBody, env, 215, "r2", shape); + recordTelemetry(mockRequest("realistic-test"), requestBody, env, 215, { hits: 0, total: 1 }, shape); const point = env.ODDKIT_TELEMETRY.writes[0]; assert.equal(point.blobs[2], "oddkit", "tool_name = oddkit (router)"); @@ -256,7 +257,7 @@ await test("SSE response (empty body) records bytes_out=0 and tokens_out=0", asy }); // Simulating the call site path where Content-Type was not application/json const shape = await measurePayloadShape(requestBody, ""); - recordTelemetry(mockRequest(), requestBody, env, 50, "memory", shape); + recordTelemetry(mockRequest(), requestBody, env, 50, { hits: 1, total: 1 }, shape); const point = env.ODDKIT_TELEMETRY.writes[0]; assert.equal(point.doubles[3], 0, "bytes_out should be 0 for empty response"); @@ -277,7 +278,7 @@ await test("batch JSON-RPC produces one data point per message", async () => { const responseBody = JSON.stringify(batch.map(m => ({ jsonrpc: "2.0", id: m.id, result: { ok: true } }))); const shape = await measurePayloadShape(requestBody, responseBody); - recordTelemetry(mockRequest(), requestBody, env, 30, "cache", shape); + recordTelemetry(mockRequest(), requestBody, env, 30, { hits: 1, total: 1 }, shape); assert.equal(env.ODDKIT_TELEMETRY.writes.length, 3, `should write 3 data points, got ${env.ODDKIT_TELEMETRY.writes.length}`); assert.equal(env.ODDKIT_TELEMETRY.writes[0].blobs[2], "oddkit_time"); @@ -304,10 +305,12 @@ const { // Build a test schema map (mirrors the production baseline) const TEST_BLOB_NAMES = [ "event_type", "method", "tool_name", "consumer_label", "consumer_source", - "knowledge_base_url", "document_uri", "worker_version", "cache_tier", + "knowledge_base_url", "document_uri", "worker_version", + // blob9 (cache_tier) retired in retire-indexsource-interpreter ]; const TEST_DOUBLE_NAMES = [ "count", "duration_ms", "bytes_in", "bytes_out", "tokens_in", "tokens_out", + "cache_hits", "cache_lookups", ]; const testMap = buildSchemaMapFromArrays(TEST_BLOB_NAMES, TEST_DOUBLE_NAMES); @@ -341,18 +344,18 @@ await test("detectRawSlotNames: rejects double5 with helpful message", async () }); await test("rewriteSqlToRaw: translates all blob semantic names", async () => { - const sql = "SELECT event_type, method, tool_name, consumer_label, consumer_source, knowledge_base_url, document_uri, worker_version, cache_tier FROM oddkit_telemetry"; + const sql = "SELECT event_type, method, tool_name, consumer_label, consumer_source, knowledge_base_url, document_uri, worker_version FROM oddkit_telemetry"; const rewritten = rewriteSqlToRaw(sql, testMap); assert.ok(rewritten.includes("blob1"), "event_type → blob1"); assert.ok(rewritten.includes("blob2"), "method → blob2"); assert.ok(rewritten.includes("blob3"), "tool_name → blob3"); assert.ok(rewritten.includes("blob6"), "knowledge_base_url → blob6"); - assert.ok(rewritten.includes("blob9"), "cache_tier → blob9"); + assert.ok(rewritten.includes("blob8"), "worker_version → blob8"); assert.ok(!rewritten.includes("event_type"), "event_type should be gone"); }); await test("rewriteSqlToRaw: translates all double semantic names", async () => { - const sql = "SELECT SUM(count) AS n, AVG(duration_ms), SUM(bytes_in), SUM(bytes_out), AVG(tokens_in), AVG(tokens_out) FROM oddkit_telemetry"; + const sql = "SELECT SUM(count) AS n, AVG(duration_ms), SUM(bytes_in), SUM(bytes_out), AVG(tokens_in), AVG(tokens_out), SUM(cache_hits), SUM(cache_lookups) FROM oddkit_telemetry"; const rewritten = rewriteSqlToRaw(sql, testMap); assert.ok(rewritten.includes("double1"), "count → double1"); assert.ok(rewritten.includes("double2"), "duration_ms → double2"); @@ -360,8 +363,12 @@ await test("rewriteSqlToRaw: translates all double semantic names", async () => assert.ok(rewritten.includes("double4"), "bytes_out → double4"); assert.ok(rewritten.includes("double5"), "tokens_in → double5"); assert.ok(rewritten.includes("double6"), "tokens_out → double6"); + assert.ok(rewritten.includes("double7"), "cache_hits → double7"); + assert.ok(rewritten.includes("double8"), "cache_lookups → double8"); assert.ok(!rewritten.includes("duration_ms"), "duration_ms should be gone"); assert.ok(!rewritten.includes("tokens_out"), "tokens_out should be gone"); + assert.ok(!rewritten.includes("cache_hits"), "cache_hits should be gone"); + assert.ok(!rewritten.includes("cache_lookups"), "cache_lookups should be gone"); }); await test("rewriteSqlToRaw: knowledge_base_url doesn't clobber shorter substrings", async () => { @@ -525,7 +532,7 @@ await test("malformed JSON-RPC is silently dropped (telemetry never throws)", as const shape = await measurePayloadShape(requestBody, "ok"); // Should not throw - recordTelemetry(mockRequest(), requestBody, env, 10, "none", shape); + recordTelemetry(mockRequest(), requestBody, env, 10, { hits: 0, total: 0 }, shape); assert.equal(env.ODDKIT_TELEMETRY.writes.length, 0, "should not write anything for malformed input"); }); @@ -536,65 +543,63 @@ await test("missing env.ODDKIT_TELEMETRY is a graceful no-op", async () => { const requestBody = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/list" }); const shape = await measurePayloadShape(requestBody, "{}"); // Should not throw - recordTelemetry(mockRequest(), requestBody, env, 5, "memory", shape); + recordTelemetry(mockRequest(), requestBody, env, 5, { hits: 1, total: 1 }, shape); }); -// ─── Test 7: Streaming-race regression — cacheTier must be read AFTER body ── +// ─── Test 7: Streaming-race regression — cacheStats must be read AFTER body ── -await test("cache_tier reads must happen after the streaming response body completes", async () => { +await test("cacheStats reads must happen after the streaming response body completes", async () => { // The MCP handler from agents/mcp returns a streaming Response. `await // handler(...)` resolves with the Response object before the tool handler - // closure has finished populating the tracer. Reading `tracer.indexSource` - // immediately after the await yields "none" for every tool because the - // "index" / "index-build" span has not been recorded yet. The fix in + // closure has finished populating the tracer. Reading `tracer.cacheStats` + // immediately after the await yields {hits:0,total:0} for every tool + // because no fetch records have been written yet. The fix in // workers/src/index.ts moves the read inside the waitUntil callback, // after the response body has been consumed (which forces the streaming // tool handler to complete). // - // This test simulates the timing pattern. A tracer is created, then a - // deferred task adds the "index" span asynchronously (mimicking the tool - // handler running while the response body streams). We assert: - // (a) the OLD pattern (read immediately) returns "none" — this is the - // observable bug the fix exists to prevent - // (b) the FIXED pattern (read after a microtask flush) returns the - // actual span source — this proves the fix recovers the value + // The interpretation layer (`indexSource`) was retired in + // refactor/retire-indexsource-interpreter, but the streaming-race + // regression survives unchanged — semantics are identical, only the + // accessor changed (indexSource → cacheStats). const tracer = new RequestTracer(); - // Schedule the "index" span for the next tick — this models a streaming - // tool handler that has not yet recorded its index access at the moment - // the outer handler's `await` resolves. + // Schedule a fetch record for the next tick — this models a streaming + // tool handler that has not yet recorded its storage access at the + // moment the outer handler's `await` resolves. const handlerDone = new Promise((resolve) => { setImmediate(() => { - tracer.addSpan("index", 12, "cache"); + tracer.recordFetch({ url: "cf-cache://index/v2.4/baseline_abc", duration_ms: 12, cached: true }); resolve(); }); }); - // (a) OLD pattern: read tracer.indexSource synchronously, before the - // deferred span has been added. This reproduces the production bug. - const oldPatternRead = tracer.indexSource; + // (a) OLD pattern: read tracer.cacheStats synchronously, before the + // deferred fetch has been added. This reproduces the production bug. + const oldPatternRead = tracer.cacheStats; assert.equal( - oldPatternRead, - "none", - "OLD pattern (read immediately after await) returns 'none' — this is the streaming-race bug", + oldPatternRead.total, + 0, + "OLD pattern (read immediately after await) sees zero fetches — the streaming-race bug", ); - // Wait for the deferred span to land (modeling `await responseClone.text()` + // Wait for the deferred fetch to land (modeling `await responseClone.text()` // forcing the streaming tool handler to finish). await handlerDone; - // (b) FIXED pattern: read tracer.indexSource AFTER the deferred work has - // completed. The tracer now reflects the actual cache tier. - const fixedPatternRead = tracer.indexSource; + // (b) FIXED pattern: read tracer.cacheStats AFTER the deferred work has + // completed. The tracer now reflects the actual fetch. + const fixedPatternRead = tracer.cacheStats; assert.equal( - fixedPatternRead, - "cache", - "FIXED pattern (read after body consumption) returns the actual span source", + fixedPatternRead.total, + 1, + "FIXED pattern (read after body consumption) sees the actual fetch", ); + assert.equal(fixedPatternRead.hits, 1, "the deferred fetch was a cache hit"); // Round-trip: feed the fixed value through recordTelemetry and verify it - // lands in blob9. + // lands in cache_hits / cache_lookups doubles. const env = mockEnv(); const requestBody = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/call", params: { name: "oddkit_search", arguments: { input: "test" } } }); const responseBody = JSON.stringify({ jsonrpc: "2.0", id: 1, result: { content: [{ type: "text", text: "ok" }] } }); @@ -602,111 +607,68 @@ await test("cache_tier reads must happen after the streaming response body compl recordTelemetry(mockRequest(), requestBody, env, 42, fixedPatternRead, shape); assert.equal(env.ODDKIT_TELEMETRY.writes.length, 1, "exactly one data point written"); assert.equal( - env.ODDKIT_TELEMETRY.writes[0].blobs[8], - "cache", - "blob9 (cache_tier) carries the post-body-consumption tracer value", + env.ODDKIT_TELEMETRY.writes[0].doubles[6], + 1, + "double7 (cache_hits) carries the post-body-consumption arithmetic", + ); + assert.equal( + env.ODDKIT_TELEMETRY.writes[0].doubles[7], + 1, + "double8 (cache_lookups) carries the post-body-consumption arithmetic", ); - // Sanity: if we had used the broken old-pattern read, blob9 would be "none" + // Sanity: if we had used the broken old-pattern read, both doubles would be 0 const env2 = mockEnv(); recordTelemetry(mockRequest(), requestBody, env2, 42, oldPatternRead, shape); assert.equal( - env2.ODDKIT_TELEMETRY.writes[0].blobs[8], - "none", - "blob9 with the OLD-pattern read would be 'none' — what production has been recording", + env2.ODDKIT_TELEMETRY.writes[0].doubles[6], + 0, + "double7 with the OLD-pattern read would be 0 — what production previously recorded", ); -}); - -// ─── Test 8: file:* spans count as primary tier (oddkit_get fast path) ────── - -await test("tracer recognizes file:* spans as primary tier when no index span fires", async () => { - // oddkit_get for klappy:// URIs takes the fast path: no getIndex call, - // straight to getFile. The fetcher emits `file:${path}` spans (memory/r2/ - // build). Before this fix, only "index" / "index-build" labels updated - // _indexSource, so klappy:// gets always recorded cache_tier="none" even - // after the streaming-race fix. This test pins the broader recognition. - - const tracer = new RequestTracer(); - tracer.addSpan("file:canon/foo.md", 12, "memory"); assert.equal( - tracer.indexSource, - "memory", - "file:* span with source 'memory' must populate indexSource (klappy:// fast path)", + env2.ODDKIT_TELEMETRY.writes[0].doubles[7], + 0, + "double8 with the OLD-pattern read would be 0", ); - - // r2 source on file fetch - const tracer2 = new RequestTracer(); - tracer2.addSpan("file:canon/bar.md", 40, "r2"); - assert.equal(tracer2.indexSource, "r2", "file:* with r2 source captured"); - - // build source on file fetch (cold ZIP extract) - const tracer3 = new RequestTracer(); - tracer3.addSpan("file:canon/baz.md", 1500, "build", "zip-extract"); - assert.equal(tracer3.indexSource, "build", "file:* with build source captured"); }); -await test("tracer keeps index-wins when index span fires before file spans (search pattern)", async () => { - // runSearch calls getIndex first (emits `index` span), then getFile for - // each hit (emits `file:*` spans). First-wins guard ensures the index - // tier — which represents the primary work — wins, not the per-file - // tiers from secondary fetches. +// ─── Test 8: tracer.recordFetch arithmetic — cacheStats reflects fetches ── - const tracer = new RequestTracer(); - tracer.addSpan("index", 33, "cache"); - tracer.addSpan("file:canon/result-1.md", 100, "r2"); - tracer.addSpan("file:canon/result-2.md", 250, "build", "zip-extract"); - - assert.equal( - tracer.indexSource, - "cache", - "index tier wins when it fires first (search/orient/catalog pattern)", - ); -}); - -await test("tracer file:* recognition still excludes file-r2:* miss spans", async () => { - // file-r2:${path} fires on R2 miss with source="miss". "miss" is not a - // tier and must not be recorded as one. The setter excludes any span - // whose source is the literal string "miss". +await test("tracer.recordFetch arithmetic: cacheStats {hits, misses, total} mirrors fetches[]", async () => { + // Replaces the four PR #139 file:* / index-wins / regression tests that + // pinned the retired interpreter behavior. The new contract is simple + // arithmetic over the per-fetch records — no winner selection, no + // first-vs-slowest debate, no special-case label recognition. const tracer = new RequestTracer(); - tracer.addSpan("file-r2:canon/foo.md", 100, "miss"); - assert.equal( - tracer.indexSource, - "none", - "file-r2:* with source 'miss' must not be captured as a tier", - ); - - // After the miss, the actual fetch fires with a real source — that one - // should be captured. - tracer.addSpan("file:canon/foo.md", 200, "build", "zip-extract"); - assert.equal( - tracer.indexSource, - "build", - "real file fetch after r2-miss is captured normally", + assert.deepEqual( + tracer.cacheStats, + { hits: 0, misses: 0, total: 0 }, + "fresh tracer has zero of everything", ); -}); - -await test("tracer existing index-only behavior still works (no regression)", async () => { - // Sanity: the original case (just index/index-build with no file:* spans) - // must continue to work exactly as before. - - const tracer1 = new RequestTracer(); - tracer1.addSpan("index", 0, "memory"); - assert.equal(tracer1.indexSource, "memory", "memory index tier captured"); - const tracer2 = new RequestTracer(); - tracer2.addSpan("index-build", 2000, "build"); - assert.equal(tracer2.indexSource, "build", "index-build with build source captured"); - - // Without a recognized data fetch, indexSource is "none" - const tracer3 = new RequestTracer(); - tracer3.addSpan("action:version", 5); - tracer3.addSpan("sha:klappy.dev", 0, "memory"); - assert.equal( - tracer3.indexSource, - "none", - "action and sha spans alone do not count as primary tier", - ); + // Two cache hits, one miss, one cold rebuild + tracer.recordFetch({ url: "memory://canon/foo.md", duration_ms: 0, cached: true }); + tracer.recordFetch({ url: "cf-cache://index/v2.4/k", duration_ms: 1, cached: true }); + tracer.recordFetch({ url: "r2://canon/bar.md", duration_ms: 40, cached: false }); + tracer.recordFetch({ url: "build://canon/bar.md", duration_ms: 1500, cached: false }); + + const stats = tracer.cacheStats; + assert.equal(stats.hits, 2, "two cached: true records → hits = 2"); + assert.equal(stats.misses, 2, "two cached: false records → misses = 2"); + assert.equal(stats.total, 4, "total = hits + misses"); + + // toJSON exposes the per-fetch records and the derived stats + const json = tracer.toJSON(); + assert.equal(json.fetches.length, 4, "all four records survive in toJSON.fetches"); + assert.deepEqual(json.cacheStats, stats, "toJSON.cacheStats matches the getter"); + assert.ok(!("index_source" in json), "retired index_source field is gone from toJSON"); + + // addSpan still records non-fetch events without affecting cacheStats + tracer.addSpan("sha:klappy.dev", 0, "memory"); + tracer.addSpan("action:search", 30); + assert.equal(tracer.cacheStats.total, 4, "addSpan does not increment cacheStats"); + assert.equal(tracer.spanCount, 2, "spans tracked separately from fetches"); }); console.log(`\n${pass} passed, ${fail} failed`);