From 945f810357b1b6e99d2d3fbae95659eb629958c1 Mon Sep 17 00:00:00 2001 From: "Claw (AINYC Agent)" Date: Wed, 17 Jun 2026 21:36:22 +0000 Subject: [PATCH] feat(health): surface mentionRate on the health snapshot cnry health was the last citation-only read surface; mention is already live on overview/report/analytics. Add mentionRate alongside citedRate so health/dashboard/overviews can lead with mention while keeping cited. The two signals stay independent: mention counts a (query x provider) pair only when answer_mentioned === true (tri-state; null = "not checked" and is never coerced to false). buildRunData now threads query_snapshots.answer_mentioned through computeHealth, which emits overallMentionRate + mentionedPairs + per-provider mentionRate beside the untouched cited fields. - intelligence: Snapshot.answerMentioned + HealthScore mention fields; compute - canonry: buildRunData SELECT + persist the new columns; CLI renders mention first - db: nullable overall_mention_rate / mentioned_pairs columns; idempotent migration v80 (guarded run, table+column existence checks) - contracts: additive HealthSnapshotDto fields (public SDK contract) - api-routes: map/aggregate/empty health rows; coalesce legacy NULL -> 0 Additive contract, idempotent migration, legacy rows read back as 0. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- package.json | 2 +- packages/api-routes/src/composites.ts | 16 ++++- packages/api-routes/src/intelligence.ts | 45 ++++++++++-- .../api-routes/test/health-latest.test.ts | 63 +++++++++++++++-- packages/canonry/package.json | 2 +- packages/canonry/src/commands/health-cmd.ts | 26 ++++--- packages/canonry/src/intelligence-service.ts | 7 ++ packages/canonry/src/run-coordinator.ts | 2 + packages/canonry/test/health-jsonl.test.ts | 68 +++++++++++++++++-- .../canonry/test/intelligence-service.test.ts | 64 ++++++++++++++++- packages/contracts/src/intelligence.ts | 11 ++- packages/db/src/migrate.ts | 25 +++++++ packages/db/src/schema.ts | 10 ++- packages/db/test/index.test.ts | 66 ++++++++++++++++++ packages/intelligence/src/health.ts | 15 +++- packages/intelligence/src/types.ts | 21 +++++- packages/intelligence/test/health.test.ts | 64 ++++++++++++++++- 17 files changed, 471 insertions(+), 36 deletions(-) diff --git a/package.json b/package.json index 0c892766..6cc03a6d 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "canonry", "private": true, - "version": "4.83.0", + "version": "4.84.0", "type": "module", "packageManager": "pnpm@10.28.2", "scripts": { diff --git a/packages/api-routes/src/composites.ts b/packages/api-routes/src/composites.ts index 13e8a7f5..556584de 100644 --- a/packages/api-routes/src/composites.ts +++ b/packages/api-routes/src/composites.ts @@ -872,14 +872,28 @@ function mapInsightRow(r: typeof insights.$inferSelect): InsightDto { } function mapHealthRow(r: typeof healthSnapshots.$inferSelect): HealthSnapshotDto { + // Coalesce legacy provider entries (written before v80, no mention keys) to 0. + const providerBreakdown: HealthSnapshotDto['providerBreakdown'] = {} + for (const [provider, entry] of Object.entries(r.providerBreakdown)) { + providerBreakdown[provider] = { + citedRate: entry.citedRate, + mentionRate: entry.mentionRate ?? 
0, + cited: entry.cited, + mentioned: entry.mentioned ?? 0, + total: entry.total, + } + } return { id: r.id, projectId: r.projectId, runId: r.runId ?? null, overallCitedRate: Number(r.overallCitedRate), + // Legacy rows (persisted before v80) have NULL mention columns → 0. + overallMentionRate: r.overallMentionRate == null ? 0 : Number(r.overallMentionRate), totalPairs: r.totalPairs, citedPairs: r.citedPairs, - providerBreakdown: r.providerBreakdown, + mentionedPairs: r.mentionedPairs ?? 0, + providerBreakdown, createdAt: r.createdAt, status: 'ready', } diff --git a/packages/api-routes/src/intelligence.ts b/packages/api-routes/src/intelligence.ts index 79c2d4bd..876a5481 100644 --- a/packages/api-routes/src/intelligence.ts +++ b/packages/api-routes/src/intelligence.ts @@ -10,8 +10,10 @@ function emptyHealthSnapshot(projectId: string): HealthSnapshotDto { projectId, runId: null, overallCitedRate: 0, + overallMentionRate: 0, totalPairs: 0, citedPairs: 0, + mentionedPairs: 0, providerBreakdown: {}, createdAt: '', status: 'no-data', @@ -36,15 +38,40 @@ function mapInsightRow(r: typeof insights.$inferSelect): InsightDto { } } +/** + * Coalesce a persisted providerBreakdown into the current DTO shape. Rows + * written before the mention columns existed have entries with no + * `mentionRate` / `mentioned` keys — fill them with 0 so the contract field + * is always present. Cited fields pass through untouched. + */ +function coalesceProviderBreakdown( + breakdown: Record, +): HealthSnapshotDto['providerBreakdown'] { + const out: HealthSnapshotDto['providerBreakdown'] = {} + for (const [provider, entry] of Object.entries(breakdown)) { + out[provider] = { + citedRate: entry.citedRate, + mentionRate: entry.mentionRate ?? 0, + cited: entry.cited, + mentioned: entry.mentioned ?? 0, + total: entry.total, + } + } + return out +} + function mapHealthRow(r: typeof healthSnapshots.$inferSelect): HealthSnapshotDto { return { id: r.id, projectId: r.projectId, runId: r.runId ?? 
null, overallCitedRate: Number(r.overallCitedRate), + // Legacy rows (persisted before v80) have NULL mention columns → 0. + overallMentionRate: r.overallMentionRate == null ? 0 : Number(r.overallMentionRate), totalPairs: r.totalPairs, citedPairs: r.citedPairs, - providerBreakdown: r.providerBreakdown, + mentionedPairs: r.mentionedPairs ?? 0, + providerBreakdown: coalesceProviderBreakdown(r.providerBreakdown), createdAt: r.createdAt, status: 'ready', } @@ -69,28 +96,36 @@ function aggregateHealthSnapshots( let totalPairs = 0 let citedPairs = 0 - const mergedProviders: Record = {} + let mentionedPairs = 0 + const mergedProviders: Record = {} let newestCreatedAt = '' const runIds: string[] = [] for (const row of rows) { totalPairs += row.totalPairs citedPairs += row.citedPairs + // Legacy rows (pre-v80) have NULL mention columns → contribute 0 to the + // numerator. Cited and mention are merged identically but independently. + mentionedPairs += row.mentionedPairs ?? 0 if (row.createdAt > newestCreatedAt) newestCreatedAt = row.createdAt if (row.runId) runIds.push(row.runId) const providerBreakdown = row.providerBreakdown for (const [provider, entry] of Object.entries(providerBreakdown)) { - const existing = mergedProviders[provider] ?? { total: 0, cited: 0, citedRate: 0 } + const existing = mergedProviders[provider] ?? { total: 0, cited: 0, mentioned: 0, citedRate: 0, mentionRate: 0 } existing.total += entry.total existing.cited += entry.cited + existing.mentioned += entry.mentioned ?? 0 mergedProviders[provider] = existing } } - // Compute per-provider rates after summing. + // Compute per-provider rates after summing. Cited and mention are computed + // separately — neither is derived from the other. for (const entry of Object.values(mergedProviders)) { entry.citedRate = entry.total > 0 ? entry.cited / entry.total : 0 + entry.mentionRate = entry.total > 0 ? entry.mentioned / entry.total : 0 } const overallCitedRate = totalPairs > 0 ? 
citedPairs / totalPairs : 0 + const overallMentionRate = totalPairs > 0 ? mentionedPairs / totalPairs : 0 return { // Synthetic id so consumers can tell this is an aggregate; concatenate @@ -99,8 +134,10 @@ function aggregateHealthSnapshots( projectId, runId: runIds[0] ?? null, overallCitedRate, + overallMentionRate, totalPairs, citedPairs, + mentionedPairs, providerBreakdown: mergedProviders, createdAt: newestCreatedAt, status: 'ready', diff --git a/packages/api-routes/test/health-latest.test.ts b/packages/api-routes/test/health-latest.test.ts index 4c5d82a1..e4d646ec 100644 --- a/packages/api-routes/test/health-latest.test.ts +++ b/packages/api-routes/test/health-latest.test.ts @@ -55,8 +55,10 @@ test('returns 200 with no-data sentinel when no health snapshot exists', async ( projectId, runId: null, overallCitedRate: 0, + overallMentionRate: 0, totalPairs: 0, citedPairs: 0, + mentionedPairs: 0, providerBreakdown: {}, createdAt: '', status: 'no-data', @@ -71,9 +73,11 @@ test('returns 200 with status:"ready" when a snapshot exists', async () => { projectId, runId: null, overallCitedRate: 0.42, + overallMentionRate: 0.3, totalPairs: 10, citedPairs: 4, - providerBreakdown: { gemini: { citedRate: 0.5, cited: 5, total: 10 } }, + mentionedPairs: 3, + providerBreakdown: { gemini: { citedRate: 0.5, mentionRate: 0.3, cited: 5, mentioned: 3, total: 10 } }, createdAt: '2026-04-27T00:00:00Z', }).run() await ctx.app.ready() @@ -86,7 +90,42 @@ test('returns 200 with status:"ready" when a snapshot exists', async () => { expect(body.overallCitedRate).toBe(0.42) expect(body.citedPairs).toBe(4) expect(body.totalPairs).toBe(10) - expect(body.providerBreakdown).toEqual({ gemini: { citedRate: 0.5, cited: 5, total: 10 } }) + // Mention is surfaced alongside cited, never in place of it. 
+ expect(body.overallMentionRate).toBe(0.3) + expect(body.mentionedPairs).toBe(3) + expect(body.providerBreakdown).toEqual({ gemini: { citedRate: 0.5, mentionRate: 0.3, cited: 5, mentioned: 3, total: 10 } }) +}) + +test('coalesces a legacy row with NULL mention columns to 0 instead of crashing', async () => { + const projectId = insertProject(ctx.db, 'legacy-row') + // Simulate a row persisted before the v80 mention migration: the mention + // columns are NULL and the providerBreakdown JSON has no mention keys. + ctx.db.insert(healthSnapshots).values({ + id: 'legacy-1', + projectId, + runId: null, + overallCitedRate: 0.6, + overallMentionRate: null, + totalPairs: 10, + citedPairs: 6, + mentionedPairs: null, + // Cast: this is intentionally the OLD JSON shape with no mention keys. + providerBreakdown: { gemini: { citedRate: 0.6, cited: 6, total: 10 } } as never, + createdAt: '2026-04-20T00:00:00Z', + }).run() + await ctx.app.ready() + + const res = await ctx.app.inject({ method: 'GET', url: '/api/v1/projects/legacy-row/health/latest' }) + expect(res.statusCode).toBe(200) + const body = JSON.parse(res.body) as HealthSnapshotDto + expect(body.status).toBe('ready') + // Cited fields read through unchanged. + expect(body.overallCitedRate).toBe(0.6) + expect(body.citedPairs).toBe(6) + // Missing mention data reads back as 0 (NULL→0), not NaN/null/undefined. + expect(body.overallMentionRate).toBe(0) + expect(body.mentionedPairs).toBe(0) + expect(body.providerBreakdown.gemini).toEqual({ citedRate: 0.6, mentionRate: 0, cited: 6, mentioned: 0, total: 10 }) }) test('still returns 404 when the project itself does not exist', async () => { @@ -109,17 +148,19 @@ test('aggregates healthSnapshots across the latest fan-out group when a multi-lo { id: miRunId, projectId, kind: 'answer-visibility', status: 'completed', trigger: 'manual', location: 'michigan', createdAt, finishedAt: createdAt }, ]).run() - // florida: 6 of 10 pairs cited. michigan: 2 of 10 pairs cited. 
- // Project-level aggregate: 8 of 20 (40%). + // florida: 6 of 10 pairs cited, 4 mentioned. michigan: 2 of 10 cited, 1 mentioned. + // Project-level aggregate: cited 8/20 (40%), mentioned 5/20 (25%). ctx.db.insert(healthSnapshots).values([ { id: 'snap-fl', projectId, runId: flRunId, overallCitedRate: '0.6', + overallMentionRate: '0.4', totalPairs: 10, citedPairs: 6, - providerBreakdown: { gemini: { citedRate: 0.6, cited: 6, total: 10 } }, + mentionedPairs: 4, + providerBreakdown: { gemini: { citedRate: 0.6, mentionRate: 0.4, cited: 6, mentioned: 4, total: 10 } }, createdAt, }, { @@ -127,9 +168,11 @@ test('aggregates healthSnapshots across the latest fan-out group when a multi-lo projectId, runId: miRunId, overallCitedRate: '0.2', + overallMentionRate: '0.1', totalPairs: 10, citedPairs: 2, - providerBreakdown: { gemini: { citedRate: 0.2, cited: 2, total: 10 } }, + mentionedPairs: 1, + providerBreakdown: { gemini: { citedRate: 0.2, mentionRate: 0.1, cited: 2, mentioned: 1, total: 10 } }, createdAt, }, ]).run() @@ -144,10 +187,16 @@ test('aggregates healthSnapshots across the latest fan-out group when a multi-lo expect(body.citedPairs).toBe(8) expect(body.overallCitedRate).toBeCloseTo(0.4, 5) - // Per-provider breakdown also aggregated. + // Mention sums independently of cited: 4 + 1 = 5 over 20 = 25%. + expect(body.mentionedPairs).toBe(5) + expect(body.overallMentionRate).toBeCloseTo(0.25, 5) + + // Per-provider breakdown also aggregated — cited AND mention merged. expect(body.providerBreakdown.gemini?.total).toBe(20) expect(body.providerBreakdown.gemini?.cited).toBe(8) expect(body.providerBreakdown.gemini?.citedRate).toBeCloseTo(0.4, 5) + expect(body.providerBreakdown.gemini?.mentioned).toBe(5) + expect(body.providerBreakdown.gemini?.mentionRate).toBeCloseTo(0.25, 5) // Synthesized id signals this is a group aggregate. 
expect(body.id).toMatch(/^group:/) diff --git a/packages/canonry/package.json b/packages/canonry/package.json index b486dad3..0f3d9a4f 100644 --- a/packages/canonry/package.json +++ b/packages/canonry/package.json @@ -1,6 +1,6 @@ { "name": "@ainyc/canonry", - "version": "4.83.0", + "version": "4.84.0", "type": "module", "description": "Agent-first open-source AEO operating platform - track how answer engines cite your domain", "license": "FSL-1.1-ALv2", diff --git a/packages/canonry/src/commands/health-cmd.ts b/packages/canonry/src/commands/health-cmd.ts index 76ae20c9..4522af9d 100644 --- a/packages/canonry/src/commands/health-cmd.ts +++ b/packages/canonry/src/commands/health-cmd.ts @@ -26,13 +26,17 @@ export async function showHealth( return } - console.log('Date Cited Rate Cited/Total') - console.log('─'.repeat(55)) + // Mention leads, cited second — both signals are independent and shown + // side by side (never one in place of the other). + console.log('Date Mention Rate Mentioned/Total Cited Rate Cited/Total') + console.log('─'.repeat(86)) for (const snap of snapshots) { - const rate = (snap.overallCitedRate * 100).toFixed(1).padStart(5) + '%' - const ratio = `${snap.citedPairs}/${snap.totalPairs}` + const mRate = (snap.overallMentionRate * 100).toFixed(1).padStart(5) + '%' + const mRatio = `${snap.mentionedPairs}/${snap.totalPairs}`.padEnd(15) + const cRate = (snap.overallCitedRate * 100).toFixed(1).padStart(5) + '%' + const cRatio = `${snap.citedPairs}/${snap.totalPairs}` const date = snap.createdAt.slice(0, 19).padEnd(25) - console.log(`${date} ${rate} ${ratio}`) + console.log(`${date} ${mRate} ${mRatio} ${cRate} ${cRatio}`) } return } @@ -52,15 +56,19 @@ export async function showHealth( return } - const rate = (health.overallCitedRate * 100).toFixed(1) - console.log(`Health: ${rate}% cited (${health.citedPairs}/${health.totalPairs} pairs)`) + // Mention leads, cited second — two independent signals, both surfaced. 
+ const mentionRate = (health.overallMentionRate * 100).toFixed(1) + const citedRate = (health.overallCitedRate * 100).toFixed(1) + console.log(`Health: ${mentionRate}% mentioned (${health.mentionedPairs}/${health.totalPairs} pairs)`) + console.log(` ${citedRate}% cited (${health.citedPairs}/${health.totalPairs} pairs)`) console.log('') if (health.providerBreakdown && Object.keys(health.providerBreakdown).length > 0) { console.log('Provider Breakdown:') for (const [provider, stats] of Object.entries(health.providerBreakdown)) { - const pRate = (stats.citedRate * 100).toFixed(1) - console.log(` ${provider.padEnd(15)} ${pRate}% (${stats.cited}/${stats.total})`) + const pMention = (stats.mentionRate * 100).toFixed(1) + const pCited = (stats.citedRate * 100).toFixed(1) + console.log(` ${provider.padEnd(15)} ${pMention}% mentioned (${stats.mentioned}/${stats.total}) ${pCited}% cited (${stats.cited}/${stats.total})`) } } } diff --git a/packages/canonry/src/intelligence-service.ts b/packages/canonry/src/intelligence-service.ts index 60085397..08956705 100644 --- a/packages/canonry/src/intelligence-service.ts +++ b/packages/canonry/src/intelligence-service.ts @@ -690,8 +690,10 @@ export class IntelligenceService { projectId, runId, overallCitedRate: String(result.health.overallCitedRate), + overallMentionRate: String(result.health.overallMentionRate), totalPairs: result.health.totalPairs, citedPairs: result.health.citedPairs, + mentionedPairs: result.health.mentionedPairs, providerBreakdown: result.health.providerBreakdown, createdAt: now, }).run() @@ -874,6 +876,7 @@ export class IntelligenceService { queryText: querySnapshots.queryText, provider: querySnapshots.provider, citationState: querySnapshots.citationState, + answerMentioned: querySnapshots.answerMentioned, citedDomains: querySnapshots.citedDomains, competitorOverlap: querySnapshots.competitorOverlap, snapshotLocation: querySnapshots.location, @@ -904,6 +907,10 @@ export class IntelligenceService { query: 
resolvedQuery, provider: r.provider, cited: r.citationState === CitationStates.cited, + // Independent answer-text signal. Tri-state passes through untouched + // (true / false / null = "not checked"); computeHealth counts only + // exact `true`. Never coerce null→false here. + answerMentioned: r.answerMentioned, // The project's OWN cited domain — never a co-cited competitor that // happens to sort first in the full citedDomains set. citationUrl: pickProjectCitedDomain(domains, projectDomains), diff --git a/packages/canonry/src/run-coordinator.ts b/packages/canonry/src/run-coordinator.ts index af7492e3..bbc9fa58 100644 --- a/packages/canonry/src/run-coordinator.ts +++ b/packages/canonry/src/run-coordinator.ts @@ -210,8 +210,10 @@ function analysisResultFromInsights(insights: Insight[]): AnalysisResult { competitorLosses: [], health: { overallCitedRate: 0, + overallMentionRate: 0, totalPairs: 0, citedPairs: 0, + mentionedPairs: 0, providerBreakdown: {}, }, insights, diff --git a/packages/canonry/test/health-jsonl.test.ts b/packages/canonry/test/health-jsonl.test.ts index f05c2576..77e4b575 100644 --- a/packages/canonry/test/health-jsonl.test.ts +++ b/packages/canonry/test/health-jsonl.test.ts @@ -30,9 +30,11 @@ const snapshots: HealthSnapshotDto[] = [ projectId: 'proj-1', runId: 'run-1', overallCitedRate: 0.42, + overallMentionRate: 0.25, totalPairs: 12, citedPairs: 5, - providerBreakdown: { openai: { citedRate: 0.5, cited: 3, total: 6 } }, + mentionedPairs: 3, + providerBreakdown: { openai: { citedRate: 0.5, mentionRate: 0.33, cited: 3, mentioned: 2, total: 6 } }, createdAt: '2026-04-28T00:00:00.000Z', status: 'ready', }, @@ -41,9 +43,11 @@ const snapshots: HealthSnapshotDto[] = [ projectId: 'proj-1', runId: 'run-2', overallCitedRate: 0.5, + overallMentionRate: 0.33, totalPairs: 12, citedPairs: 6, - providerBreakdown: { openai: { citedRate: 0.6, cited: 4, total: 6 } }, + mentionedPairs: 4, + providerBreakdown: { openai: { citedRate: 0.6, mentionRate: 0.5, cited: 
4, mentioned: 3, total: 6 } }, createdAt: '2026-04-29T00:00:00.000Z', status: 'ready', }, @@ -54,9 +58,11 @@ const health: HealthSnapshotDto = { projectId: 'proj-1', runId: 'run-2', overallCitedRate: 0.5, + overallMentionRate: 0.33, totalPairs: 12, citedPairs: 6, - providerBreakdown: { openai: { citedRate: 0.6, cited: 4, total: 6 } }, + mentionedPairs: 4, + providerBreakdown: { openai: { citedRate: 0.6, mentionRate: 0.5, cited: 4, mentioned: 3, total: 6 } }, createdAt: '2026-04-29T00:00:00.000Z', status: 'ready', } @@ -82,8 +88,11 @@ describe('showHealth --format jsonl', () => { overallCitedRate: 0.42, citedPairs: 5, totalPairs: 12, + // Mention fields ride alongside cited in the machine output. + overallMentionRate: 0.25, + mentionedPairs: 3, }) - expect(records[1]).toMatchObject({ project: 'demo', id: 'snap-2', citedPairs: 6 }) + expect(records[1]).toMatchObject({ project: 'demo', id: 'snap-2', citedPairs: 6, mentionedPairs: 4 }) }) it('empty history emits nothing on jsonl', async () => { @@ -138,3 +147,54 @@ describe('showHealth --format jsonl', () => { }) }) }) + +describe('showHealth human render', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('leads with mention rate and shows cited rate alongside it', async () => { + mockGetHealth.mockResolvedValue(health) + const logs: string[] = [] + const origLog = console.log + console.log = (...args: unknown[]) => logs.push(args.join(' ')) + try { + await showHealth('demo', {}) + } finally { + console.log = origLog + } + const out = logs.join('\n') + // Both signals present, mention first. + expect(out).toContain('33.0% mentioned (4/12 pairs)') + expect(out).toContain('50.0% cited (6/12 pairs)') + const mentionIdx = out.indexOf('mentioned') + const citedIdx = out.indexOf('cited') + expect(mentionIdx).toBeGreaterThanOrEqual(0) + expect(mentionIdx).toBeLessThan(citedIdx) // mention leads + // Per-provider line carries both rates too. 
+ expect(out).toContain('50.0% mentioned (3/6)') + expect(out).toContain('60.0% cited (4/6)') + }) + + it('--history table renders a Mention Rate column next to Cited Rate', async () => { + mockGetHealthHistory.mockResolvedValue(snapshots) + const logs: string[] = [] + const origLog = console.log + console.log = (...args: unknown[]) => logs.push(args.join(' ')) + try { + await showHealth('demo', { history: true }) + } finally { + console.log = origLog + } + const out = logs.join('\n') + expect(out).toContain('Mention Rate') + expect(out).toContain('Cited Rate') + // Header puts mention before cited. + const header = logs.find(l => l.includes('Mention Rate')) ?? '' + expect(header.indexOf('Mention Rate')).toBeLessThan(header.indexOf('Cited Rate')) + // Row carries the mention numerator/denominator (3/12) and cited (5/12). + expect(out).toContain('25.0%') + expect(out).toContain('3/12') + expect(out).toContain('5/12') + }) +}) diff --git a/packages/canonry/test/intelligence-service.test.ts b/packages/canonry/test/intelligence-service.test.ts index c7e7b263..1fda703d 100644 --- a/packages/canonry/test/intelligence-service.test.ts +++ b/packages/canonry/test/intelligence-service.test.ts @@ -62,7 +62,7 @@ function seedSnapshot( queryId: string, provider: string, citationState: string, - opts?: { citedDomains?: string[]; competitorOverlap?: string[] }, + opts?: { citedDomains?: string[]; competitorOverlap?: string[]; answerMentioned?: boolean | null }, ) { db.insert(querySnapshots).values({ id: crypto.randomUUID(), @@ -71,6 +71,9 @@ function seedSnapshot( provider, model: 'test-model', citationState, + // Tri-state mention signal. Default null ("not checked") when the caller + // doesn't specify, mirroring legacy snapshots written before the signal. + answerMentioned: opts?.answerMentioned ?? null, citedDomains: opts?.citedDomains ?? [], competitorOverlap: opts?.competitorOverlap ?? 
[], createdAt: new Date().toISOString(), @@ -106,6 +109,65 @@ describe('IntelligenceService', () => { } }) + it('threads answerMentioned from query_snapshots → computeHealth → persisted mention columns', () => { + const { db } = createTempDb('intel-mention-') + const projectId = seedProject(db) + const q1 = seedQuery(db, projectId, 'roof repair') + const q2 = seedQuery(db, projectId, 'metal roofing') + const q3 = seedQuery(db, projectId, 'roof coating') + const runId = seedRun(db, projectId, 'completed') + // 3 pairs. Mentioned set is DIFFERENT from cited set, proving the two + // signals are independent end-to-end: + // cited : q1, q2 → 2/3 + // mention : q1, q3 → 2/3 (but different queries) + seedSnapshot(db, runId, q1, 'gemini', 'cited', { citedDomains: ['example.com'], answerMentioned: true }) + seedSnapshot(db, runId, q2, 'gemini', 'cited', { citedDomains: ['example.com'], answerMentioned: false }) + seedSnapshot(db, runId, q3, 'gemini', 'not-cited', { answerMentioned: true }) + + const service = new IntelligenceService(db) + const result = service.analyzeAndPersist(runId, projectId) + + expect(result).not.toBeNull() + // In-memory health carries the mention math. + expect(result!.health.totalPairs).toBe(3) + expect(result!.health.citedPairs).toBe(2) + expect(result!.health.mentionedPairs).toBe(2) + expect(result!.health.overallMentionRate).toBeCloseTo(2 / 3, 5) + expect(result!.health.providerBreakdown.gemini.mentioned).toBe(2) + expect(result!.health.providerBreakdown.gemini.mentionRate).toBeCloseTo(2 / 3, 5) + + // Round-trip: the persisted health_snapshots row carries the new columns. 
+ const saved = db.select().from(healthSnapshots).all() + expect(saved).toHaveLength(1) + expect(saved[0]!.mentionedPairs).toBe(2) + expect(Number(saved[0]!.overallMentionRate)).toBeCloseTo(2 / 3, 5) + const providerBreakdown = saved[0]!.providerBreakdown as Record + expect(providerBreakdown.gemini.mentioned).toBe(2) + expect(providerBreakdown.gemini.total).toBe(3) + }) + + it('never counts a null answerMentioned (legacy snapshot) as mentioned', () => { + const { db } = createTempDb('intel-mention-null-') + const projectId = seedProject(db) + const q1 = seedQuery(db, projectId, 'q1') + const q2 = seedQuery(db, projectId, 'q2') + const runId = seedRun(db, projectId, 'completed') + // One mentioned, one with null (default — never checked). Mention = 1/2, + // not 0/2 nor 2/2; null must not coerce to false in the numerator. + seedSnapshot(db, runId, q1, 'gemini', 'cited', { citedDomains: ['example.com'], answerMentioned: true }) + seedSnapshot(db, runId, q2, 'gemini', 'cited', { citedDomains: ['example.com'] }) // answerMentioned → null + + const service = new IntelligenceService(db) + const result = service.analyzeAndPersist(runId, projectId) + + expect(result!.health.totalPairs).toBe(2) + expect(result!.health.mentionedPairs).toBe(1) + expect(result!.health.overallMentionRate).toBe(0.5) + + const saved = db.select().from(healthSnapshots).all() + expect(saved[0]!.mentionedPairs).toBe(1) + }) + it('returns null when run has no snapshots', () => { const { db } = createTempDb('intel-empty-') const projectId = seedProject(db) diff --git a/packages/contracts/src/intelligence.ts b/packages/contracts/src/intelligence.ts index 2de4738c..f3633c87 100644 --- a/packages/contracts/src/intelligence.ts +++ b/packages/contracts/src/intelligence.ts @@ -43,9 +43,18 @@ export interface HealthSnapshotDto { projectId: string runId: string | null overallCitedRate: number + /** + * Share of (query × provider) pairs where the project was MENTIONED in the + * answer text. 
Independent of `overallCitedRate` — never derived from it. + * Legacy snapshots persisted before the mention columns existed read back + * as 0 (the API coalesces NULL→0). + */ + overallMentionRate: number totalPairs: number citedPairs: number - providerBreakdown: Record + /** Count of pairs mentioned in the answer text. Legacy rows read back as 0. */ + mentionedPairs: number + providerBreakdown: Record createdAt: string /** * `'ready'` when the snapshot reflects real data; `'no-data'` for the diff --git a/packages/db/src/migrate.ts b/packages/db/src/migrate.ts index 640deec9..74a8248f 100644 --- a/packages/db/src/migrate.ts +++ b/packages/db/src/migrate.ts @@ -1770,6 +1770,31 @@ export const MIGRATION_VERSIONS: ReadonlyArray = [ `ALTER TABLE discovery_probes ADD COLUMN answer_mentioned INTEGER`, ], }, + { + // Mention-rate columns on the persisted health snapshot, mirroring the + // existing cited columns (overall_cited_rate / cited_pairs) for the + // independent answer-text mention signal. Nullable: rows written before + // this version have no mention math, so they read back as NULL ("not + // measured") and readers coalesce NULL→0. + // + // Guarded `run` rather than bare `statements` (the v66 pattern): the + // table-existence check makes this a no-op when `health_snapshots` is + // absent — only possible on a legacy fixture whose recorded + // `_migrations` version skips v23's `CREATE TABLE` (the bootstrap is + // bypassed). The column-existence check keeps a replay idempotent. 
+ version: 80, + name: 'health-snapshots-mention-rate', + statements: [], + run: (db) => { + if (!tableExists(db, 'health_snapshots')) return + if (!columnExists(db, 'health_snapshots', 'overall_mention_rate')) { + db.run(sql.raw(`ALTER TABLE health_snapshots ADD COLUMN overall_mention_rate TEXT`)) + } + if (!columnExists(db, 'health_snapshots', 'mentioned_pairs')) { + db.run(sql.raw(`ALTER TABLE health_snapshots ADD COLUMN mentioned_pairs INTEGER`)) + } + }, + }, ] /** diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 62c828cf..76dfb2a4 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -533,9 +533,17 @@ export const healthSnapshots = sqliteTable('health_snapshots', { projectId: text('project_id').notNull().references(() => projects.id, { onDelete: 'cascade' }), runId: text('run_id').references(() => runs.id, { onDelete: 'cascade' }), overallCitedRate: text('overall_cited_rate').notNull(), + // Answer-text mention rate, independent of citation. Nullable because the + // column is added by migration v80 via ALTER TABLE ADD COLUMN — rows + // persisted before v80 read back as NULL ("not measured"); readers coalesce + // NULL→0. New writes always populate it (see intelligence-service persist). + overallMentionRate: text('overall_mention_rate'), totalPairs: integer('total_pairs').notNull(), citedPairs: integer('cited_pairs').notNull(), - providerBreakdown: text('provider_breakdown', { mode: 'json' }).$type>().notNull().default({}), + // Count of pairs MENTIONED in the answer text. Nullable for the same + // legacy-row reason as overall_mention_rate; coalesced NULL→0 on read. 
+ mentionedPairs: integer('mentioned_pairs'), + providerBreakdown: text('provider_breakdown', { mode: 'json' }).$type>().notNull().default({}), createdAt: text('created_at').notNull(), }, (table) => [ index('idx_health_snapshots_project').on(table.projectId), diff --git a/packages/db/test/index.test.ts b/packages/db/test/index.test.ts index 415b0644..8e11b9cb 100644 --- a/packages/db/test/index.test.ts +++ b/packages/db/test/index.test.ts @@ -868,3 +868,69 @@ test('_migrations table is created on first migrate', () => { const tableInfo = sqlite.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='_migrations'").all() expect(tableInfo.length).toBe(1) }) + +test('health_snapshots round-trips the v80 mention columns (text rate + integer pairs + json breakdown)', () => { + const { db, tmpDir } = createTempDb() + onTestFinished(() => cleanup(tmpDir)) + const now = new Date().toISOString() + + db.insert(projects).values({ + id: 'proj_health', name: 'health-rt', displayName: 'Health RT', + canonicalDomain: 'example.com', country: 'US', language: 'en', + createdAt: now, updatedAt: now, + }).run() + + db.insert(healthSnapshots).values({ + id: 'hs_1', + projectId: 'proj_health', + runId: null, + overallCitedRate: '0.5', + overallMentionRate: '0.25', + totalPairs: 8, + citedPairs: 4, + mentionedPairs: 2, + providerBreakdown: { gemini: { citedRate: 0.5, mentionRate: 0.25, cited: 4, mentioned: 2, total: 8 } }, + createdAt: now, + }).run() + + const [row] = db.select().from(healthSnapshots).where(eq(healthSnapshots.id, 'hs_1')).all() + // Cited columns unchanged. + expect(row.overallCitedRate).toBe('0.5') + expect(row.citedPairs).toBe(4) + // Mention columns persisted and typed correctly (text rate, integer pairs). + expect(row.overallMentionRate).toBe('0.25') + expect(row.mentionedPairs).toBe(2) + // JSON breakdown survives the round-trip with both signals. 
+ expect(row.providerBreakdown.gemini).toEqual({ citedRate: 0.5, mentionRate: 0.25, cited: 4, mentioned: 2, total: 8 }) +}) + +test('health_snapshots accepts NULL mention columns (legacy-shaped row) without coercion', () => { + const { db, tmpDir } = createTempDb() + onTestFinished(() => cleanup(tmpDir)) + const now = new Date().toISOString() + + db.insert(projects).values({ + id: 'proj_legacy', name: 'health-legacy', displayName: 'Health Legacy', + canonicalDomain: 'example.com', country: 'US', language: 'en', + createdAt: now, updatedAt: now, + }).run() + + // A row written as if before v80: mention columns omitted → stored NULL. + db.insert(healthSnapshots).values({ + id: 'hs_legacy', + projectId: 'proj_legacy', + runId: null, + overallCitedRate: '0.6', + totalPairs: 5, + citedPairs: 3, + providerBreakdown: {}, + createdAt: now, + }).run() + + const [row] = db.select().from(healthSnapshots).where(eq(healthSnapshots.id, 'hs_legacy')).all() + // Mention columns read back as NULL (the DB layer does not coerce — the API + // read layer is responsible for NULL→0). Cited columns are intact. + expect(row.overallMentionRate).toBeNull() + expect(row.mentionedPairs).toBeNull() + expect(row.citedPairs).toBe(3) +}) diff --git a/packages/intelligence/src/health.ts b/packages/intelligence/src/health.ts index f391513d..104a8626 100644 --- a/packages/intelligence/src/health.ts +++ b/packages/intelligence/src/health.ts @@ -1,18 +1,25 @@ import type { RunData, HealthScore, HealthTrend } from './types.js' export function computeHealth(run: RunData): HealthScore { - const providerStats = new Map() + const providerStats = new Map() let totalPairs = 0 let citedPairs = 0 + let mentionedPairs = 0 for (const snap of run.snapshots) { totalPairs++ if (snap.cited) citedPairs++ + // Mention is the independent answer-text signal. Tri-state: count a pair + // ONLY when it is exactly `true`. 
`false` and `null`/`undefined` ("not + // checked") both leave the numerator untouched — null is never coerced to + // false. Never derive this from `cited`. + if (snap.answerMentioned === true) mentionedPairs++ - const stats = providerStats.get(snap.provider) ?? { cited: 0, total: 0 } + const stats = providerStats.get(snap.provider) ?? { cited: 0, mentioned: 0, total: 0 } stats.total++ if (snap.cited) stats.cited++ + if (snap.answerMentioned === true) stats.mentioned++ providerStats.set(snap.provider, stats) } @@ -20,15 +27,19 @@ export function computeHealth(run: RunData): HealthScore { for (const [provider, stats] of providerStats) { providerBreakdown[provider] = { citedRate: stats.total > 0 ? stats.cited / stats.total : 0, + mentionRate: stats.total > 0 ? stats.mentioned / stats.total : 0, cited: stats.cited, + mentioned: stats.mentioned, total: stats.total, } } return { overallCitedRate: totalPairs > 0 ? citedPairs / totalPairs : 0, + overallMentionRate: totalPairs > 0 ? mentionedPairs / totalPairs : 0, totalPairs, citedPairs, + mentionedPairs, providerBreakdown, } } diff --git a/packages/intelligence/src/types.ts b/packages/intelligence/src/types.ts index 4cf91b90..602f5f06 100644 --- a/packages/intelligence/src/types.ts +++ b/packages/intelligence/src/types.ts @@ -2,6 +2,16 @@ export interface Snapshot { query: string provider: string cited: boolean + /** + * Did the project's brand/domain appear in the actual LLM answer TEXT + * (the prose the model returns)? Independent of `cited` (source-list + * presence) — a model can do either, both, or neither. TRI-STATE: + * `true` mentioned, `false` not mentioned, `null`/`undefined` "not + * checked" (legacy rows written before the signal existed, or providers + * that don't emit it). Health counts a pair as mentioned ONLY when this + * is exactly `true`; null is never coerced to false. 
+ */ + answerMentioned?: boolean | null citationUrl?: string position?: number snippet?: string @@ -66,9 +76,18 @@ export interface Gain { export interface HealthScore { overallCitedRate: number + /** + * Share of (query × provider) pairs where the project was MENTIONED in the + * answer text. Independent of `overallCitedRate` — never derived from it. + * `mentionedPairs / totalPairs`, or 0 when there are no pairs. Pairs with a + * null `answerMentioned` ("not checked") count toward the denominator but + * never the numerator. + */ + overallMentionRate: number totalPairs: number citedPairs: number - providerBreakdown: Record + mentionedPairs: number + providerBreakdown: Record } export interface HealthTrend { diff --git a/packages/intelligence/test/health.test.ts b/packages/intelligence/test/health.test.ts index 30569f7a..79e0779c 100644 --- a/packages/intelligence/test/health.test.ts +++ b/packages/intelligence/test/health.test.ts @@ -26,8 +26,66 @@ describe('computeHealth', () => { expect(health.overallCitedRate).toBe(0.75) expect(health.totalPairs).toBe(4) expect(health.citedPairs).toBe(3) - expect(health.providerBreakdown.chatgpt).toEqual({ citedRate: 0.5, cited: 1, total: 2 }) - expect(health.providerBreakdown.gemini).toEqual({ citedRate: 1.0, cited: 2, total: 2 }) + expect(health.providerBreakdown.chatgpt).toEqual({ citedRate: 0.5, mentionRate: 0, cited: 1, mentioned: 0, total: 2 }) + expect(health.providerBreakdown.gemini).toEqual({ citedRate: 1.0, mentionRate: 0, cited: 2, mentioned: 0, total: 2 }) + }) + + it('computes mention rate independently of cited rate (numerator/denominator/rounded)', () => { + // 4 pairs. 
Cited and mention are deliberately DIFFERENT sets to prove they + // are not derived from one another: + // cited : k1/chatgpt, k1/gemini, k2/gemini → 3/4 = 0.75 + // mention : k1/chatgpt, k2/chatgpt → 2/4 = 0.50 + const run = makeRun({ + snapshots: [ + { query: 'k1', provider: 'chatgpt', cited: true, answerMentioned: true }, + { query: 'k1', provider: 'gemini', cited: true, answerMentioned: false }, + { query: 'k2', provider: 'chatgpt', cited: false, answerMentioned: true }, + { query: 'k2', provider: 'gemini', cited: true, answerMentioned: false }, + ], + }) + + const health = computeHealth(run) + + // Mention math: numerator 2, denominator 4, rate 0.5 — distinct from cited. + expect(health.mentionedPairs).toBe(2) + expect(health.totalPairs).toBe(4) + expect(health.overallMentionRate).toBe(0.5) + expect((health.overallMentionRate * 100).toFixed(1)).toBe('50.0') + + // Cited math is unchanged and independent. + expect(health.citedPairs).toBe(3) + expect(health.overallCitedRate).toBe(0.75) + + // Per-provider: chatgpt mentioned on both its pairs, cited on one. + expect(health.providerBreakdown.chatgpt).toEqual({ citedRate: 0.5, mentionRate: 1.0, cited: 1, mentioned: 2, total: 2 }) + // gemini cited on both, mentioned on neither. + expect(health.providerBreakdown.gemini).toEqual({ citedRate: 1.0, mentionRate: 0, cited: 2, mentioned: 0, total: 2 }) + }) + + it('never counts a null answerMentioned as mentioned (tri-state, null ≠ false)', () => { + // 4 pairs: 1 true, 1 false, 1 null, 1 undefined ("not checked"). + // Only the single `true` counts toward the mention numerator. 
+ const run = makeRun({ + snapshots: [ + { query: 'k1', provider: 'chatgpt', cited: true, answerMentioned: true }, + { query: 'k2', provider: 'chatgpt', cited: true, answerMentioned: false }, + { query: 'k3', provider: 'chatgpt', cited: true, answerMentioned: null }, + { query: 'k4', provider: 'chatgpt', cited: true }, // answerMentioned undefined + ], + }) + + const health = computeHealth(run) + + // null and undefined contribute to the DENOMINATOR but never the numerator. + expect(health.totalPairs).toBe(4) + expect(health.mentionedPairs).toBe(1) + expect(health.overallMentionRate).toBe(0.25) + expect(health.providerBreakdown.chatgpt.mentioned).toBe(1) + expect(health.providerBreakdown.chatgpt.mentionRate).toBe(0.25) + + // Cited is unaffected — all four are cited. + expect(health.citedPairs).toBe(4) + expect(health.overallCitedRate).toBe(1.0) }) it('returns 0 rate for empty snapshots', () => { @@ -76,7 +134,7 @@ describe('computeHealth', () => { const health = computeHealth(run) expect(Object.keys(health.providerBreakdown)).toEqual(['chatgpt']) - expect(health.providerBreakdown.chatgpt).toEqual({ citedRate: 0.5, cited: 1, total: 2 }) + expect(health.providerBreakdown.chatgpt).toEqual({ citedRate: 0.5, mentionRate: 0, cited: 1, mentioned: 0, total: 2 }) }) it('handles many providers', () => {