diff --git a/README.md b/README.md index d415f39..4ce79d6 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,11 @@ tokenleak focus --provider codex --days 30 tokenleak nutrition tokenleak nutrition --days 30 --format json +# Optimization intelligence +tokenleak simulate-routing --days 30 +tokenleak waste --severity high +tokenleak behavior-diff --provider claude-code,codex --days 30 --format json + # Authenticate Cursor and sync its local cache tokenleak cursor login --name work @@ -151,7 +156,7 @@ tokenleak --list-providers ### Analysis commands -Tokenleak ships three dedicated investigation commands in addition to the main dashboard flow: +Tokenleak ships dedicated investigation and optimization commands in addition to the main dashboard flow: ```bash # Explain what drove a specific day @@ -181,12 +186,24 @@ tokenleak nutrition --days 30 # Emit the AI ROI report as JSON tokenleak nutrition --format json --output ai-roi.json + +# Estimate savings from model routing +tokenleak simulate-routing --days 30 + +# Detect agent waste signals with evidence and recipes +tokenleak waste --days 30 + +# Compare two agent/provider/model cohorts +tokenleak behavior-diff --provider claude-code,codex --days 30 ``` - `tokenleak explain <date>` builds a narrative day report with top providers, sessions, projects, models, and anomaly flags. - `tokenleak focus` ranks sessions by a deep-work score derived from duration, token density, and project streak. - `tokenleak replay [date]` shows a chronological timeline of all sessions for a day, clustering events into flow blocks with a pulse chart and flow/think ratio. Defaults to today. Pass `--interactive` (or `-i`) to open a browser scrub UI on `http://localhost:3567` — drag the timeline, press space to play the day at 60–600× speed, watch the cumulative cost odometer tick up. Combine with `--open` to launch the browser automatically. - `tokenleak nutrition` powers the TUI **AI ROI** view. 
It resolves local Git repo roots from provider project paths, runs read-only `git log --numstat`, and reports tokens/cost per commit and changed line. `No Git signal` means Tokenleak saw AI usage for a repo path but found no commits in the selected date window; switch to a wider window or ensure the project path exists locally as a Git worktree. +- `tokenleak simulate-routing` re-prices historical events under conservative downgrade rules so pro users can estimate savings before changing model habits or team guidance. +- `tokenleak waste` detects deterministic waste signals such as context drag, repeated prompt clusters, model churn, cache misses, and premium models used for small tasks. +- `tokenleak behavior-diff` compares cohorts such as provider-vs-provider or model-vs-model and emits deterministic takeaways for engineering teams. ### Cursor commands diff --git a/packages/cli/src/cli.test.ts b/packages/cli/src/cli.test.ts index aeb9298..d4e3805 100644 --- a/packages/cli/src/cli.test.ts +++ b/packages/cli/src/cli.test.ts @@ -797,7 +797,9 @@ describe('CLI invocation', () => { expect(stdout).toContain('--more'); expect(stdout).toContain('tokenleak explain '); expect(stdout).toContain('focus'); - expect(stdout).not.toContain('tokenleak waste'); + expect(stdout).toContain('tokenleak simulate-routing'); + expect(stdout).toContain('tokenleak waste'); + expect(stdout).toContain('tokenleak behavior-diff'); expect(stdout).toContain('interactive launcher'); expect(stdout).toContain('Examples:'); }); @@ -874,16 +876,74 @@ describe('CLI invocation', () => { expect(stderr).toContain('--days must be a positive number'); }); - test('waste is not exposed as a standalone command', async () => { + test('waste --help exits with code 0 and prints waste usage', async () => { const proc = Bun.spawn(['bun', cliPath, 'waste', '--help'], { stdout: 'pipe', stderr: 'pipe', }); const exitCode = await proc.exited; - const stderr = await new Response(proc.stderr).text(); + const stdout = 
await new Response(proc.stdout).text(); - expect(exitCode).toBe(1); - expect(stderr).toContain('Advisor view for Waste Patterns'); + expect(exitCode).toBe(0); + expect(stdout).toContain('tokenleak waste'); + expect(stdout).toContain('agent waste signals'); + }); + + test('waste emits a JSON agent waste report', async () => { + const { env, cleanup } = createProviderFixtureEnv(); + + try { + const waste = Bun.spawn(['bun', cliPath, 'waste', '--format', 'json', '--provider', 'pi'], { + stdout: 'pipe', + stderr: 'pipe', + env, + }); + const wasteExit = await waste.exited; + const wasteStdout = await new Response(waste.stdout).text(); + expect(wasteExit).toBe(0); + expect(JSON.parse(wasteStdout).summary).toBeDefined(); + } finally { + cleanup(); + } + }); + + test('simulate-routing emits a JSON routing report', async () => { + const { env, cleanup } = createProviderFixtureEnv(); + + try { + const simulate = Bun.spawn(['bun', cliPath, 'simulate-routing', '--format', 'json', '--provider', 'pi'], { + stdout: 'pipe', + stderr: 'pipe', + env, + }); + const simulateExit = await simulate.exited; + const simulateStdout = await new Response(simulate.stdout).text(); + expect(simulateExit).toBe(0); + expect(JSON.parse(simulateStdout).strategy).toBe('conservative'); + } finally { + cleanup(); + } + }); + + test('behavior-diff emits a JSON behavior diff report', async () => { + const { env, cleanup } = createProviderFixtureEnv(); + + try { + const diff = Bun.spawn( + ['bun', cliPath, 'behavior-diff', '--format', 'json', '--provider', 'pi,claude-code'], + { + stdout: 'pipe', + stderr: 'pipe', + env, + }, + ); + const diffExit = await diff.exited; + const diffStdout = await new Response(diff.stdout).text(); + expect(diffExit).toBe(0); + expect(JSON.parse(diffStdout).takeaways).toBeArray(); + } finally { + cleanup(); + } }); test('--version prints version', async () => { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 3ca5d60..fe107c3 100644 --- 
a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -8,8 +8,11 @@ import { SCHEMA_VERSION, aggregate, analyzeEfficiency, + buildAgentBehaviorDiffReport, + buildAgentWasteReport, buildCommonsExport, buildCommonsPromptExport, + buildRoutingSimulationReport, buildNutritionReport, collectGitOutcomeSignals, buildExplainReport, @@ -22,12 +25,16 @@ import { getTodayLocal, } from '@tokenleak/core'; import type { + AgentBehaviorDiffReport, + AgentWasteReport, + BehaviorCohortSelector, DateRange, FocusReport, NutritionReport, ProviderWarning, RenderOptions, ReplayReport, + RoutingSimulationReport, TokenleakOutput, ProviderData, } from '@tokenleak/core'; @@ -72,7 +79,7 @@ import { import type { IRenderer } from '@tokenleak/renderers'; import { loadConfig } from './config.js'; -import { loadCompareTokenleakData, loadTokenleakData } from './data-loader.js'; +import { buildDefaultOptimization, loadCompareTokenleakData, loadTokenleakData } from './data-loader.js'; import { computeDateRange } from './date-range.js'; import { loadEnvOverrides } from './env.js'; import { @@ -233,6 +240,9 @@ function buildHelpText(): string { ' tokenleak nutrition [flags]', ' tokenleak replay [date] [flags]', ' tokenleak receipts [flags]', + ' tokenleak simulate-routing [flags]', + ' tokenleak waste [flags]', + ' tokenleak behavior-diff [flags]', ' tokenleak cursor ', '', 'Subcommands:', @@ -242,6 +252,9 @@ function buildHelpText(): string { ' nutrition Estimate token cost per local Git outcome signal', " replay [date] Replay a day's session timeline (defaults to today)", ' receipts Itemized receipt of spend by prompt behavior', + ' simulate-routing Estimate savings from model routing changes', + ' waste Detect agent waste signals with evidence', + ' behavior-diff Compare agent/model/project behavior cohorts', ' cursor Manage Cursor auth and cache sync', '', 'Provider Shortcuts:', @@ -299,6 +312,9 @@ function buildHelpText(): string { ' tokenleak replay', ' tokenleak replay 2026-03-10 --format 
json', ' tokenleak replay 2026-03-10 --interactive', + ' tokenleak simulate-routing --days 30', + ' tokenleak waste --severity high', + ' tokenleak behavior-diff --provider claude-code,codex --days 30', '', 'Version:', ` CLI ${VERSION}`, @@ -412,6 +428,72 @@ function buildNutritionHelpText(): string { ].join('\n'); } +function buildSimulateRoutingHelpText(): string { + return [ + `tokenleak simulate-routing ${VERSION}`, + 'Simulate cost impact from routing historical events to cheaper models.', + '', + 'Usage:', + ' tokenleak simulate-routing [flags]', + '', + 'Flags:', + ' -f, --format Output format: terminal, json', + ' -s, --since Start date in YYYY-MM-DD format', + ' -u, --until End date in YYYY-MM-DD format', + ` -d, --days Number of trailing days to include (default: ${DEFAULT_DAYS})`, + ' -o, --output Write output to a file', + ' -p, --provider Provider filter list, comma-separated', + ' --strategy conservative, aggressive, or manual', + ' --no-color Disable ANSI colors', + '', + ].join('\n'); +} + +function buildWasteHelpText(): string { + return [ + `tokenleak waste ${VERSION}`, + 'Detect deterministic agent waste signals with evidence and recipes.', + '', + 'Usage:', + ' tokenleak waste [flags]', + '', + 'Flags:', + ' -f, --format Output format: terminal, json', + ' -s, --since Start date in YYYY-MM-DD format', + ' -u, --until End date in YYYY-MM-DD format', + ` -d, --days Number of trailing days to include (default: ${DEFAULT_DAYS})`, + ' -o, --output Write output to a file', + ' -p, --provider Provider filter list, comma-separated', + ' --severity all, high, medium, or low', + ' --no-color Disable ANSI colors', + '', + ].join('\n'); +} + +function buildBehaviorDiffHelpText(): string { + return [ + `tokenleak behavior-diff ${VERSION}`, + 'Compare two agent/model/project/provider cohorts.', + '', + 'Usage:', + ' tokenleak behavior-diff --provider claude-code,codex [flags]', + ' tokenleak behavior-diff --model claude-opus-4,claude-sonnet-4 [flags]', + '', 
+ 'Flags:', + ' -f, --format Output format: terminal, json', + ' -s, --since Start date in YYYY-MM-DD format', + ' -u, --until End date in YYYY-MM-DD format', + ` -d, --days Number of trailing days to include (default: ${DEFAULT_DAYS})`, + ' -o, --output Write output to a file', + ' --provider Compare two providers', + ' --model Compare two models', + ' --project Compare two project ids', + ' --repo Compare two repo roots', + ' --no-color Disable ANSI colors', + '', + ].join('\n'); +} + function buildVersionText(): string { return `tokenleak ${VERSION}\nschema ${SCHEMA_VERSION}\n`; } @@ -1438,6 +1520,136 @@ function renderNutritionReport(report: NutritionReport, width: number, noColor: return lines.join('\n'); } +function renderRoutingSimulationReport(report: RoutingSimulationReport, width: number, noColor: boolean): string { + const termWidth = Math.max(80, width || 80); + const lines = [ + bold('Tokenleak Routing Simulator', noColor), + report.method, + '', + `Range: ${report.dateRange.since} to ${report.dateRange.until} Strategy: ${report.strategy}`, + `Current: $${report.currentCost.toFixed(4)} Simulated: $${report.simulatedCost.toFixed(4)} Savings: $${report.estimatedSavings.toFixed(4)} (${(report.estimatedSavingsPercent * 100).toFixed(1)}%)`, + `Affected: ${report.affectedEvents.toLocaleString('en-US')} events / ${report.affectedTokens.toLocaleString('en-US')} tokens`, + '', + ]; + + const positive = report.candidates.filter((candidate) => (candidate.savings ?? 0) > 0).slice(0, 12); + if (positive.length === 0) { + lines.push('No positive routing candidates found.'); + } else { + const headers = ['Rule', 'From', 'To', 'Tokens', 'Savings', 'Conf']; + const widths = [22, 18, 18, 12, 10, 8]; + const totalWidth = widths.reduce((sum, value) => sum + value, 0) + widths.length + 1; + widths[0] = Math.max(12, widths[0]! - Math.max(0, totalWidth - termWidth)); + const row = (cells: string[]) => + `|${cells.map((cell, index) => ` ${truncateCell(cell, widths[index]! 
- 2).padEnd(widths[index]! - 2)} `).join('|')}|`; + lines.push(row(headers)); + lines.push(`|${widths.map((colWidth) => '-'.repeat(colWidth)).join('|')}|`); + for (const candidate of positive) { + lines.push(row([ + candidate.ruleId, + candidate.fromModel, + candidate.toModel, + candidate.tokens.toLocaleString('en-US'), + `$${(candidate.savings ?? 0).toFixed(4)}`, + candidate.confidence, + ])); + } + } + + if (report.warnings.length > 0) { + lines.push('', dim(`Warnings: ${report.warnings.slice(0, 3).join(' | ')}`, noColor)); + } + return lines.join('\n'); +} + +function renderAgentWasteReport( + report: AgentWasteReport, + width: number, + noColor: boolean, + severity: string = 'all', +): string { + const termWidth = Math.max(80, width || 80); + const visible = report.signals.filter((signal) => severity === 'all' || signal.severity === severity); + const lines = [ + bold('Tokenleak Agent Waste', noColor), + report.method, + '', + `Range: ${report.dateRange.since} to ${report.dateRange.until}`, + `Signals: ${report.summary.totalSignals} High: ${report.summary.highSeverity} Estimated savings: ${formatNullableCost(report.summary.estimatedSavings)}`, + `Analyzed: ${report.summary.analyzedEvents.toLocaleString('en-US')} events / ${report.summary.analyzedSessions.toLocaleString('en-US')} sessions`, + '', + ]; + + if (visible.length === 0) { + lines.push('No waste signals matched the selected filter.'); + } else { + const headers = ['Severity', 'Kind', 'Title', 'Savings', 'Confidence']; + const widths = [10, 22, 34, 10, 12]; + const totalWidth = widths.reduce((sum, value) => sum + value, 0) + widths.length + 1; + widths[2] = Math.max(14, widths[2]! - Math.max(0, totalWidth - termWidth)); + const row = (cells: string[]) => + `|${cells.map((cell, index) => ` ${truncateCell(cell, widths[index]! - 2).padEnd(widths[index]! 
- 2)} `).join('|')}|`; + lines.push(row(headers)); + lines.push(`|${widths.map((colWidth) => '-'.repeat(colWidth)).join('|')}|`); + for (const signal of visible.slice(0, 12)) { + lines.push(row([ + signal.severity, + signal.kind, + signal.title, + formatNullableCost(signal.estimatedSavings), + signal.confidence, + ])); + lines.push(dim(` ${truncateCell(signal.evidence.reason, termWidth - 4)}`, noColor)); + } + } + + if (report.warnings.length > 0) { + lines.push('', dim(`Warnings: ${report.warnings.join(' | ')}`, noColor)); + } + return lines.join('\n'); +} + +function renderAgentBehaviorDiffReport(report: AgentBehaviorDiffReport, width: number, noColor: boolean): string { + const termWidth = Math.max(80, width || 80); + const lines = [ + bold('Tokenleak Agent Behavior Diff', noColor), + report.method, + '', + `Range: ${report.dateRange.since} to ${report.dateRange.until}`, + `${report.baseline.selector.label} vs ${report.comparison.selector.label}`, + '', + ]; + const rows = [ + ['Events', report.baseline.metrics.events, report.comparison.metrics.events, report.deltas.events], + ['Sessions', report.baseline.metrics.sessions, report.comparison.metrics.sessions, report.deltas.sessions], + ['Tokens', report.baseline.metrics.tokens, report.comparison.metrics.tokens, report.deltas.tokens], + ['Cost', report.baseline.metrics.cost, report.comparison.metrics.cost, report.deltas.cost], + ['Input/Output', report.baseline.metrics.inputPerOutput, report.comparison.metrics.inputPerOutput, report.deltas.inputPerOutput], + ['Output/$', report.baseline.metrics.outputPerDollar, report.comparison.metrics.outputPerDollar, report.deltas.outputPerDollar], + ['Cache hit', report.baseline.metrics.cacheHitRate, report.comparison.metrics.cacheHitRate, report.deltas.cacheHitRate], + ['Waste signals', report.baseline.metrics.wasteSignals, report.comparison.metrics.wasteSignals, report.deltas.wasteSignals], + ] as const; + const widths = [18, 16, 16, 16]; + const row = (cells: string[]) => + 
`|${cells.map((cell, index) => ` ${truncateCell(cell, widths[index]! - 2).padEnd(widths[index]! - 2)} `).join('|')}|`; + lines.push(row(['Metric', 'Baseline', 'Compare', 'Delta'])); + lines.push(`|${widths.map((colWidth) => '-'.repeat(colWidth)).join('|')}|`); + for (const [label, base, comp, delta] of rows) { + const format = label === 'Cost' + ? (value: number | null) => formatNullableCost(value) + : (value: number | null) => formatNullableNumber(value, label.includes('/') || label === 'Cache hit' ? 2 : 0); + lines.push(row([label, format(base), format(comp), format(delta)])); + } + lines.push('', bold('Takeaways', noColor)); + for (const takeaway of report.takeaways) { + lines.push(`- ${truncateCell(takeaway, termWidth - 2)}`); + } + if (report.warnings.length > 0) { + lines.push('', dim(`Warnings: ${report.warnings.join(' | ')}`, noColor)); + } + return lines.join('\n'); +} + async function runNutrition(cliArgs: Record): Promise { const config = resolveConfig(cliArgs); const format = resolveTerminalJsonFormat('nutrition', cliArgs); @@ -1487,6 +1699,227 @@ async function runNutrition(cliArgs: Record): Promise { } } +async function loadOptimizationInput(config: ReturnType): Promise<{ + dateRange: DateRange; + providers: ProviderData[]; +}> { + const dateRange = computeDateRange({ + since: config.since, + until: config.until, + days: config.days, + }); + const { providerDataList } = await loadProviderDataForRange(config, dateRange); + emitProviderWarnings(providerDataList, 'Warning'); + return { dateRange, providers: providerDataList }; +} + +async function writeReportOutput(rendered: string, output: string | null): Promise { + if (output) { + writeFileSync(output, rendered); + } else { + process.stdout.write(`${rendered}\n`); + } +} + +async function runSimulateRouting(cliArgs: Record): Promise { + const config = resolveConfig(cliArgs); + const format = resolveTerminalJsonFormat('simulate-routing', cliArgs); + const strategy = typeof cliArgs['strategy'] === 
'string' ? cliArgs['strategy'] : 'conservative'; + const { dateRange, providers } = await loadOptimizationInput(config); + const events = providers.flatMap((provider) => provider.events ?? []); + const report = buildRoutingSimulationReport(events, dateRange, MODEL_PRICING, { strategy }); + const rendered = format === 'json' + ? JSON.stringify(report, null, 2) + : renderRoutingSimulationReport(report, config.width, config.noColor); + await writeReportOutput(rendered, config.output); +} + +async function runWaste(cliArgs: Record): Promise { + const config = resolveConfig(cliArgs); + const format = resolveTerminalJsonFormat('waste', cliArgs); + const severity = typeof cliArgs['severity'] === 'string' ? cliArgs['severity'] : 'all'; + if (!['all', 'high', 'medium', 'low'].includes(severity)) { + throw new TokenleakError('tokenleak waste --severity must be all, high, medium, or low'); + } + const { dateRange, providers } = await loadOptimizationInput(config); + const events = providers.flatMap((provider) => provider.events ?? []); + const report = buildAgentWasteReport(providers, events, dateRange); + const rendered = format === 'json' + ? JSON.stringify( + severity === 'all' + ? 
report + : { ...report, signals: report.signals.filter((signal) => signal.severity === severity) }, + null, + 2, + ) + : renderAgentWasteReport(report, config.width, config.noColor, severity); + await writeReportOutput(rendered, config.output); +} + +function splitPair(value: unknown, flag: string): [string, string] | null { + if (typeof value !== 'string') return null; + const parts = value.split(',').map((part) => part.trim()).filter(Boolean); + if (parts.length !== 2) { + throw new TokenleakError(`tokenleak behavior-diff ${flag} expects exactly two comma-separated values`); + } + return [parts[0]!, parts[1]!]; +} + +function resolveBehaviorSelectors(cliArgs: Record): [BehaviorCohortSelector, BehaviorCohortSelector] { + const providerPair = splitPair(cliArgs['provider'], '--provider'); + if (providerPair) { + return [ + { label: providerPair[0], dimension: 'provider', provider: normalizeProviderToken(providerPair[0]) }, + { label: providerPair[1], dimension: 'provider', provider: normalizeProviderToken(providerPair[1]) }, + ]; + } + const modelPair = splitPair(cliArgs['model'], '--model'); + if (modelPair) { + return [ + { label: modelPair[0], dimension: 'model', model: modelPair[0] }, + { label: modelPair[1], dimension: 'model', model: modelPair[1] }, + ]; + } + const projectPair = splitPair(cliArgs['project'], '--project'); + if (projectPair) { + return [ + { label: projectPair[0], dimension: 'project', projectId: projectPair[0] }, + { label: projectPair[1], dimension: 'project', projectId: projectPair[1] }, + ]; + } + const repoPair = splitPair(cliArgs['repo'], '--repo'); + if (repoPair) { + return [ + { label: repoPair[0], dimension: 'repo', repoRoot: repoPair[0] }, + { label: repoPair[1], dimension: 'repo', repoRoot: repoPair[1] }, + ]; + } + + throw new TokenleakError( + 'tokenleak behavior-diff needs one selector pair: --provider a,b, --model a,b, --project a,b, or --repo a,b', + ); +} + +function parseOptimizationArgs(argv: string[]): Record { + const 
cliArgs: Record = {}; + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]!; + const next = () => { + const value = argv[++i]; + if (!value) throw new TokenleakError(`Missing value for ${arg}`); + return value; + }; + + switch (arg) { + case '--help': + case '-h': + cliArgs['help'] = true; + break; + case '--version': + case '-v': + cliArgs['version'] = true; + break; + case '--format': + case '-f': + cliArgs['format'] = next(); + break; + case '--since': + case '-s': + cliArgs['since'] = next(); + break; + case '--until': + case '-u': + cliArgs['until'] = next(); + break; + case '--days': + case '-d': + cliArgs['days'] = Number(next()); + break; + case '--output': + case '-o': + cliArgs['output'] = next(); + break; + case '--width': + case '-w': + cliArgs['width'] = Number(next()); + break; + case '--provider': + case '-p': + cliArgs['provider'] = next(); + break; + case '--model': + cliArgs['model'] = next(); + break; + case '--project': + cliArgs['project'] = next(); + break; + case '--repo': + cliArgs['repo'] = next(); + break; + case '--strategy': + cliArgs['strategy'] = next(); + break; + case '--severity': + cliArgs['severity'] = next(); + break; + case '--claude': + cliArgs['claude'] = true; + break; + case '--codex': + cliArgs['codex'] = true; + break; + case '--cursor': + cliArgs['cursor'] = true; + break; + case '--pi': + cliArgs['pi'] = true; + break; + case '--openCode': + case '--open-code': + cliArgs['openCode'] = true; + break; + case '--allProviders': + case '--all-providers': + cliArgs['allProviders'] = true; + break; + case '--listProviders': + case '--list-providers': + cliArgs['listProviders'] = true; + break; + case '--noColor': + case '--no-color': + cliArgs['noColor'] = true; + break; + default: + throw new TokenleakError(`Unknown optimization flag "${arg}"`); + } + } + return cliArgs; +} + +async function runBehaviorDiff(cliArgs: Record): Promise { + const config = resolveConfig({ + ...cliArgs, + provider: undefined, + }); + 
const format = resolveTerminalJsonFormat('behavior-diff', cliArgs); + const selectors = resolveBehaviorSelectors(cliArgs); + const dateRange = computeDateRange({ + since: config.since, + until: config.until, + days: config.days, + }); + const available = await selectAvailableProviders({ ...config, provider: undefined }); + const { providerDataList } = await loadProviderDataForRange({ ...config, provider: undefined }, dateRange, available); + emitProviderWarnings(providerDataList, 'Warning'); + const events = providerDataList.flatMap((provider) => provider.events ?? []); + const report = buildAgentBehaviorDiffReport(events, dateRange, selectors[0], selectors[1]); + const rendered = format === 'json' + ? JSON.stringify(report, null, 2) + : renderAgentBehaviorDiffReport(report, config.width, config.noColor); + await writeReportOutput(rendered, config.output); +} + export async function runFocus(cliArgs: Record): Promise { const config = resolveFocusConfig(cliArgs); @@ -1762,6 +2195,8 @@ export async function run(cliArgs: Record): Promise { output: config.output, more: config.more, }; + const events = providerDataList.flatMap((provider) => provider.events ?? []); + output.optimization = buildDefaultOptimization(providerDataList, events, dateRange); const { port } = await startLiveServer(output, renderOptions); // Keep process alive until interrupted await new Promise((resolve) => { @@ -3365,11 +3800,58 @@ if (isDirectExecution) { process.exit(0); } if (argv[0] === 'waste') { - handleError( - new TokenleakError( - 'tokenleak waste is not a standalone command. 
Open the TUI and use the Advisor view for Waste Patterns.', - ), - ); + try { + const cliArgs = parseOptimizationArgs(argv.slice(1)); + if (cliArgs['help']) { + process.stdout.write(buildWasteHelpText()); + process.exit(0); + } + if (cliArgs['version']) { + process.stdout.write(buildVersionText()); + process.exit(0); + } + await initPricing(); + await runWaste(cliArgs); + process.exit(0); + } catch (error: unknown) { + handleError(error); + } + } + if (argv[0] === 'simulate-routing') { + try { + const cliArgs = parseOptimizationArgs(argv.slice(1)); + if (cliArgs['help']) { + process.stdout.write(buildSimulateRoutingHelpText()); + process.exit(0); + } + if (cliArgs['version']) { + process.stdout.write(buildVersionText()); + process.exit(0); + } + await initPricing(); + await runSimulateRouting(cliArgs); + process.exit(0); + } catch (error: unknown) { + handleError(error); + } + } + if (argv[0] === 'behavior-diff') { + try { + const cliArgs = parseOptimizationArgs(argv.slice(1)); + if (cliArgs['help']) { + process.stdout.write(buildBehaviorDiffHelpText()); + process.exit(0); + } + if (cliArgs['version']) { + process.stdout.write(buildVersionText()); + process.exit(0); + } + await initPricing(); + await runBehaviorDiff(cliArgs); + process.exit(0); + } catch (error: unknown) { + handleError(error); + } } if (argv[0] === 'nutrition') { const nutritionArgv = argv.slice(1); diff --git a/packages/cli/src/data-loader.ts b/packages/cli/src/data-loader.ts index bf194ec..51a269f 100644 --- a/packages/cli/src/data-loader.ts +++ b/packages/cli/src/data-loader.ts @@ -1,22 +1,32 @@ import { SCHEMA_VERSION, aggregate, + buildAgentBehaviorDiffReport, + buildAgentWasteReport, mergeProviderData, buildCompareOutput, buildMoreStats, + buildRoutingSimulationReport, computePreviousPeriod, parseCompareRange, mergeCostCompleteness, } from '@tokenleak/core'; import type { + BehaviorCohortSelector, CompareOutput, DateRange, ProviderData, TokenleakOutput, + UsageEvent, } from 
'@tokenleak/core'; import type { IProvider } from '@tokenleak/registry'; +import { MODEL_PRICING } from '@tokenleak/registry'; import { TokenleakError } from './errors.js'; +export interface LoadTokenleakDataOptions { + includeOptimization?: boolean; +} + /** * Load provider data for a date range, merge, aggregate, and build * a complete TokenleakOutput. Always computes MoreStats. @@ -24,8 +34,10 @@ import { TokenleakError } from './errors.js'; export async function loadTokenleakData( providers: IProvider[], range: DateRange, + options: LoadTokenleakDataOptions = {}, ): Promise { const { data: providerDataList, stats } = await loadAndAggregate(providers, range); + const events = providerDataList.flatMap((provider) => provider.events ?? []); return { schemaVersion: SCHEMA_VERSION, @@ -34,6 +46,56 @@ export async function loadTokenleakData( providers: providerDataList, aggregated: stats, more: buildMoreStats(providerDataList, range), + optimization: options.includeOptimization + ? buildDefaultOptimization(providerDataList, events, range) + : undefined, + }; +} + +function defaultBehaviorSelectors( + providers: ProviderData[], + events: UsageEvent[], + range: DateRange, +): [BehaviorCohortSelector, BehaviorCohortSelector] { + const topProviders = providers + .map((provider) => ({ provider: provider.provider, label: provider.displayName, tokens: provider.totalTokens })) + .filter((provider) => provider.tokens > 0) + .sort((a, b) => b.tokens - a.tokens || a.label.localeCompare(b.label)); + if (topProviders.length >= 2) { + return [ + { label: topProviders[0]!.label, dimension: 'provider', provider: topProviders[0]!.provider }, + { label: topProviders[1]!.label, dimension: 'provider', provider: topProviders[1]!.provider }, + ]; + } + + const modelTokens = new Map(); + for (const event of events) { + modelTokens.set(event.model, (modelTokens.get(event.model) ?? 
0) + event.totalTokens); + } + const topModels = [...modelTokens.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])); + if (topModels.length >= 2) { + return [ + { label: topModels[0]![0], dimension: 'model', model: topModels[0]![0] }, + { label: topModels[1]![0], dimension: 'model', model: topModels[1]![0] }, + ]; + } + + return [ + { label: 'Current window', dimension: 'date-range', dateRange: range }, + { label: 'Current window', dimension: 'date-range', dateRange: range }, + ]; +} + +export function buildDefaultOptimization( + providers: ProviderData[], + events: UsageEvent[], + range: DateRange, +): NonNullable { + const [baseline, comparison] = defaultBehaviorSelectors(providers, events, range); + return { + routingSimulation: buildRoutingSimulationReport(events, range, MODEL_PRICING), + agentWaste: buildAgentWasteReport(providers, events, range), + behaviorDiff: buildAgentBehaviorDiffReport(events, range, baseline, comparison), }; } @@ -109,6 +171,7 @@ export async function loadCompareTokenleakData( providers: IProvider[], currentRange: DateRange, compareStr: string, + options: LoadTokenleakDataOptions = {}, ): Promise { const previousRange = resolveCompareRange(compareStr, currentRange); const [currentResult, previousResult] = await Promise.all([ @@ -137,6 +200,13 @@ export async function loadCompareTokenleakData( previousStats: compareOutput.periodA.stats, deltas: compareOutput.deltas, }), + optimization: options.includeOptimization + ? buildDefaultOptimization( + currentResult.data, + currentResult.data.flatMap((provider) => provider.events ?? 
[]), + currentRange, + ) + : undefined, }, }; } diff --git a/packages/cli/src/tabbed-dashboard.ts b/packages/cli/src/tabbed-dashboard.ts index 26f589f..b0367b3 100644 --- a/packages/cli/src/tabbed-dashboard.ts +++ b/packages/cli/src/tabbed-dashboard.ts @@ -11,6 +11,9 @@ import { renderModelView, renderTokenView, renderCwdView, + renderRoutingSimulatorView, + renderAgentWasteView, + renderAgentBehaviorDiffView, TIME_RANGES, METRIC_TABS, } from '@tokenleak/renderers'; @@ -82,8 +85,8 @@ async function loadForRange( const range = resolveRange(state, timeRange); const loadPromise = (state.compare - ? loadCompareTokenleakData(providers, range, state.compare).then((result) => result.output) - : loadTokenleakData(providers, range)) + ? loadCompareTokenleakData(providers, range, state.compare, { includeOptimization: true }).then((result) => result.output) + : loadTokenleakData(providers, range, { includeOptimization: true })) .then((output) => { state.dataCache.set(timeRange, output); return output; @@ -138,6 +141,9 @@ function renderActiveView( case 'cwd': return renderCwdView(output, width, noColor); case 'dow': return renderDowView(output, width, noColor); case 'tod': return renderTodView(output, width, noColor); + case 'sim': return renderRoutingSimulatorView(output, width, noColor); + case 'waste': return renderAgentWasteView(output, width, noColor); + case 'diff': return renderAgentBehaviorDiffView(output, width, noColor); default: return renderOverviewView(output, options); } } @@ -351,8 +357,9 @@ export async function startTabbedDashboard( // Number keys 1-9 to jump to specific tab const digit = key.sequence?.match(/^[1-9]$/)?.[0]; - if (digit) { - const tabIdx = Number(digit) - 1; + const zero = key.sequence === '0'; + if (digit || zero) { + const tabIdx = zero ? 
9 : Number(digit) - 1; if (tabIdx < METRIC_TABS.length) { state.metricTab = METRIC_TABS[tabIdx]!; state.scrollOffset = 0; diff --git a/packages/core/src/aggregation/agent-behavior-diff.ts b/packages/core/src/aggregation/agent-behavior-diff.ts new file mode 100644 index 0000000..b0e0d9e --- /dev/null +++ b/packages/core/src/aggregation/agent-behavior-diff.ts @@ -0,0 +1,238 @@ +import type { + AgentBehaviorDiffReport, + BehaviorCohortMetrics, + BehaviorCohortSelector, + DateRange, + UsageEvent, +} from '../types'; +import { buildSessionRollups } from './analytics'; +import { buildAgentWasteReport } from './agent-waste'; + +const METHOD = + 'Agent behavior diff v1: deterministic cohort comparison across providers, models, projects, repos, date ranges, and task styles.'; + +function normalize(value: string | undefined): string { + return (value ?? '').toLowerCase().trim(); +} + +function matchesSelector(event: UsageEvent, selector: BehaviorCohortSelector): boolean { + switch (selector.dimension) { + case 'provider': + return normalize(event.provider) === normalize(selector.provider); + case 'model': + return normalize(event.model) === normalize(selector.model); + case 'project': + return normalize(event.projectId) === normalize(selector.projectId); + case 'repo': + return normalize(event.repoRoot) === normalize(selector.repoRoot); + case 'date-range': + return Boolean(selector.dateRange && event.date >= selector.dateRange.since && event.date <= selector.dateRange.until); + case 'session-style': { + const duration = event.durationMs ?? 
0; + if (selector.taskStyle === 'quick-hit') return duration <= 10 * 60 * 1_000 && event.totalTokens < 6_000; + if (selector.taskStyle === 'deep-work') return duration >= 45 * 60 * 1_000 || event.totalTokens >= 20_000; + if (selector.taskStyle === 'iterative') return duration >= 15 * 60 * 1_000 || event.totalTokens >= 6_000; + return true; + } + } +} + +function nullWhenMissing(value: number, hasDenominator: boolean): number | null { + return hasDenominator ? value : null; +} + +function modelSwitchesPerSession(events: UsageEvent[]): number { + const bySession = new Map(); + for (const event of events) { + const key = event.sessionId ?? `${event.provider}:${event.date}`; + const list = bySession.get(key) ?? []; + list.push(event); + bySession.set(key, list); + } + + if (bySession.size === 0) return 0; + let switches = 0; + for (const sessionEvents of bySession.values()) { + const ordered = sessionEvents.slice().sort((a, b) => a.timestamp.localeCompare(b.timestamp)); + for (let i = 1; i < ordered.length; i++) { + if (ordered[i]!.model !== ordered[i - 1]!.model) switches++; + } + } + return switches / bySession.size; +} + +function metricsFor(events: UsageEvent[], dateRange: DateRange): BehaviorCohortMetrics { + const sessions = buildSessionRollups(events); + const waste = buildAgentWasteReport([], events, dateRange); + const input = events.reduce((sum, event) => sum + event.inputTokens, 0); + const output = events.reduce((sum, event) => sum + event.outputTokens, 0); + const read = events.reduce((sum, event) => sum + event.cacheReadTokens, 0); + const write = events.reduce((sum, event) => sum + event.cacheWriteTokens, 0); + const tokens = events.reduce((sum, event) => sum + event.totalTokens, 0); + const cost = events.reduce((sum, event) => sum + event.cost, 0); + const activeDays = new Set(events.map((event) => event.date)).size; + const durationSessions = sessions.filter((session) => session.durationMs !== null); + const estimatedSavings = waste.signals + 
.map((signal) => signal.estimatedSavings) + .filter((value): value is number => value !== null); + + return { + events: events.length, + sessions: sessions.length, + activeDays, + tokens, + cost, + inputPerOutput: output > 0 ? input / output : null, + outputPerDollar: cost > 0 ? output / cost : null, + cacheHitRate: input + read > 0 ? read / (input + read) : 0, + cacheReuseRatio: write > 0 ? read / write : null, + modelSwitchesPerSession: modelSwitchesPerSession(events), + wasteSignals: waste.signals.length, + highSeverityWasteSignals: waste.summary.highSeverity, + estimatedWasteSavings: estimatedSavings.length > 0 ? estimatedSavings.reduce((sum, value) => sum + value, 0) : null, + averageSessionDurationMs: durationSessions.length > 0 + ? durationSessions.reduce((sum, session) => sum + (session.durationMs ?? 0), 0) / durationSessions.length + : null, + }; +} + +function deltaValue(a: number | null, b: number | null): number | null { + if (a === null || b === null) return null; + return b - a; +} + +function buildDeltas( + baseline: BehaviorCohortMetrics, + comparison: BehaviorCohortMetrics, +): Record { + if (baseline.events === 0 || comparison.events === 0) { + return { + events: null, + sessions: null, + activeDays: null, + tokens: null, + cost: null, + inputPerOutput: null, + outputPerDollar: null, + cacheHitRate: null, + cacheReuseRatio: null, + modelSwitchesPerSession: null, + wasteSignals: null, + highSeverityWasteSignals: null, + estimatedWasteSavings: null, + averageSessionDurationMs: null, + }; + } + + return { + events: deltaValue(baseline.events, comparison.events), + sessions: deltaValue(baseline.sessions, comparison.sessions), + activeDays: deltaValue(baseline.activeDays, comparison.activeDays), + tokens: deltaValue(baseline.tokens, comparison.tokens), + cost: deltaValue(baseline.cost, comparison.cost), + inputPerOutput: deltaValue(baseline.inputPerOutput, comparison.inputPerOutput), + outputPerDollar: deltaValue(baseline.outputPerDollar, 
comparison.outputPerDollar), + cacheHitRate: deltaValue(baseline.cacheHitRate, comparison.cacheHitRate), + cacheReuseRatio: deltaValue(baseline.cacheReuseRatio, comparison.cacheReuseRatio), + modelSwitchesPerSession: deltaValue(baseline.modelSwitchesPerSession, comparison.modelSwitchesPerSession), + wasteSignals: deltaValue(baseline.wasteSignals, comparison.wasteSignals), + highSeverityWasteSignals: deltaValue(baseline.highSeverityWasteSignals, comparison.highSeverityWasteSignals), + estimatedWasteSavings: deltaValue(baseline.estimatedWasteSavings, comparison.estimatedWasteSavings), + averageSessionDurationMs: deltaValue(baseline.averageSessionDurationMs, comparison.averageSessionDurationMs), + }; +} + +function percentChange(from: number | null, to: number | null): number | null { + if (from === null || to === null || from === 0) return null; + return (to - from) / from; +} + +function formatPercent(value: number): string { + return `${Math.abs(value * 100).toFixed(0)}%`; +} + +function buildTakeaways( + baseline: BehaviorCohortSelector, + comparison: BehaviorCohortSelector, + baselineMetrics: BehaviorCohortMetrics, + comparisonMetrics: BehaviorCohortMetrics, +): string[] { + const takeaways: string[] = []; + const inputChange = percentChange(baselineMetrics.inputPerOutput, comparisonMetrics.inputPerOutput); + if (inputChange !== null && Math.abs(inputChange) >= 0.05) { + takeaways.push( + `${comparison.label} used ${formatPercent(inputChange)} ${inputChange < 0 ? 'fewer' : 'more'} input tokens per output token than ${baseline.label}.`, + ); + } + + const costChange = percentChange(baselineMetrics.cost, comparisonMetrics.cost); + if (costChange !== null && Math.abs(costChange) >= 0.05) { + takeaways.push( + `${comparison.label} cost ${formatPercent(costChange)} ${costChange < 0 ? 
'less' : 'more'} than ${baseline.label}.`, + ); + } + + const cacheDelta = comparisonMetrics.cacheHitRate - baselineMetrics.cacheHitRate; + if (Math.abs(cacheDelta) >= 0.05) { + takeaways.push( + `${comparison.label} had ${(Math.abs(cacheDelta) * 100).toFixed(0)} points ${cacheDelta > 0 ? 'higher' : 'lower'} cache hit rate.`, + ); + } + + const wasteDelta = comparisonMetrics.wasteSignals - baselineMetrics.wasteSignals; + if (wasteDelta !== 0) { + takeaways.push( + `${comparison.label} produced ${Math.abs(wasteDelta)} ${wasteDelta > 0 ? 'more' : 'fewer'} waste signals.`, + ); + } + + if (takeaways.length === 0) { + takeaways.push(`${comparison.label} and ${baseline.label} look similar on the selected metrics.`); + } + return takeaways; +} + +function selectorsIdentical(a: BehaviorCohortSelector, b: BehaviorCohortSelector): boolean { + return JSON.stringify(a) === JSON.stringify(b); +} + +function addWarnings( + warnings: string[], + label: string, + metrics: BehaviorCohortMetrics, + role: 'Baseline' | 'Comparison', +): void { + if (metrics.events === 0) warnings.push(`${role} cohort is empty: ${label}.`); + if (metrics.sessions < 5 || metrics.events < 10) warnings.push(`${role} cohort is sparse: ${label}.`); + if (metrics.cost === 0) warnings.push(`${role} cohort has no cost data: ${label}.`); +} + +export function buildAgentBehaviorDiffReport( + events: UsageEvent[], + dateRange: DateRange, + baselineSelector: BehaviorCohortSelector, + comparisonSelector: BehaviorCohortSelector, +): AgentBehaviorDiffReport { + const warnings: string[] = []; + if (selectorsIdentical(baselineSelector, comparisonSelector)) { + warnings.push('Baseline and comparison selectors are identical.'); + } + + const baselineEvents = events.filter((event) => matchesSelector(event, baselineSelector)); + const comparisonEvents = events.filter((event) => matchesSelector(event, comparisonSelector)); + const baselineMetrics = metricsFor(baselineEvents, baselineSelector.dateRange ?? 
dateRange); + const comparisonMetrics = metricsFor(comparisonEvents, comparisonSelector.dateRange ?? dateRange); + + addWarnings(warnings, baselineSelector.label, baselineMetrics, 'Baseline'); + addWarnings(warnings, comparisonSelector.label, comparisonMetrics, 'Comparison'); + + return { + method: METHOD, + dateRange, + baseline: { selector: baselineSelector, metrics: baselineMetrics }, + comparison: { selector: comparisonSelector, metrics: comparisonMetrics }, + deltas: buildDeltas(baselineMetrics, comparisonMetrics), + takeaways: buildTakeaways(baselineSelector, comparisonSelector, baselineMetrics, comparisonMetrics), + warnings: [...new Set(warnings)], + }; +} diff --git a/packages/core/src/aggregation/agent-waste.ts b/packages/core/src/aggregation/agent-waste.ts new file mode 100644 index 0000000..16e3ff8 --- /dev/null +++ b/packages/core/src/aggregation/agent-waste.ts @@ -0,0 +1,299 @@ +import type { + AgentWasteReport, + AgentWasteSignal, + DateRange, + OptimizationConfidence, + OptimizationEvidence, + ProviderData, + UsageEvent, + WasteRecipe, +} from '../types'; +import { buildSessionRollups } from './analytics'; +import { clusterPrompts } from './prompt-clusters'; + +const METHOD = + 'Agent waste detector v1: deterministic signals from local events, prompts, cache, model churn, and session rollups.'; +const CONTEXT_DRAG_INPUT_PER_OUTPUT = 8; +const CACHE_HIT_LOW = 0.25; +const CACHE_REUSE_LOW = 2; +const MODEL_CHURN_SWITCHES = 3; +const PROMPT_REPEAT_COUNT = 3; +const PREMIUM_SMALL_OUTPUT = 1_000; +const PREMIUM_SMALL_TOKENS = 10_000; + +function severityFor(cost: number, fallback: AgentWasteSignal['severity'] = 'low'): AgentWasteSignal['severity'] { + if (cost >= 10) return 'high'; + if (cost >= 2) return 'medium'; + return fallback; +} + +function confidenceFor(count: number, degraded = false): OptimizationConfidence { + if (degraded || count < 3) return 'low'; + if (count < 6) return 'medium'; + return 'high'; +} + +function recipe(title: string, 
detail: string, command?: string): WasteRecipe { + return { title, detail, command }; +} + +function evidenceFromEvents(events: UsageEvent[], reason: string): OptimizationEvidence { + const first = events[0]; + return { + provider: first?.provider, + model: first?.model, + projectId: first?.projectId ?? null, + repoRoot: first?.repoRoot ?? null, + sessionId: first?.sessionId ?? null, + date: first?.date, + eventCount: events.length, + tokens: events.reduce((sum, event) => sum + event.totalTokens, 0), + cost: events.reduce((sum, event) => sum + event.cost, 0), + reason, + }; +} + +function estimatedSavings(cost: number, fraction: number): number | null { + return cost > 0 ? cost * fraction : null; +} + +function bySession(events: UsageEvent[]): Map { + const sessions = new Map(); + for (const event of events) { + const key = event.sessionId?.trim() || `${event.provider}:${event.date}`; + const list = sessions.get(key) ?? []; + list.push(event); + sessions.set(key, list); + } + return sessions; +} + +function detectContextDrag(events: UsageEvent[]): AgentWasteSignal[] { + const signals: AgentWasteSignal[] = []; + for (const sessionEvents of bySession(events).values()) { + const input = sessionEvents.reduce((sum, event) => sum + event.inputTokens, 0); + const output = sessionEvents.reduce((sum, event) => sum + event.outputTokens, 0); + if (output <= 0 || input / output < CONTEXT_DRAG_INPUT_PER_OUTPUT) { + continue; + } + const evidence = evidenceFromEvents( + sessionEvents, + `Input tokens are ${(input / output).toFixed(1)}x output tokens in this session.`, + ); + signals.push({ + kind: 'context-drag', + title: 'High context drag', + severity: severityFor(evidence.cost, 'medium'), + confidence: confidenceFor(sessionEvents.length, !sessionEvents[0]?.sessionId), + estimatedSavings: estimatedSavings(evidence.cost, 0.2), + evidence, + recipes: [ + recipe('Start a compact follow-up session', 'Ask for a concise handoff, then continue with only the files and context needed 
for the next step.'), + ], + }); + } + return signals; +} + +function detectPromptRepeats(events: UsageEvent[], warnings: string[]): AgentWasteSignal[] { + const prompted = events.filter((event) => event.prompt?.trim()); + if (prompted.length === 0) { + warnings.push('No prompt text captured; skipped retry-loop and prompt-repeat signals.'); + return []; + } + + return clusterPrompts(prompted) + .filter((cluster) => cluster.count >= PROMPT_REPEAT_COUNT) + .slice(0, 5) + .map((cluster): AgentWasteSignal => ({ + kind: 'prompt-repeat', + title: 'Repeated prompt cluster', + severity: severityFor(cluster.totalCost, 'low'), + confidence: confidenceFor(cluster.count), + estimatedSavings: estimatedSavings(cluster.totalCost, 0.3), + evidence: { + eventCount: cluster.count, + tokens: cluster.totalTokens, + cost: cluster.totalCost, + reason: `${cluster.count} similar prompts clustered around "${cluster.canonicalPrompt}".`, + }, + recipes: [ + recipe('Break the retry loop', 'Summarize what failed, state the next hypothesis, and ask for one targeted change instead of repeating the same request.'), + ], + })); +} + +function detectModelChurn(events: UsageEvent[]): AgentWasteSignal[] { + const signals: AgentWasteSignal[] = []; + for (const sessionEvents of bySession(events).values()) { + const ordered = sessionEvents.slice().sort((a, b) => a.timestamp.localeCompare(b.timestamp)); + let switches = 0; + for (let i = 1; i < ordered.length; i++) { + if (ordered[i]!.model !== ordered[i - 1]!.model) switches++; + } + if (switches < MODEL_CHURN_SWITCHES || new Set(ordered.map((event) => event.model)).size <= 1) { + continue; + } + const evidence = evidenceFromEvents(ordered, `Session switched models ${switches} times across ${ordered.length} events.`); + signals.push({ + kind: 'model-churn', + title: 'Frequent model switching', + severity: 'low', + confidence: confidenceFor(ordered.length), + estimatedSavings: estimatedSavings(evidence.cost, 0.1), + evidence, + recipes: [ + 
recipe('Choose model roles up front', 'Use one model for exploration and one for edits instead of switching repeatedly inside the same task.'), + ], + }); + } + return signals; +} + +function detectPremiumSmallTask(events: UsageEvent[]): AgentWasteSignal[] { + return events + .filter((event) => ( + /opus|gpt-4o|gpt-5\.5|gpt-5\.4/.test(event.model.toLowerCase()) && + event.outputTokens <= PREMIUM_SMALL_OUTPUT && + event.totalTokens <= PREMIUM_SMALL_TOKENS + )) + .slice(0, 5) + .map((event): AgentWasteSignal => ({ + kind: 'premium-for-small-task', + title: 'Premium model used for a small task', + severity: severityFor(event.cost, 'low'), + confidence: confidenceFor(1, true), + estimatedSavings: estimatedSavings(event.cost, 0.35), + evidence: evidenceFromEvents([event], `${event.model} produced ${event.outputTokens.toLocaleString('en-US')} output tokens.`), + recipes: [ + recipe('Route small asks down', 'Use routing simulation to estimate savings from sending short lookups and tiny fixes to a cheaper model.', 'tokenleak simulate-routing --days 30'), + ], + })); +} + +function detectCacheWaste(providers: ProviderData[]): AgentWasteSignal[] { + const signals: AgentWasteSignal[] = []; + for (const provider of providers) { + const input = provider.daily.reduce((sum, day) => sum + day.inputTokens, 0); + const read = provider.daily.reduce((sum, day) => sum + day.cacheReadTokens, 0); + const write = provider.daily.reduce((sum, day) => sum + day.cacheWriteTokens, 0); + if (read === 0 && write === 0) { + continue; + } + const hitRate = input + read > 0 ? read / (input + read) : 0; + if (hitRate < CACHE_HIT_LOW) { + signals.push({ + kind: 'cache-miss-heavy', + title: 'Low cache hit rate', + severity: provider.totalCost >= 10 ? 'medium' : 'low', + confidence: 'medium', + estimatedSavings: estimatedSavings(provider.totalCost, 0.15), + evidence: { + provider: provider.provider, + eventCount: provider.events?.length ?? 
0, + tokens: provider.totalTokens, + cost: provider.totalCost, + reason: `Cache hit rate is ${(hitRate * 100).toFixed(0)}% for ${provider.displayName}.`, + }, + recipes: [ + recipe('Stabilize reusable context', 'Move stable instructions into project guidance and avoid resending large changing context blocks.'), + ], + }); + } + const reuseRatio = write > 0 ? read / write : null; + if (reuseRatio !== null && reuseRatio < CACHE_REUSE_LOW) { + signals.push({ + kind: 'cache-write-waste', + title: 'Cache writes are not paying back', + severity: write > 100_000 ? 'medium' : 'low', + confidence: 'medium', + estimatedSavings: null, + evidence: { + provider: provider.provider, + eventCount: provider.events?.length ?? 0, + tokens: write, + cost: provider.totalCost, + reason: `Cache reuse ratio is ${reuseRatio.toFixed(1)}x from ${write.toLocaleString('en-US')} write tokens.`, + }, + recipes: [ + recipe('Batch related work', 'Keep stable instructions unchanged and group related tasks so cache writes are reused.'), + ], + }); + } + } + return signals; +} + +function detectLongLowYield(events: UsageEvent[]): AgentWasteSignal[] { + return buildSessionRollups(events) + .filter((session) => ( + (session.durationMs ?? 0) >= 45 * 60 * 1_000 && + session.cost > 0 && + session.outputTokens / session.cost < 2_000 + )) + .slice(0, 5) + .map((session): AgentWasteSignal => ({ + kind: 'long-session-low-yield', + title: 'Long session with low output per dollar', + severity: severityFor(session.cost, 'low'), + confidence: confidenceFor(session.eventCount), + estimatedSavings: estimatedSavings(session.cost, 0.2), + evidence: { + provider: session.provider, + projectId: session.projectId, + repoRoot: session.repoRoot, + sessionId: session.sessionId, + eventCount: session.eventCount, + tokens: session.totalTokens, + cost: session.cost, + reason: `Session ran ${Math.round((session.durationMs ?? 
0) / 60_000)} minutes with low output per dollar.`, + }, + recipes: [ + recipe('Inspect the replay', 'Review the session timeline for broad reads, stalled loops, or repeated commands.', `tokenleak replay ${session.start.slice(0, 10)}`), + ], + })); +} + +function sortSignals(signals: AgentWasteSignal[]): AgentWasteSignal[] { + const severityRank = { high: 3, medium: 2, low: 1 }; + return signals.sort((a, b) => ( + severityRank[b.severity] - severityRank[a.severity] || + (b.estimatedSavings ?? 0) - (a.estimatedSavings ?? 0) || + b.evidence.tokens - a.evidence.tokens || + a.title.localeCompare(b.title) + )); +} + +export function buildAgentWasteReport( + providers: ProviderData[], + events: UsageEvent[], + dateRange: DateRange, +): AgentWasteReport { + const warnings: string[] = []; + const signals = sortSignals([ + ...detectContextDrag(events), + ...detectPromptRepeats(events, warnings), + ...detectModelChurn(events), + ...detectPremiumSmallTask(events), + ...detectCacheWaste(providers), + ...detectLongLowYield(events), + ]); + const estimated = signals + .map((signal) => signal.estimatedSavings) + .filter((value): value is number => value !== null); + const sessions = new Set(events.map((event) => event.sessionId ?? `${event.provider}:${event.date}`)); + + return { + method: METHOD, + dateRange, + summary: { + totalSignals: signals.length, + highSeverity: signals.filter((signal) => signal.severity === 'high').length, + estimatedSavings: estimated.length > 0 ? 
estimated.reduce((sum, value) => sum + value, 0) : null, + analyzedEvents: events.length, + analyzedSessions: sessions.size, + }, + signals, + warnings: [...new Set(warnings)], + }; +} diff --git a/packages/core/src/aggregation/index.ts b/packages/core/src/aggregation/index.ts index e3fb636..e4c32f3 100644 --- a/packages/core/src/aggregation/index.ts +++ b/packages/core/src/aggregation/index.ts @@ -14,6 +14,10 @@ export { buildFocusReport } from './focus'; export { buildReplayReport } from './replay'; export { buildCommonsExport, buildCommonsPromptExport, inspectCommonsExport } from './commons'; export { buildWasteReport } from './waste'; +export { buildAgentWasteReport } from './agent-waste'; +export { buildRoutingSimulationReport } from './routing-simulator'; +export type { BuildRoutingSimulationOptions, RoutingModelPricing } from './routing-simulator'; +export { buildAgentBehaviorDiffReport } from './agent-behavior-diff'; export { buildNutritionReport } from './nutrition'; export { collectGitOutcomeSignals } from './nutrition-git'; export { clusterPrompts, tokenBigrams } from './prompt-clusters'; diff --git a/packages/core/src/aggregation/optimization-intelligence.test.ts b/packages/core/src/aggregation/optimization-intelligence.test.ts new file mode 100644 index 0000000..8a62ed7 --- /dev/null +++ b/packages/core/src/aggregation/optimization-intelligence.test.ts @@ -0,0 +1,170 @@ +import { describe, expect, test } from 'bun:test'; +import type { + BehaviorCohortSelector, + DateRange, + ProviderData, + UsageEvent, +} from '../types'; +import { + buildAgentBehaviorDiffReport, + buildAgentWasteReport, + buildRoutingSimulationReport, +} from './index'; + +const range: DateRange = { since: '2026-05-01', until: '2026-05-07' }; + +const pricing = { + 'claude-3-opus': { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + 'claude-3.5-sonnet': { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + 'gpt-4o': { input: 2.5, output: 10, cacheRead: 1.25, 
cacheWrite: 2.5 }, + 'gpt-4o-mini': { input: 0.15, output: 0.6, cacheRead: 0.075, cacheWrite: 0.15 }, +} as const; + +function event(overrides: Partial): UsageEvent { + return { + provider: 'claude-code', + timestamp: '2026-05-02T10:00:00.000Z', + date: '2026-05-02', + model: 'claude-3-opus', + inputTokens: 10_000, + outputTokens: 400, + cacheReadTokens: 2_000, + cacheWriteTokens: 500, + totalTokens: 12_900, + cost: 0.195375, + pricing: pricing['claude-3-opus'], + costSource: 'provider-reported', + sessionId: 's1', + projectId: '/work/repo', + repoRoot: '/work/repo', + durationMs: 120_000, + prompt: 'fix the lint error', + ...overrides, + }; +} + +function provider(events: UsageEvent[]): ProviderData { + return { + provider: 'claude-code', + displayName: 'Claude Code', + colors: { primary: '#fff', secondary: '#ddd', gradient: ['#fff', '#ddd'] }, + totalTokens: events.reduce((sum, e) => sum + e.totalTokens, 0), + totalCost: events.reduce((sum, e) => sum + e.cost, 0), + daily: [ + { + date: '2026-05-02', + inputTokens: events.reduce((sum, e) => sum + e.inputTokens, 0), + outputTokens: events.reduce((sum, e) => sum + e.outputTokens, 0), + cacheReadTokens: events.reduce((sum, e) => sum + e.cacheReadTokens, 0), + cacheWriteTokens: events.reduce((sum, e) => sum + e.cacheWriteTokens, 0), + totalTokens: events.reduce((sum, e) => sum + e.totalTokens, 0), + cost: events.reduce((sum, e) => sum + e.cost, 0), + models: [], + }, + ], + events, + }; +} + +describe('buildRoutingSimulationReport', () => { + test('simulates downgrade savings with cache-aware pricing and sparse confidence', () => { + const report = buildRoutingSimulationReport( + [event({ sessionId: 's1' }), event({ sessionId: 's2', timestamp: '2026-05-02T10:05:00.000Z' })], + range, + pricing, + { strategy: 'conservative' }, + ); + + expect(report.strategy).toBe('conservative'); + expect(report.affectedEvents).toBe(2); + expect(report.estimatedSavings).toBeGreaterThan(0); + 
expect(report.simulatedCost).toBeLessThan(report.currentCost); + expect(report.candidates[0]?.toModel).toBe('claude-3.5-sonnet'); + expect(report.candidates[0]?.confidence).toBe('low'); + expect(report.candidates[0]?.reasons).toContain('provider reported current cost'); + }); + + test('warns and skips malformed and unknown-priced events', () => { + const report = buildRoutingSimulationReport( + [ + event({ inputTokens: -1, totalTokens: 10 }), + event({ model: 'mystery-premium', pricing: null, costSource: 'unpriced' }), + ], + range, + pricing, + ); + + expect(report.affectedEvents).toBe(0); + expect(report.warnings.some((w) => w.includes('negative token counts'))).toBe(true); + expect(report.warnings.some((w) => w.includes('No downgrade path'))).toBe(true); + }); +}); + +describe('buildAgentWasteReport', () => { + test('detects context drag, prompt repeats, model churn, and cache waste with evidence', () => { + const events = [ + event({ sessionId: 's1', model: 'claude-3-opus', inputTokens: 30_000, outputTokens: 500, totalTokens: 31_000, prompt: 'fix flaky tests' }), + event({ sessionId: 's1', model: 'claude-3.5-sonnet', inputTokens: 28_000, outputTokens: 400, totalTokens: 28_900, prompt: 'fix flaky tests again' }), + event({ sessionId: 's1', model: 'claude-3-opus', inputTokens: 29_000, outputTokens: 300, totalTokens: 29_800, prompt: 'fix flaky tests please' }), + event({ sessionId: 's1', model: 'claude-3.5-sonnet', inputTokens: 25_000, outputTokens: 300, totalTokens: 25_800, prompt: 'fix flaky tests' }), + ]; + const report = buildAgentWasteReport([provider(events)], events, range); + + expect(report.summary.totalSignals).toBeGreaterThanOrEqual(3); + expect(report.signals.map((s) => s.kind)).toContain('context-drag'); + expect(report.signals.map((s) => s.kind)).toContain('prompt-repeat'); + expect(report.signals.map((s) => s.kind)).toContain('model-churn'); + expect(report.signals[0]?.evidence.reason).toBeTruthy(); + 
expect(report.signals[0]?.recipes[0]?.detail).toBeTruthy(); + }); + + test('skips prompt-only signals when prompt capture is missing', () => { + const events = [event({ prompt: undefined }), event({ prompt: undefined, sessionId: 's2' })]; + const report = buildAgentWasteReport([provider(events)], events, range); + + expect(report.signals.some((s) => s.kind === 'prompt-repeat' || s.kind === 'retry-loop')).toBe(false); + expect(report.warnings.some((w) => w.includes('No prompt text'))).toBe(true); + }); +}); + +describe('buildAgentBehaviorDiffReport', () => { + test('compares provider cohorts and produces deterministic takeaways', () => { + const events = [ + event({ provider: 'claude-code', model: 'claude-3-opus', sessionId: 'c1', cost: 2, inputTokens: 20_000, outputTokens: 500, totalTokens: 20_500 }), + event({ provider: 'codex', model: 'gpt-4o', sessionId: 'x1', cost: 0.4, inputTokens: 4_000, outputTokens: 700, totalTokens: 4_700 }), + event({ provider: 'codex', model: 'gpt-4o', sessionId: 'x2', cost: 0.3, inputTokens: 3_000, outputTokens: 600, totalTokens: 3_600 }), + ]; + const baseline: BehaviorCohortSelector = { + label: 'Claude', + dimension: 'provider', + provider: 'claude-code', + }; + const comparison: BehaviorCohortSelector = { + label: 'Codex', + dimension: 'provider', + provider: 'codex', + }; + + const report = buildAgentBehaviorDiffReport(events, range, baseline, comparison); + + expect(report.baseline.metrics.events).toBe(1); + expect(report.comparison.metrics.events).toBe(2); + expect(report.deltas.cost).toBeCloseTo(-1.3); + expect(report.takeaways.some((line) => line.includes('Codex'))).toBe(true); + }); + + test('warns on empty and identical cohorts instead of throwing', () => { + const selector: BehaviorCohortSelector = { + label: 'Missing', + dimension: 'provider', + provider: 'missing', + }; + const report = buildAgentBehaviorDiffReport([event({})], range, selector, selector); + + expect(report.baseline.metrics.events).toBe(0); + 
expect(report.comparison.metrics.events).toBe(0); + expect(report.deltas.cost).toBeNull(); + expect(report.warnings.some((w) => w.includes('identical'))).toBe(true); + expect(report.warnings.some((w) => w.includes('Baseline cohort is empty'))).toBe(true); + }); +}); diff --git a/packages/core/src/aggregation/routing-simulator.ts b/packages/core/src/aggregation/routing-simulator.ts new file mode 100644 index 0000000..d5ff02f --- /dev/null +++ b/packages/core/src/aggregation/routing-simulator.ts @@ -0,0 +1,266 @@ +import type { + DateRange, + OptimizationConfidence, + RoutingRule, + RoutingSimulationCandidate, + RoutingSimulationReport, + UsageEvent, +} from '../types'; + +const METHOD = + 'Model routing simulator v1: re-prices historical events under deterministic downgrade rules. Savings are estimates, not enforcement.'; +const TOKENS_PER_MILLION = 1_000_000; +const SHORT_OUTPUT_TOKENS = 1_000; +const QUICK_LOOKUP_TOKENS = 8_000; +const QUICK_LOOKUP_DURATION_MS = 10 * 60 * 1_000; +const LOW_OUTPUT_RATIO = 0.08; +const SPARSE_MATCH_COUNT = 5; + +export interface RoutingModelPricing { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; +} + +export interface BuildRoutingSimulationOptions { + strategy?: 'conservative' | 'aggressive' | 'manual' | string; + rules?: RoutingRule[]; + downgradePath?: (model: string) => string | null; +} + +function normalizeModel(model: string): string { + let normalized = model.toLowerCase().trim(); + const slashIndex = normalized.lastIndexOf('/'); + if (slashIndex >= 0) { + normalized = normalized.slice(slashIndex + 1); + } + return normalized.replace(/-\d{4}-?\d{2}-?\d{2}$/, ''); +} + +function defaultDowngradePath(model: string): string | null { + const normalized = normalizeModel(model); + const paths: Record = { + 'claude-opus-4-6': 'claude-sonnet-4-6', + 'claude-opus-4': 'claude-sonnet-4', + 'claude-opus-4-5': 'claude-sonnet-4-5', + 'claude-sonnet-4-5': 'claude-haiku-4-5', + 'claude-3-opus': 
'claude-3.5-sonnet', + 'claude-3-sonnet': 'claude-3-haiku', + 'claude-3.5-sonnet': 'claude-3.5-haiku', + 'gpt-4o': 'gpt-4o-mini', + 'gpt-5.5': 'gpt-5-mini', + 'gpt-5.4': 'gpt-5.4-mini', + 'gpt-5': 'gpt-5-mini', + o1: 'o1-mini', + o3: 'o3-mini', + }; + return paths[normalized] ?? null; +} + +function eventId(event: UsageEvent, index: number): string { + return [ + event.provider, + event.sessionId ?? 'no-session', + event.timestamp, + index, + ].join(':'); +} + +function hasMalformedTokens(event: UsageEvent): boolean { + return [ + event.inputTokens, + event.outputTokens, + event.cacheReadTokens, + event.cacheWriteTokens, + event.totalTokens, + ].some((value) => !Number.isFinite(value) || value < 0); +} + +function priceEvent(event: UsageEvent, pricing: RoutingModelPricing): number { + return ( + (event.inputTokens / TOKENS_PER_MILLION) * pricing.input + + (event.outputTokens / TOKENS_PER_MILLION) * pricing.output + + (event.cacheReadTokens / TOKENS_PER_MILLION) * pricing.cacheRead + + (event.cacheWriteTokens / TOKENS_PER_MILLION) * pricing.cacheWrite + ); +} + +function confidenceFor(event: UsageEvent, matchedCount: number, reasons: string[]): OptimizationConfidence { + if (matchedCount < SPARSE_MATCH_COUNT || event.costSource === 'unpriced') { + return 'low'; + } + if (event.costSource === 'provider-reported' || reasons.some((reason) => reason.includes('cache'))) { + return 'medium'; + } + return 'high'; +} + +function builtInRuleFor(event: UsageEvent, downgradeTo: string): RoutingRule | null { + if (event.outputTokens > 0 && event.outputTokens <= SHORT_OUTPUT_TOKENS) { + return { + id: 'premium-short-output', + label: 'Premium model with short output', + kind: 'premium-short-output', + fromModels: [event.model], + toModel: downgradeTo, + provider: event.provider, + maxOutputTokens: SHORT_OUTPUT_TOKENS, + }; + } + + if ( + event.totalTokens <= QUICK_LOOKUP_TOKENS || + (typeof event.durationMs === 'number' && event.durationMs <= QUICK_LOOKUP_DURATION_MS) + ) { + 
return { + id: 'quick-lookup', + label: 'Quick lookup downgrade', + kind: 'quick-lookup', + fromModels: [event.model], + toModel: downgradeTo, + provider: event.provider, + maxTotalTokens: QUICK_LOOKUP_TOKENS, + maxDurationMs: QUICK_LOOKUP_DURATION_MS, + }; + } + + const ratio = event.inputTokens > 0 ? event.outputTokens / event.inputTokens : null; + if (ratio !== null && ratio <= LOW_OUTPUT_RATIO && event.outputTokens <= SHORT_OUTPUT_TOKENS * 2) { + return { + id: 'low-output-ratio', + label: 'Low output ratio downgrade', + kind: 'low-output-ratio', + fromModels: [event.model], + toModel: downgradeTo, + provider: event.provider, + maxOutputTokens: SHORT_OUTPUT_TOKENS * 2, + }; + } + + return null; +} + +function manualRuleFor(event: UsageEvent, rules: RoutingRule[]): RoutingRule | null { + const model = normalizeModel(event.model); + return rules.find((rule) => { + if (rule.provider && rule.provider !== event.provider) return false; + if (!rule.fromModels.map(normalizeModel).includes(model)) return false; + if (typeof rule.maxOutputTokens === 'number' && event.outputTokens > rule.maxOutputTokens) return false; + if (typeof rule.maxTotalTokens === 'number' && event.totalTokens > rule.maxTotalTokens) return false; + if ( + typeof rule.maxDurationMs === 'number' && + typeof event.durationMs === 'number' && + event.durationMs > rule.maxDurationMs + ) { + return false; + } + return true; + }) ?? null; +} + +export function buildRoutingSimulationReport( + events: UsageEvent[], + dateRange: DateRange, + pricing: Readonly>, + options: BuildRoutingSimulationOptions = {}, +): RoutingSimulationReport { + const strategy = options.strategy ?? 'conservative'; + const warnings: string[] = []; + const candidates: RoutingSimulationCandidate[] = []; + const rulesById = new Map(); + const downgradePath = options.downgradePath ?? 
defaultDowngradePath; + let currentCost = 0; + + for (let index = 0; index < events.length; index++) { + const event = events[index]!; + currentCost += Number.isFinite(event.cost) && event.cost > 0 ? event.cost : 0; + + if (hasMalformedTokens(event)) { + warnings.push(`Skipped ${event.provider} event at ${event.timestamp}: negative token counts are not routable.`); + continue; + } + if (event.inputTokens === 0 && event.outputTokens === 0) { + warnings.push(`Skipped ${event.provider} event at ${event.timestamp}: no input/output tokens.`); + continue; + } + + const manualRule = manualRuleFor(event, options.rules ?? []); + const targetModel = manualRule?.toModel ?? downgradePath(event.model); + if (!targetModel) { + warnings.push(`No downgrade path for ${event.model}.`); + continue; + } + if (normalizeModel(targetModel) === normalizeModel(event.model)) { + warnings.push(`Skipped ${event.model}: routing target matches source model.`); + continue; + } + + const rule = manualRule ?? builtInRuleFor(event, targetModel); + if (!rule || strategy === 'manual' && !manualRule) { + continue; + } + + const targetPricing = pricing[normalizeModel(targetModel)]; + if (!targetPricing) { + warnings.push(`Missing pricing for routing target ${targetModel}.`); + continue; + } + + const simulatedCost = priceEvent(event, targetPricing); + const eventCost = Number.isFinite(event.cost) && event.cost >= 0 ? 
event.cost : 0; + const reasons = [rule.label]; + if (event.costSource === 'provider-reported') { + reasons.push('provider reported current cost'); + } + if ((event.cacheReadTokens > 0 || event.cacheWriteTokens > 0) && targetPricing) { + reasons.push('cache-aware target pricing'); + } + + rulesById.set(rule.id, rule); + candidates.push({ + ruleId: rule.id, + eventId: eventId(event, index), + provider: event.provider, + fromModel: event.model, + toModel: targetModel, + currentCost: eventCost, + simulatedCost, + savings: eventCost - simulatedCost, + tokens: event.totalTokens, + confidence: 'medium', + reasons, + }); + } + + const matchedCount = candidates.length; + for (const candidate of candidates) { + const event = events.find((entry, index) => eventId(entry, index) === candidate.eventId); + if (event) { + candidate.confidence = confidenceFor(event, matchedCount, candidate.reasons); + } + } + + const positiveCandidates = candidates.filter((candidate) => (candidate.savings ?? 0) > 0); + const estimatedSavings = positiveCandidates.reduce((sum, candidate) => sum + (candidate.savings ?? 0), 0); + const affectedTokens = positiveCandidates.reduce((sum, candidate) => sum + candidate.tokens, 0); + + if (warnings.some((warning) => warning.includes('Missing pricing') || warning.includes('No downgrade'))) { + warnings.push('Savings are calculated on the priced subset only.'); + } + + return { + method: METHOD, + dateRange, + strategy, + currentCost, + simulatedCost: currentCost - estimatedSavings, + estimatedSavings, + estimatedSavingsPercent: currentCost > 0 ? estimatedSavings / currentCost : 0, + affectedEvents: positiveCandidates.length, + affectedTokens, + candidates: candidates.sort((a, b) => (b.savings ?? -Infinity) - (a.savings ?? 
-Infinity)), + rules: [...rulesById.values()].sort((a, b) => a.id.localeCompare(b.id)), + warnings: [...new Set(warnings)], + }; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 16aca61..f925ce1 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -47,6 +47,20 @@ export type { WasteRecipe, WasteFinding, WasteReport, + OptimizationConfidence, + OptimizationImpact, + OptimizationEvidence, + RoutingRuleKind, + RoutingRule, + RoutingSimulationCandidate, + RoutingSimulationReport, + WasteSignalKind, + AgentWasteSignal, + AgentWasteReport, + BehaviorDiffDimension, + BehaviorCohortSelector, + BehaviorCohortMetrics, + AgentBehaviorDiffReport, NutritionOutcomeSignal, NutritionRepoSummary, NutritionReport, @@ -95,6 +109,9 @@ export { buildCommonsPromptExport, inspectCommonsExport, buildWasteReport, + buildAgentWasteReport, + buildRoutingSimulationReport, + buildAgentBehaviorDiffReport, buildNutritionReport, collectGitOutcomeSignals, clusterPrompts, @@ -106,6 +123,8 @@ export { } from './aggregation'; export type { + BuildRoutingSimulationOptions, + RoutingModelPricing, PromptCluster, ClusterOptions, Receipt, diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index da325aa..a331dc0 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -520,6 +520,156 @@ export interface NutritionReport { missingOutcomeRepos: string[]; } +export type OptimizationConfidence = 'high' | 'medium' | 'low'; +export type OptimizationImpact = 'cost' | 'tokens' | 'cache' | 'time' | 'quality-risk'; + +export interface OptimizationEvidence { + provider?: string; + model?: string; + projectId?: string | null; + repoRoot?: string | null; + sessionId?: string | null; + date?: string; + eventCount: number; + tokens: number; + cost: number; + reason: string; +} + +export type RoutingRuleKind = + | 'premium-short-output' + | 'quick-lookup' + | 'low-output-ratio' + | 'receipt-category' + | 'session-style' + | 
'project-default' + | 'manual-model-map'; + +export interface RoutingRule { + id: string; + label: string; + kind: RoutingRuleKind; + fromModels: string[]; + toModel: string; + provider?: string; + maxOutputTokens?: number; + maxTotalTokens?: number; + maxDurationMs?: number; + receiptCategories?: string[]; + taskStyles?: string[]; +} + +export interface RoutingSimulationCandidate { + ruleId: string; + eventId: string; + provider: string; + fromModel: string; + toModel: string; + currentCost: number; + simulatedCost: number | null; + savings: number | null; + tokens: number; + confidence: OptimizationConfidence; + reasons: string[]; +} + +export interface RoutingSimulationReport { + method: string; + dateRange: DateRange; + strategy: string; + currentCost: number; + simulatedCost: number; + estimatedSavings: number; + estimatedSavingsPercent: number; + affectedEvents: number; + affectedTokens: number; + candidates: RoutingSimulationCandidate[]; + rules: RoutingRule[]; + warnings: string[]; +} + +export type WasteSignalKind = + | 'context-drag' + | 'retry-loop' + | 'model-churn' + | 'premium-for-small-task' + | 'cache-miss-heavy' + | 'cache-write-waste' + | 'burst-with-low-output' + | 'long-session-low-yield' + | 'prompt-repeat' + | 'outcome-missing'; + +export interface AgentWasteSignal { + kind: WasteSignalKind; + title: string; + severity: 'high' | 'medium' | 'low'; + confidence: OptimizationConfidence; + estimatedSavings: number | null; + evidence: OptimizationEvidence; + recipes: WasteRecipe[]; +} + +export interface AgentWasteReport { + method: string; + dateRange: DateRange; + summary: { + totalSignals: number; + highSeverity: number; + estimatedSavings: number | null; + analyzedEvents: number; + analyzedSessions: number; + }; + signals: AgentWasteSignal[]; + warnings: string[]; +} + +export type BehaviorDiffDimension = 'provider' | 'model' | 'project' | 'repo' | 'date-range' | 'session-style'; + +export interface BehaviorCohortSelector { + label: string; + 
dimension: BehaviorDiffDimension; + provider?: string; + model?: string; + projectId?: string; + repoRoot?: string; + dateRange?: DateRange; + taskStyle?: AttributionTaskStyle; +} + +export interface BehaviorCohortMetrics { + events: number; + sessions: number; + activeDays: number; + tokens: number; + cost: number; + inputPerOutput: number | null; + outputPerDollar: number | null; + cacheHitRate: number; + cacheReuseRatio: number | null; + modelSwitchesPerSession: number; + wasteSignals: number; + highSeverityWasteSignals: number; + estimatedWasteSavings: number | null; + averageSessionDurationMs: number | null; +} + +export interface AgentBehaviorDiffReport { + method: string; + dateRange: DateRange; + baseline: { + selector: BehaviorCohortSelector; + metrics: BehaviorCohortMetrics; + }; + comparison: { + selector: BehaviorCohortSelector; + metrics: BehaviorCohortMetrics; + }; + deltas: Record; + takeaways: string[]; + warnings: string[]; +} + export interface ModelMixShiftEntry { model: string; currentShare: number; @@ -563,6 +713,11 @@ export interface TokenleakOutput { providers: ProviderData[]; aggregated: AggregatedStats; more?: MoreStats | null; + optimization?: { + routingSimulation?: RoutingSimulationReport | null; + agentWaste?: AgentWasteReport | null; + behaviorDiff?: AgentBehaviorDiffReport | null; + }; } export interface RenderOptions { diff --git a/packages/mcp/src/server.test.ts b/packages/mcp/src/server.test.ts index 2aacde9..15364de 100644 --- a/packages/mcp/src/server.test.ts +++ b/packages/mcp/src/server.test.ts @@ -105,19 +105,22 @@ async function createConnectedClient(registry?: ProviderRegistry) { // --------------------------------------------------------------------------- describe('MCP Server', () => { - it('lists all 8 tools', async () => { + it('lists all 11 tools', async () => { const { client } = await createConnectedClient(); const result = await client.listTools(); - expect(result.tools).toHaveLength(8); + 
expect(result.tools).toHaveLength(11); const names = result.tools.map((t) => t.name).sort(); expect(names).toEqual([ 'compare_periods', + 'get_agent_behavior_diff', + 'get_agent_waste', 'get_cost_breakdown', 'get_daily_usage', 'get_efficiency_advice', 'get_receipt_lines', + 'get_routing_simulation', 'get_streaks_and_habits', 'get_usage_summary', 'list_providers', @@ -288,6 +291,41 @@ describe('MCP Server', () => { expect(typeof parsed.analyzedDays).toBe('number'); }); + it('calls optimization intelligence tools and returns reports', async () => { + const { client } = await createConnectedClient(); + + const routing = await client.callTool({ + name: 'get_routing_simulation', + arguments: { days: 30 }, + }); + expect(routing.isError).toBeUndefined(); + const routingJson = JSON.parse((routing.content as Array<{ text: string }>)[0]!.text); + expect(routingJson.strategy).toBe('conservative'); + expect(Array.isArray(routingJson.candidates)).toBe(true); + + const waste = await client.callTool({ + name: 'get_agent_waste', + arguments: { days: 30 }, + }); + expect(waste.isError).toBeUndefined(); + const wasteJson = JSON.parse((waste.content as Array<{ text: string }>)[0]!.text); + expect(wasteJson.summary).toBeDefined(); + expect(Array.isArray(wasteJson.signals)).toBe(true); + + const diff = await client.callTool({ + name: 'get_agent_behavior_diff', + arguments: { + days: 30, + baseline: { label: 'A', dimension: 'provider', provider: 'test-provider-a' }, + comparison: { label: 'B', dimension: 'provider', provider: 'test-provider-b' }, + }, + }); + expect(diff.isError).toBeUndefined(); + const diffJson = JSON.parse((diff.content as Array<{ text: string }>)[0]!.text); + expect(diffJson.takeaways).toBeDefined(); + expect(diffJson.baseline.selector.label).toBe('A'); + }); + it('handles empty registry gracefully for get_usage_summary', async () => { const emptyRegistry = new ProviderRegistry(); const { client } = await createConnectedClient(emptyRegistry); diff --git 
a/packages/mcp/src/tools/get-agent-behavior-diff.ts b/packages/mcp/src/tools/get-agent-behavior-diff.ts new file mode 100644 index 0000000..6f16284 --- /dev/null +++ b/packages/mcp/src/tools/get-agent-behavior-diff.ts @@ -0,0 +1,36 @@ +import { buildAgentBehaviorDiffReport, SCHEMA_VERSION } from '@tokenleak/core'; +import type { BehaviorCohortSelector, UsageEvent } from '@tokenleak/core'; +import type { ProviderRegistry } from '@tokenleak/registry'; +import { resolveRange } from '../shared/date-range.js'; +import { loadProviderData } from '../shared/provider-load.js'; + +export async function handleGetAgentBehaviorDiff( + args: { + days?: number; + since?: string; + until?: string; + baseline: BehaviorCohortSelector; + comparison: BehaviorCohortSelector; + }, + registry: ProviderRegistry, +) { + try { + const range = resolveRange(args); + const available = await registry.getAvailable(); + const { data, warnings } = await loadProviderData(available, range); + const events: UsageEvent[] = data.flatMap((provider) => provider.events ?? []); + const report = buildAgentBehaviorDiffReport(events, range, args.baseline, args.comparison); + + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify({ schemaVersion: SCHEMA_VERSION, ...report, providerWarnings: warnings }, null, 2), + }, + ], + }; + } catch (error: unknown) { + const message = error instanceof Error ? 
error.message : String(error); + return { isError: true, content: [{ type: 'text' as const, text: message }] }; + } +} diff --git a/packages/mcp/src/tools/get-agent-waste.ts b/packages/mcp/src/tools/get-agent-waste.ts new file mode 100644 index 0000000..8939e79 --- /dev/null +++ b/packages/mcp/src/tools/get-agent-waste.ts @@ -0,0 +1,47 @@ +import { buildAgentWasteReport, SCHEMA_VERSION } from '@tokenleak/core'; +import type { UsageEvent } from '@tokenleak/core'; +import type { ProviderRegistry } from '@tokenleak/registry'; +import { resolveRange } from '../shared/date-range.js'; +import { loadProviderData } from '../shared/provider-load.js'; + +export async function handleGetAgentWaste( + args: { days?: number; since?: string; until?: string; provider?: string; severity?: string }, + registry: ProviderRegistry, +) { + try { + const range = resolveRange(args); + const available = await registry.getAvailable(); + const filtered = args.provider ? available.filter((p) => p.name === args.provider) : available; + const { data, warnings } = await loadProviderData(filtered, range); + const events: UsageEvent[] = data.flatMap((provider) => provider.events ?? []); + const report = buildAgentWasteReport(data, events, range); + const severity = args.severity ?? 'all'; + if (!['all', 'high', 'medium', 'low'].includes(severity)) { + throw new Error('severity must be all, high, medium, or low'); + } + + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify( + { + schemaVersion: SCHEMA_VERSION, + ...report, + signals: + severity === 'all' + ? report.signals + : report.signals.filter((signal) => signal.severity === severity), + providerWarnings: warnings, + }, + null, + 2, + ), + }, + ], + }; + } catch (error: unknown) { + const message = error instanceof Error ? 
error.message : String(error); + return { isError: true, content: [{ type: 'text' as const, text: message }] }; + } +} diff --git a/packages/mcp/src/tools/get-routing-simulation.ts b/packages/mcp/src/tools/get-routing-simulation.ts new file mode 100644 index 0000000..7b43daa --- /dev/null +++ b/packages/mcp/src/tools/get-routing-simulation.ts @@ -0,0 +1,33 @@ +import { buildRoutingSimulationReport, SCHEMA_VERSION } from '@tokenleak/core'; +import type { UsageEvent } from '@tokenleak/core'; +import { MODEL_PRICING, type ProviderRegistry } from '@tokenleak/registry'; +import { resolveRange } from '../shared/date-range.js'; +import { loadProviderData } from '../shared/provider-load.js'; + +export async function handleGetRoutingSimulation( + args: { days?: number; since?: string; until?: string; provider?: string; strategy?: string }, + registry: ProviderRegistry, +) { + try { + const range = resolveRange(args); + const available = await registry.getAvailable(); + const filtered = args.provider ? available.filter((p) => p.name === args.provider) : available; + const { data, warnings } = await loadProviderData(filtered, range); + const events: UsageEvent[] = data.flatMap((provider) => provider.events ?? []); + const report = buildRoutingSimulationReport(events, range, MODEL_PRICING, { + strategy: args.strategy ?? 'conservative', + }); + + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify({ schemaVersion: SCHEMA_VERSION, ...report, providerWarnings: warnings }, null, 2), + }, + ], + }; + } catch (error: unknown) { + const message = error instanceof Error ? 
error.message : String(error); + return { isError: true, content: [{ type: 'text' as const, text: message }] }; + } +} diff --git a/packages/mcp/src/tools/index.ts b/packages/mcp/src/tools/index.ts index baafa63..112cc71 100644 --- a/packages/mcp/src/tools/index.ts +++ b/packages/mcp/src/tools/index.ts @@ -9,6 +9,20 @@ import { handleGetStreaksAndHabits } from './get-streaks-and-habits.js'; import { handleComparePeriods } from './compare-periods.js'; import { handleGetEfficiencyAdvice } from './get-efficiency-advice.js'; import { handleGetReceiptLines } from './get-receipt-lines.js'; +import { handleGetRoutingSimulation } from './get-routing-simulation.js'; +import { handleGetAgentWaste } from './get-agent-waste.js'; +import { handleGetAgentBehaviorDiff } from './get-agent-behavior-diff.js'; + +const behaviorSelectorSchema = z.object({ + label: z.string(), + dimension: z.enum(['provider', 'model', 'project', 'repo', 'date-range', 'session-style']), + provider: z.string().optional(), + model: z.string().optional(), + projectId: z.string().optional(), + repoRoot: z.string().optional(), + dateRange: z.object({ since: z.string(), until: z.string() }).optional(), + taskStyle: z.enum(['quick-hit', 'iterative', 'deep-work', 'mixed']).optional(), +}); export function registerTools(server: McpServer, registry: ProviderRegistry): void { server.tool( @@ -106,4 +120,43 @@ export function registerTools(server: McpServer, registry: ProviderRegistry): vo }, async (args) => handleGetReceiptLines(args, registry), ); + + server.tool( + 'get_routing_simulation', + 'Simulate model-routing savings by re-pricing historical events under conservative or manual downgrade rules.', + { + days: z.number().optional().describe('Number of days to look back (default: 30)'), + since: z.string().optional().describe('Start date in YYYY-MM-DD format'), + until: z.string().optional().describe('End date in YYYY-MM-DD format (default: today)'), + provider: z.string().optional().describe('Filter to a 
specific provider by name'), + strategy: z.string().optional().describe('Routing strategy name, default conservative'), + }, + async (args) => handleGetRoutingSimulation(args, registry), + ); + + server.tool( + 'get_agent_waste', + 'Detect agent waste signals such as context drag, repeated prompts, model churn, and cache waste.', + { + days: z.number().optional().describe('Number of days to look back (default: 30)'), + since: z.string().optional().describe('Start date in YYYY-MM-DD format'), + until: z.string().optional().describe('End date in YYYY-MM-DD format (default: today)'), + provider: z.string().optional().describe('Filter to a specific provider by name'), + severity: z.enum(['all', 'high', 'medium', 'low']).optional().describe('Optional severity filter'), + }, + async (args) => handleGetAgentWaste(args, registry), + ); + + server.tool( + 'get_agent_behavior_diff', + 'Compare two cohorts by provider, model, project, repo, date range, or session style.', + { + days: z.number().optional().describe('Number of days to look back (default: 30)'), + since: z.string().optional().describe('Start date in YYYY-MM-DD format'), + until: z.string().optional().describe('End date in YYYY-MM-DD format (default: today)'), + baseline: behaviorSelectorSchema.describe('Baseline cohort selector'), + comparison: behaviorSelectorSchema.describe('Comparison cohort selector'), + }, + async (args) => handleGetAgentBehaviorDiff(args, registry), + ); } diff --git a/packages/renderers/src/index.ts b/packages/renderers/src/index.ts index a61cfe9..9f43c06 100644 --- a/packages/renderers/src/index.ts +++ b/packages/renderers/src/index.ts @@ -26,6 +26,9 @@ export { renderModelView, renderTokenView, renderCwdView, + renderRoutingSimulatorView, + renderAgentWasteView, + renderAgentBehaviorDiffView, TIME_RANGES, METRIC_TABS, } from './terminal/index'; diff --git a/packages/renderers/src/live/template.ts b/packages/renderers/src/live/template.ts index da3c8aa..adfa593 100644 --- 
a/packages/renderers/src/live/template.ts +++ b/packages/renderers/src/live/template.ts @@ -222,6 +222,29 @@ export function generateHtml(output: TokenleakOutput, options: RenderOptions): s `; }).join(''); + const optimization = output.optimization; + const optimizationHtml = optimization + ? `
+
+
OPTIMIZATION INTELLIGENCE
+
+ Routing savings +
+ ${esc(formatCost(optimization.routingSimulation?.estimatedSavings ?? 0))} +
+
+ Waste signals +
+ ${optimization.agentWaste?.summary.totalSignals ?? 0} +
+
+ ${esc(optimization.behaviorDiff?.baseline.selector.label ?? 'Baseline')} vs ${esc(optimization.behaviorDiff?.comparison.selector.label ?? 'Compare')} +
+ ${optimization.behaviorDiff?.deltas.cost === null || optimization.behaviorDiff?.deltas.cost === undefined ? '-' : esc(formatCost(optimization.behaviorDiff.deltas.cost))} +
+
` + : ''; + const overallLabel = providers.length > 1 ? '
OVERALL
' : ''; @@ -371,6 +394,7 @@ export function generateHtml(output: TokenleakOutput, options: RenderOptions): s
TOP MODELS
${modelsHtml} + ${optimizationHtml} diff --git a/packages/renderers/src/terminal/index.ts b/packages/renderers/src/terminal/index.ts index 0cb1c69..94b9eee 100644 --- a/packages/renderers/src/terminal/index.ts +++ b/packages/renderers/src/terminal/index.ts @@ -26,6 +26,9 @@ export { renderModelView, renderTokenView, renderCwdView, + renderRoutingSimulatorView, + renderAgentWasteView, + renderAgentBehaviorDiffView, TIME_RANGES, METRIC_TABS, } from './tab-views'; diff --git a/packages/renderers/src/terminal/tab-views/agent-behavior-diff-view.ts b/packages/renderers/src/terminal/tab-views/agent-behavior-diff-view.ts new file mode 100644 index 0000000..55dad8a --- /dev/null +++ b/packages/renderers/src/terminal/tab-views/agent-behavior-diff-view.ts @@ -0,0 +1,41 @@ +import type { BehaviorCohortMetrics, TokenleakOutput } from '@tokenleak/core'; +import { bold, dim } from '../colors'; +import { truncateVisible } from '../layout'; + +function fmt(key: keyof BehaviorCohortMetrics, value: number | null): string { + if (value === null) return '-'; + if (key === 'cost' || key === 'estimatedWasteSavings') return `$${value.toFixed(4)}`; + if (key === 'cacheHitRate') return `${(value * 100).toFixed(0)}%`; + if (key === 'tokens') return Intl.NumberFormat('en-US', { notation: 'compact', maximumFractionDigits: 1 }).format(value); + if (key === 'inputPerOutput' || key === 'outputPerDollar' || key === 'modelSwitchesPerSession') return value.toFixed(2); + return Math.round(value).toLocaleString('en-US'); +} + +export function renderAgentBehaviorDiffView(output: TokenleakOutput, width: number, noColor: boolean): string { + const report = output.optimization?.behaviorDiff; + const lines = [bold('Agent Behavior Diff', noColor), '']; + if (!report) { + lines.push(dim('No behavior diff is available for this output.', noColor)); + return lines.join('\n'); + } + lines.push(`${report.baseline.selector.label} vs ${report.comparison.selector.label}`); + lines.push(''); + const metrics: 
Array<[string, keyof BehaviorCohortMetrics]> = [ + ['Events', 'events'], + ['Sessions', 'sessions'], + ['Tokens', 'tokens'], + ['Cost', 'cost'], + ['Input/Output', 'inputPerOutput'], + ['Output/$', 'outputPerDollar'], + ['Cache hit', 'cacheHitRate'], + ['Waste', 'wasteSignals'], + ]; + for (const [label, key] of metrics) { + lines.push(`${label.padEnd(14)} ${fmt(key, report.baseline.metrics[key]).padStart(10)} ${fmt(key, report.comparison.metrics[key]).padStart(10)} ${fmt(key, report.deltas[key]).padStart(10)}`); + } + lines.push('', bold('Takeaways', noColor)); + for (const takeaway of report.takeaways.slice(0, 4)) { + lines.push(truncateVisible(`- ${takeaway}`, width)); + } + return lines.join('\n'); +} diff --git a/packages/renderers/src/terminal/tab-views/agent-waste-view.ts b/packages/renderers/src/terminal/tab-views/agent-waste-view.ts new file mode 100644 index 0000000..a942603 --- /dev/null +++ b/packages/renderers/src/terminal/tab-views/agent-waste-view.ts @@ -0,0 +1,28 @@ +import type { TokenleakOutput } from '@tokenleak/core'; +import { bold, dim } from '../colors'; +import { truncateVisible } from '../layout'; + +function money(value: number | null): string { + return value === null ? '-' : `$${value.toFixed(4)}`; +} + +export function renderAgentWasteView(output: TokenleakOutput, width: number, noColor: boolean): string { + const report = output.optimization?.agentWaste; + const lines = [bold('Agent Waste', noColor), '']; + if (!report) { + lines.push(dim('No waste report is available for this output.', noColor)); + return lines.join('\n'); + } + lines.push(`Signals ${report.summary.totalSignals} High ${report.summary.highSeverity} Est. 
savings ${money(report.summary.estimatedSavings)}`); + lines.push(`Analyzed ${report.summary.analyzedEvents} events / ${report.summary.analyzedSessions} sessions`); + lines.push(''); + for (const signal of report.signals.slice(0, 12)) { + lines.push(truncateVisible( + `[${signal.severity.toUpperCase()}] ${signal.kind} ${money(signal.estimatedSavings)} ${signal.title}`, + width, + )); + lines.push(dim(truncateVisible(` ${signal.evidence.reason}`, width), noColor)); + } + if (report.signals.length === 0) lines.push(dim('No deterministic waste signals detected.', noColor)); + return lines.join('\n'); +} diff --git a/packages/renderers/src/terminal/tab-views/index.ts b/packages/renderers/src/terminal/tab-views/index.ts index 35be638..6b33935 100644 --- a/packages/renderers/src/terminal/tab-views/index.ts +++ b/packages/renderers/src/terminal/tab-views/index.ts @@ -10,3 +10,6 @@ export { renderSessionView } from './session-view'; export { renderModelView } from './model-view'; export { renderTokenView } from './token-view'; export { renderCwdView } from './cwd-view'; +export { renderRoutingSimulatorView } from './routing-simulator-view'; +export { renderAgentWasteView } from './agent-waste-view'; +export { renderAgentBehaviorDiffView } from './agent-behavior-diff-view'; diff --git a/packages/renderers/src/terminal/tab-views/routing-simulator-view.ts b/packages/renderers/src/terminal/tab-views/routing-simulator-view.ts new file mode 100644 index 0000000..35218cd --- /dev/null +++ b/packages/renderers/src/terminal/tab-views/routing-simulator-view.ts @@ -0,0 +1,35 @@ +import type { TokenleakOutput } from '@tokenleak/core'; +import { bold, dim } from '../colors'; +import { truncateVisible } from '../layout'; + +function formatCompactNumber(value: number): string { + return Intl.NumberFormat('en-US', { notation: 'compact', maximumFractionDigits: 1 }).format(value); +} + +function formatCurrency(value: number): string { + return `$${value.toFixed(4)}`; +} + +export function 
renderRoutingSimulatorView(output: TokenleakOutput, width: number, noColor: boolean): string { + const report = output.optimization?.routingSimulation; + const lines = [bold('Routing Simulator', noColor), '']; + if (!report) { + lines.push(dim('No routing simulation is available for this output.', noColor)); + return lines.join('\n'); + } + lines.push(`Current ${formatCurrency(report.currentCost)} -> Simulated ${formatCurrency(report.simulatedCost)} Savings ${formatCurrency(report.estimatedSavings)} (${(report.estimatedSavingsPercent * 100).toFixed(1)}%)`); + lines.push(`Affected ${report.affectedEvents} events / ${formatCompactNumber(report.affectedTokens)} tokens Strategy ${report.strategy}`); + lines.push(''); + const candidates = report.candidates.filter((candidate) => (candidate.savings ?? 0) > 0).slice(0, 12); + if (candidates.length === 0) { + lines.push(dim('No positive routing candidates found.', noColor)); + return lines.join('\n'); + } + for (const candidate of candidates) { + lines.push(truncateVisible( + `${candidate.ruleId.padEnd(22)} ${candidate.fromModel} -> ${candidate.toModel} ${formatCurrency(candidate.savings ?? 
0)} [${candidate.confidence}]`, + width, + )); + } + return lines.join('\n'); +} diff --git a/packages/renderers/src/terminal/tab-views/tab-bar.ts b/packages/renderers/src/terminal/tab-views/tab-bar.ts index decc92e..0284c97 100644 --- a/packages/renderers/src/terminal/tab-views/tab-bar.ts +++ b/packages/renderers/src/terminal/tab-views/tab-bar.ts @@ -2,10 +2,10 @@ import { bold, bold256, dim, inverse256, SEMANTIC } from '../colors'; import { truncateVisible } from '../layout'; export type TimeRange = '7d' | '30d' | '90d' | '365d'; -export type MetricTab = 'overview' | 'delta' | 'provider' | 'sess' | 'tok' | 'model' | 'cwd' | 'dow' | 'tod'; +export type MetricTab = 'overview' | 'delta' | 'provider' | 'sess' | 'tok' | 'model' | 'cwd' | 'dow' | 'tod' | 'sim' | 'waste' | 'diff'; export const TIME_RANGES: TimeRange[] = ['7d', '30d', '90d', '365d']; -export const METRIC_TABS: MetricTab[] = ['overview', 'delta', 'provider', 'sess', 'tok', 'model', 'cwd', 'dow', 'tod']; +export const METRIC_TABS: MetricTab[] = ['overview', 'delta', 'provider', 'sess', 'tok', 'model', 'cwd', 'dow', 'tod', 'sim', 'waste', 'diff']; const TAB_LABELS: Record = { overview: 'overview', @@ -17,6 +17,9 @@ const TAB_LABELS: Record = { cwd: 'cwd', dow: 'dow', tod: 'tod', + sim: 'sim', + waste: 'waste', + diff: 'diff', }; const ACTIVE_COLOR = SEMANTIC.ACTIVE; @@ -48,7 +51,7 @@ export function renderTabBar( const hints = [ `${bold256('←/→', HINT_COLOR, noColor)} range`, `${bold256('tab', HINT_COLOR, noColor)} metric`, - `${bold256('1-9', HINT_COLOR, noColor)} jump`, + `${bold256('1-9/0', HINT_COLOR, noColor)} jump`, `${bold256('↑/↓', HINT_COLOR, noColor)} scroll`, `${bold256('q', HINT_COLOR, noColor)} close`, ]; diff --git a/packages/tui/src/index.ts b/packages/tui/src/index.ts index 017674c..226ca73 100644 --- a/packages/tui/src/index.ts +++ b/packages/tui/src/index.ts @@ -30,6 +30,9 @@ import { ensureMoreStats, ensureReplayReport, ensureWasteReport, + ensureAgentWasteReport, + 
ensureRoutingSimulationReport, + ensureAgentBehaviorDiffReport, ensureNutritionReport, ensureReceipt, deriveReceiptLines, @@ -80,6 +83,9 @@ import { import type { ReplayPlaybackSpeed } from './lib/state.js'; import { createNutritionPanel, NUTRITION_VISIBLE_ROWS } from './panels/nutrition.js'; import { createReceiptsPanel, RECEIPTS_MAX_CONTENT_WIDTH, RECEIPTS_VISIBLE_ROWS } from './panels/receipts.js'; +import { createSimulatorPanel, SIMULATOR_MAX_CONTENT_WIDTH, SIMULATOR_VISIBLE_ROWS } from './panels/simulator.js'; +import { createWastePanel, WASTE_MAX_CONTENT_WIDTH, WASTE_VISIBLE_ROWS } from './panels/waste.js'; +import { createBehaviorPanel } from './panels/behavior.js'; import { buildCursorBanner, createCursorSetupPanel, isEscapeKeySequence } from './panels/cursor-setup.js'; const CURSOR_SETUP_LABEL_INPUT_ID = 'cursor-setup-label-input'; @@ -154,6 +160,12 @@ function getSelectedViewTaskKey(state: AppState, view: ViewMode = state.selected return `nutrition:${base}`; case 'receipts': return `receipts:${base}`; + case 'simulator': + return `simulator:${base}`; + case 'waste': + return `waste:${base}`; + case 'behavior': + return `behavior:${base}`; default: return `${view}:${base}`; } @@ -431,6 +443,47 @@ function buildContent(state: AppState, renderer: CliRenderer) { render(state, renderer); }, ); + case 'simulator': + if (!hasWindowData) { + return createSimulatorPanel(null); + } + if (!state.cachedRoutingSimulationReport) { + const key = getSelectedViewTaskKey(state, 'simulator'); + return deferredPanelForTask(state, renderer, key, 'Routing Simulator', () => { + ensureRoutingSimulationReport(state); + }); + } + return createSimulatorPanel( + state.cachedRoutingSimulationReport, + state.simulatorScrollOffset, + getPanelContentWidth(renderer, SIMULATOR_MAX_CONTENT_WIDTH), + ); + case 'waste': + if (!hasWindowData) { + return createWastePanel(null); + } + if (!state.cachedAgentWasteReport) { + const key = getSelectedViewTaskKey(state, 'waste'); + return 
deferredPanelForTask(state, renderer, key, 'Waste Signals', () => { + ensureAgentWasteReport(state); + }); + } + return createWastePanel( + state.cachedAgentWasteReport, + state.wasteScrollOffset, + getPanelContentWidth(renderer, WASTE_MAX_CONTENT_WIDTH), + ); + case 'behavior': + if (!hasWindowData) { + return createBehaviorPanel(null); + } + if (!state.cachedBehaviorDiffReport) { + const key = getSelectedViewTaskKey(state, 'behavior'); + return deferredPanelForTask(state, renderer, key, 'Behavior Diff', () => { + ensureAgentBehaviorDiffReport(state); + }); + } + return createBehaviorPanel(state.cachedBehaviorDiffReport); default: return Box({ flexDirection: 'column', width: '100%', flexGrow: 1 }); } @@ -526,6 +579,9 @@ function applyLoadedData( state.receiptsExpandedLineIndex = null; state.receiptsSortMode = 'cost'; state.receiptsCategoryFilter = null; + state.simulatorScrollOffset = 0; + state.wasteScrollOffset = 0; + state.behaviorScrollOffset = 0; state.nutritionSignalsLoading = false; state.nutritionSignalsLoadedKeys.clear(); clearViewTaskState(state); @@ -981,6 +1037,9 @@ function handleViewSwitch(mode: ViewMode): void { currentState.nutritionScrollOffset = 0; currentState.compareScrollOffset = 0; currentState.wrappedScrollOffset = 0; + currentState.simulatorScrollOffset = 0; + currentState.wasteScrollOffset = 0; + currentState.behaviorScrollOffset = 0; resetReplayPanelState(currentState); resetReceiptsInteraction(currentState); currentState.receiptsSortMode = 'cost'; @@ -1041,6 +1100,9 @@ function invalidateWindowCaches(state: AppState): void { state.cachedWasteReport = null; state.cachedNutritionReport = null; state.cachedReceipt = null; + state.cachedRoutingSimulationReport = null; + state.cachedAgentWasteReport = null; + state.cachedBehaviorDiffReport = null; resetReceiptsInteraction(state); state.receiptsSortMode = 'cost'; state.receiptsCategoryFilter = null; @@ -1061,6 +1123,9 @@ function invalidateAllCaches(state: AppState): void { 
state.cachedWasteReport = null; state.cachedNutritionReport = null; state.cachedReceipt = null; + state.cachedRoutingSimulationReport = null; + state.cachedAgentWasteReport = null; + state.cachedBehaviorDiffReport = null; resetReplayDataState(state); resetReceiptsInteraction(state); state.nutritionSignalsLoading = false; @@ -1099,6 +1164,9 @@ const VIEW_KEYS: Record = { '9': 'replay', '0': 'nutrition', R: 'receipts', + X: 'simulator', + Y: 'waste', + Z: 'behavior', }; const VIEW_ORDER: ViewMode[] = [ @@ -1113,6 +1181,9 @@ const VIEW_ORDER: ViewMode[] = [ 'replay', 'nutrition', 'receipts', + 'simulator', + 'waste', + 'behavior', ]; /** Views that support j/k scrolling and their scroll offset field */ @@ -1124,6 +1195,9 @@ const SCROLLABLE_VIEWS = new Set([ 'replay', 'nutrition', 'receipts', + 'simulator', + 'waste', + 'behavior', ]); function getScrollableItemCount(state: AppState): number { @@ -1146,6 +1220,12 @@ function getScrollableItemCount(state: AppState): number { case 'receipts': { return getReceiptLineCount(state); } + case 'simulator': + return state.cachedRoutingSimulationReport?.candidates.length ?? 0; + case 'waste': + return state.cachedAgentWasteReport?.signals.length ?? 0; + case 'behavior': + return 12; default: return 0; } @@ -1167,6 +1247,11 @@ function getVisibleCount(view: ViewMode): number { return NUTRITION_VISIBLE_ROWS; case 'receipts': return RECEIPTS_VISIBLE_ROWS; + case 'simulator': + case 'waste': + return view === 'simulator' ? 
SIMULATOR_VISIBLE_ROWS : WASTE_VISIBLE_ROWS; + case 'behavior': + return 8; default: return 10; } @@ -1188,6 +1273,12 @@ function getScrollOffset(state: AppState): number { return state.nutritionScrollOffset; case 'receipts': return state.receiptsScrollOffset; + case 'simulator': + return state.simulatorScrollOffset; + case 'waste': + return state.wasteScrollOffset; + case 'behavior': + return state.behaviorScrollOffset; default: return 0; } @@ -1216,6 +1307,15 @@ function setScrollOffset(state: AppState, value: number): void { case 'receipts': state.receiptsScrollOffset = value; break; + case 'simulator': + state.simulatorScrollOffset = value; + break; + case 'waste': + state.wasteScrollOffset = value; + break; + case 'behavior': + state.behaviorScrollOffset = value; + break; } } diff --git a/packages/tui/src/lib/data.ts b/packages/tui/src/lib/data.ts index 40e1787..f562aab 100644 --- a/packages/tui/src/lib/data.ts +++ b/packages/tui/src/lib/data.ts @@ -1,5 +1,8 @@ import type { AggregatedStats, + AgentBehaviorDiffReport, + AgentWasteReport, + BehaviorCohortSelector, AdvisorReport, CompareOutput, DailyUsage, @@ -13,6 +16,7 @@ import type { Receipt, ReceiptLine, ReplayReport, + RoutingSimulationReport, TokenleakOutput, UsageEvent, WasteReport, @@ -23,11 +27,14 @@ import { dirname, join } from 'node:path'; import { aggregate, analyzeEfficiency, + buildAgentBehaviorDiffReport, + buildAgentWasteReport, buildExplainReport, buildFocusReport, buildMoreStats, buildNutritionReport, buildReceipt, + buildRoutingSimulationReport, buildDailyCostCompleteness, buildReplayReport, buildWasteReport, @@ -464,6 +471,79 @@ export function ensureWasteReport(state: AppState): WasteReport | null { return report; } +export function ensureAgentWasteReport(state: AppState): AgentWasteReport | null { + if (!state.data || state.data.windows.length === 0) return null; + if (state.cachedAgentWasteReport) return state.cachedAgentWasteReport; + + const scoped = getScopedWindowData(state); + if 
(!scoped) return null; + + const report = buildAgentWasteReport(scoped.scopedProviders, scoped.events, scoped.windowRange); + state.cachedAgentWasteReport = report; + return report; +} + +export function ensureRoutingSimulationReport(state: AppState): RoutingSimulationReport | null { + if (!state.data || state.data.windows.length === 0) return null; + if (state.cachedRoutingSimulationReport) return state.cachedRoutingSimulationReport; + + const scoped = getScopedWindowData(state); + if (!scoped) return null; + + const report = buildRoutingSimulationReport(scoped.events, scoped.windowRange, MODEL_PRICING, { + strategy: 'conservative', + }); + state.cachedRoutingSimulationReport = report; + return report; +} + +function defaultBehaviorSelectors(scoped: ScopedWindowData): [BehaviorCohortSelector, BehaviorCohortSelector] { + const providerTotals = scoped.scopedProviders + .map((provider) => ({ + provider: provider.provider, + label: provider.displayName, + tokens: provider.totalTokens, + })) + .sort((a, b) => b.tokens - a.tokens || a.label.localeCompare(b.label)); + + if (providerTotals.length >= 2) { + return [ + { label: providerTotals[0]!.label, dimension: 'provider', provider: providerTotals[0]!.provider }, + { label: providerTotals[1]!.label, dimension: 'provider', provider: providerTotals[1]!.provider }, + ]; + } + + const models = new Map(); + for (const event of scoped.events) { + models.set(event.model, (models.get(event.model) ?? 
0) + event.totalTokens); + } + const modelTotals = [...models.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])); + if (modelTotals.length >= 2) { + return [ + { label: modelTotals[0]![0], dimension: 'model', model: modelTotals[0]![0] }, + { label: modelTotals[1]![0], dimension: 'model', model: modelTotals[1]![0] }, + ]; + } + + return [ + { label: 'Current window', dimension: 'date-range', dateRange: scoped.windowRange }, + { label: 'Current window', dimension: 'date-range', dateRange: scoped.windowRange }, + ]; +} + +export function ensureAgentBehaviorDiffReport(state: AppState): AgentBehaviorDiffReport | null { + if (!state.data || state.data.windows.length === 0) return null; + if (state.cachedBehaviorDiffReport) return state.cachedBehaviorDiffReport; + + const scoped = getScopedWindowData(state); + if (!scoped) return null; + + const [baseline, comparison] = defaultBehaviorSelectors(scoped); + const report = buildAgentBehaviorDiffReport(scoped.events, scoped.windowRange, baseline, comparison); + state.cachedBehaviorDiffReport = report; + return report; +} + /** Lazily compute and cache the FocusReport (window-dependent — filters events by date) */ export function ensureFocusReport(state: AppState): FocusReport | null { if (!state.data) return null; diff --git a/packages/tui/src/lib/state.ts b/packages/tui/src/lib/state.ts index 52d97df..324e7c8 100644 --- a/packages/tui/src/lib/state.ts +++ b/packages/tui/src/lib/state.ts @@ -1,5 +1,7 @@ import type { AdvisorReport, + AgentBehaviorDiffReport, + AgentWasteReport, FocusReport, ExplainReport, CompareOutput, @@ -9,6 +11,7 @@ import type { NutritionReport, Receipt, ReceiptCategory, + RoutingSimulationReport, } from '@tokenleak/core'; import type { CursorSetupStatus } from '@tokenleak/registry'; import type { TuiData } from './data.js'; @@ -24,7 +27,10 @@ export type ViewMode = | 'wrapped' | 'replay' | 'nutrition' - | 'receipts'; + | 'receipts' + | 'simulator' + | 'waste' + | 'behavior'; export type 
SortMode = 'cost' | 'tokens'; export type ReceiptsSortMode = 'cost' | 'qty' | 'alpha'; export type CursorSetupField = 'label' | 'token'; @@ -94,6 +100,9 @@ export interface AppState { receiptsExpandedLineIndex: number | null; receiptsSortMode: ReceiptsSortMode; receiptsCategoryFilter: ReceiptCategory | null; + simulatorScrollOffset: number; + wasteScrollOffset: number; + behaviorScrollOffset: number; // lazy caches (null = not yet computed, cleared on refresh) cachedAdvisorReport: AdvisorReport | null; @@ -105,6 +114,9 @@ export interface AppState { cachedWasteReport: WasteReport | null; cachedNutritionReport: NutritionReport | null; cachedReceipt: Receipt | null; + cachedRoutingSimulationReport: RoutingSimulationReport | null; + cachedAgentWasteReport: AgentWasteReport | null; + cachedBehaviorDiffReport: AgentBehaviorDiffReport | null; } export const WINDOW_LABELS = ['1D', '7D', '30D', '90D', 'ALL'] as const; @@ -156,6 +168,9 @@ export function createInitialState(): AppState { receiptsExpandedLineIndex: null, receiptsSortMode: 'cost', receiptsCategoryFilter: null, + simulatorScrollOffset: 0, + wasteScrollOffset: 0, + behaviorScrollOffset: 0, cachedAdvisorReport: null, cachedFocusReport: null, cachedExplainReport: null, @@ -165,5 +180,8 @@ export function createInitialState(): AppState { cachedWasteReport: null, cachedNutritionReport: null, cachedReceipt: null, + cachedRoutingSimulationReport: null, + cachedAgentWasteReport: null, + cachedBehaviorDiffReport: null, }; } diff --git a/packages/tui/src/panels/behavior.ts b/packages/tui/src/panels/behavior.ts new file mode 100644 index 0000000..9068e02 --- /dev/null +++ b/packages/tui/src/panels/behavior.ts @@ -0,0 +1,58 @@ +import { Box, Text } from '@opentui/core'; +import type { AgentBehaviorDiffReport, BehaviorCohortMetrics } from '@tokenleak/core'; +import { COLORS, BOLD } from '../lib/theme.js'; +import { formatCost, formatTokens, truncate } from '../lib/format.js'; + +function metricLine(label: string, base: 
string, compare: string, delta: string) { + return Text({ + content: ` ${label.padEnd(16)} ${base.padStart(14)} ${compare.padStart(14)} ${delta.padStart(12)}`, + fg: COLORS.white, + }); +} + +function fmt(label: keyof BehaviorCohortMetrics, value: number | null): string { + if (value === null) return '-'; + if (label === 'cost' || label === 'estimatedWasteSavings') return formatCost(value); + if (label === 'tokens') return formatTokens(value); + if (label === 'cacheHitRate') return `${(value * 100).toFixed(0)}%`; + if (label === 'inputPerOutput' || label === 'outputPerDollar' || label === 'modelSwitchesPerSession') return value.toFixed(2); + return Math.round(value).toLocaleString('en-US'); +} + +export function createBehaviorPanel(report: AgentBehaviorDiffReport | null) { + if (!report) { + return Box( + { flexDirection: 'column', width: '100%', flexGrow: 1, borderStyle: 'single', borderColor: COLORS.dimWhite, paddingLeft: 1 }, + Text({ content: ' Behavior Diff ', fg: COLORS.amber, attributes: BOLD }), + Text({ content: 'Need at least one cohort to compare behavior', fg: COLORS.dimWhite }), + ); + } + + const metrics: Array<[string, keyof BehaviorCohortMetrics]> = [ + ['Events', 'events'], + ['Sessions', 'sessions'], + ['Tokens', 'tokens'], + ['Cost', 'cost'], + ['Input/Output', 'inputPerOutput'], + ['Output/$', 'outputPerDollar'], + ['Cache Hit', 'cacheHitRate'], + ['Waste Signals', 'wasteSignals'], + ]; + + return Box( + { flexDirection: 'column', width: '100%', flexGrow: 1, borderStyle: 'single', borderColor: COLORS.dimWhite, paddingLeft: 1, paddingRight: 1 }, + Text({ content: ' Behavior Diff ', fg: COLORS.amber, attributes: BOLD }), + Text({ content: ` ${report.baseline.selector.label} vs ${report.comparison.selector.label}`, fg: COLORS.white, attributes: BOLD }), + Text({ content: '', fg: COLORS.dimWhite }), + Text({ content: ' Metric Baseline Compare Delta', fg: COLORS.dimWhite }), + ...metrics.map(([label, key]) => + metricLine(label, fmt(key, 
report.baseline.metrics[key]), fmt(key, report.comparison.metrics[key]), fmt(key, report.deltas[key])), + ), + Text({ content: '', fg: COLORS.dimWhite }), + Text({ content: ' Takeaways ', fg: COLORS.amber, attributes: BOLD }), + ...report.takeaways.slice(0, 4).map((takeaway) => Text({ content: ` - ${truncate(takeaway, 72)}`, fg: COLORS.cyan })), + ...(report.warnings.length > 0 + ? [Text({ content: '', fg: COLORS.dimWhite }), Text({ content: ` Warnings: ${truncate(report.warnings.join(' | '), 74)}`, fg: COLORS.dimWhite })] + : []), + ); +} diff --git a/packages/tui/src/panels/header.ts b/packages/tui/src/panels/header.ts index 1aad919..2b58f7d 100644 --- a/packages/tui/src/panels/header.ts +++ b/packages/tui/src/panels/header.ts @@ -28,6 +28,9 @@ const VIEWS: { key: string; label: string; mode: ViewMode }[] = [ { key: '9', label: 'Replay', mode: 'replay' }, { key: '0', label: 'AI ROI', mode: 'nutrition' }, { key: 'R', label: 'Receipts', mode: 'receipts' }, + { key: 'X', label: 'Sim', mode: 'simulator' }, + { key: 'Y', label: 'Waste', mode: 'waste' }, + { key: 'Z', label: 'Diff', mode: 'behavior' }, ]; export function buildHeader( diff --git a/packages/tui/src/panels/help.ts b/packages/tui/src/panels/help.ts index 640ff69..5f6bd14 100644 --- a/packages/tui/src/panels/help.ts +++ b/packages/tui/src/panels/help.ts @@ -79,6 +79,9 @@ export function createHelpPanel() { ['9', 'Replay'], ['0', 'AI ROI'], ['R', 'Receipts'], + ['X', 'Routing Simulator'], + ['Y', 'Waste Signals'], + ['Z', 'Behavior Diff'], ['?', 'Help'], ]), ...helpSection('EXPLAIN VIEW', [ diff --git a/packages/tui/src/panels/optimization.test.ts b/packages/tui/src/panels/optimization.test.ts new file mode 100644 index 0000000..1433d85 --- /dev/null +++ b/packages/tui/src/panels/optimization.test.ts @@ -0,0 +1,200 @@ +import { describe, expect, test } from 'bun:test'; +import type { + AgentWasteReport, + AgentWasteSignal, + RoutingSimulationCandidate, + RoutingSimulationReport, +} from '@tokenleak/core'; 
+import { createWastePanel } from './waste.js'; +import { createSimulatorPanel } from './simulator.js'; + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null; +} + +function collectTextContent(node: unknown): string[] { + if (!isRecord(node)) return []; + const props = node['props']; + const ownContent = + isRecord(props) && typeof props['content'] === 'string' ? [props['content']] : []; + const children = Array.isArray(node['children']) + ? node['children'].flatMap((child) => collectTextContent(child)) + : []; + return [...ownContent, ...children]; +} + +function wasteSignal( + title: string, + reason: string, + recipeTitle: string, + cost: number, +): AgentWasteSignal { + return { + kind: title === 'Repeated prompt cluster' ? 'prompt-repeat' : 'context-drag', + title, + severity: 'high', + confidence: 'high', + estimatedSavings: cost / 2, + evidence: { + eventCount: 12, + tokens: 24_200_000, + cost, + reason, + sessionId: 'session-1', + }, + recipes: [{ title: recipeTitle, detail: 'Try a more focused next step.' 
}], + }; +} + +function wasteReport(): AgentWasteReport { + const signals = [ + wasteSignal( + 'Repeated prompt cluster', + '191 similar prompts clustered around "# Optimization Intelligence Plan PLEASE IMPLEMENT THIS PLAN with many extra words".', + 'Break the retry loop', + 8, + ), + wasteSignal( + 'High context drag', + 'Input tokens are 14.5x output tokens in this session.', + 'Start a compact follow-up session', + 6, + ), + wasteSignal( + 'Repeated prompt cluster', + '181 similar prompts clustered around "# Thread handoff and merge the pending branch".', + 'Break the retry loop', + 4, + ), + ]; + return { + method: 'test', + dateRange: { since: '2026-05-10', until: '2026-05-16' }, + summary: { + totalSignals: signals.length, + highSeverity: signals.length, + estimatedSavings: 9, + analyzedEvents: 200, + analyzedSessions: 8, + }, + signals, + warnings: [], + }; +} + +function scrollableWasteReport(): AgentWasteReport { + const report = wasteReport(); + const extra = Array.from({ length: 7 }, (_, index) => + wasteSignal( + index % 2 === 0 ? 'High context drag' : 'Repeated prompt cluster', + index % 2 === 0 + ? `Input tokens are ${10 + index}.0x output tokens in this session.` + : `${120 + index} similar prompts clustered around "# Extra prompt ${index}".`, + index % 2 === 0 ? 
'Start a compact follow-up session' : 'Break the retry loop', + 1 + index, + ), + ); + const signals = [...report.signals, ...extra]; + return { + ...report, + signals, + summary: { + ...report.summary, + totalSignals: signals.length, + highSeverity: signals.length, + }, + }; +} + +function candidate(index: number, savings: number): RoutingSimulationCandidate { + return { + ruleId: 'premium-short-output', + eventId: `event-${index}`, + provider: 'codex', + fromModel: 'gpt-5.5', + toModel: 'gpt-5-mini', + currentCost: 1, + simulatedCost: 1 - savings, + savings, + tokens: 12_000, + confidence: 'medium', + reasons: ['Premium model with short output', 'cache-aware target pricing'], + }; +} + +function routingReport(): RoutingSimulationReport { + const candidates = [candidate(1, 0.75), candidate(2, 0.5)]; + return { + method: 'test', + dateRange: { since: '2026-05-10', until: '2026-05-16' }, + strategy: 'conservative', + currentCost: 61.38, + simulatedCost: 6.5, + estimatedSavings: 54.88, + estimatedSavingsPercent: 0.894, + affectedEvents: 656, + affectedTokens: 73_600_000, + candidates, + rules: [], + warnings: ['No downgrade path for gpt-5.'], + }; +} + +function scrollableRoutingReport(): RoutingSimulationReport { + const candidates = Array.from({ length: 10 }, (_, index) => candidate(index + 1, 0.75 - index / 100)); + return { + ...routingReport(), + candidates, + }; +} + +describe('optimization TUI panels', () => { + test('waste panel uses stable labels and hides raw prompt fragments', () => { + const lines = collectTextContent(createWastePanel(wasteReport(), 0, 56)); + const text = lines.join('\n'); + + expect(text).toContain('Repeated similar asks'); + expect(text).toContain('Too much context'); + expect(text).toContain('Change approach'); + expect(text).toContain('Start fresh'); + expect(text).not.toContain('# Opt'); + expect(text).not.toContain('# Thr'); + expect(lines.every((line) => line.length <= 56)).toBe(true); + }); + + test('waste panel honors scroll 
offset', () => { + const report = scrollableWasteReport(); + const firstPage = collectTextContent(createWastePanel(report, 0, 70)).join('\n'); + const secondPage = collectTextContent(createWastePanel(report, 1, 70)).join('\n'); + + expect(firstPage).toContain('191 similar asks'); + expect(secondPage).not.toContain('191 similar asks'); + expect(secondPage).toContain('Too much context'); + }); + + test('simulator panel explains routing in action words', () => { + const lines = collectTextContent(createSimulatorPanel(routingReport(), 0, 72)); + const text = lines.join('\n'); + + expect(text).toContain('Actual spend'); + expect(text).toContain('Estimated with routing'); + expect(text).toContain('Could reroute'); + expect(text).toContain('Use gpt-5-mini instead of gpt-5.5 for small answers'); + expect(text).toContain('Save about $0.75 on this event'); + expect(text).toContain('confidence: medium'); + expect(text).not.toContain('Premium model with short output'); + expect(text).not.toContain('cache-aware target pricing'); + expect(text).not.toContain('[medium]'); + expect(lines.every((line) => line.length <= 72)).toBe(true); + }); + + test('simulator panel honors scroll offset', () => { + const report = scrollableRoutingReport(); + const firstPage = collectTextContent(createSimulatorPanel(report, 0, 72)).join('\n'); + const secondPage = collectTextContent(createSimulatorPanel(report, 1, 72)).join('\n'); + + expect(firstPage).toContain('Save about $0.75'); + expect(secondPage).not.toContain('Save about $0.75'); + expect(secondPage).toContain('Save about $0.74'); + }); +}); diff --git a/packages/tui/src/panels/simulator.ts b/packages/tui/src/panels/simulator.ts new file mode 100644 index 0000000..8b25154 --- /dev/null +++ b/packages/tui/src/panels/simulator.ts @@ -0,0 +1,99 @@ +import { Box, Text } from '@opentui/core'; +import type { RoutingSimulationCandidate, RoutingSimulationReport } from '@tokenleak/core'; +import { COLORS, BOLD } from '../lib/theme.js'; +import { 
formatCost, formatTokens, truncate, wrapText } from '../lib/format.js'; + +export const SIMULATOR_VISIBLE_ROWS = 8; +export const SIMULATOR_MAX_CONTENT_WIDTH = 78; + +function clampOffset(offset: number, itemCount: number): number { + return Math.max(0, Math.min(offset, Math.max(0, itemCount - SIMULATOR_VISIBLE_ROWS))); +} + +function candidateAction(candidate: RoutingSimulationCandidate): string { + if (candidate.ruleId === 'premium-short-output') { + return `Use ${candidate.toModel} instead of ${candidate.fromModel} for small answers`; + } + if (candidate.ruleId === 'quick-lookup') { + return `Use ${candidate.toModel} instead of ${candidate.fromModel} for quick lookups`; + } + if (candidate.ruleId === 'low-output-ratio') { + return `Use ${candidate.toModel} instead of ${candidate.fromModel} for low-output turns`; + } + return `Use ${candidate.toModel} instead of ${candidate.fromModel}`; +} + +function candidateDetail(candidate: RoutingSimulationCandidate): string { + const details: string[] = []; + if (candidate.reasons.some((reason) => reason.toLowerCase().includes('cache'))) { + details.push('includes cache pricing'); + } + return details.join(' · '); +} + +export function createSimulatorPanel( + report: RoutingSimulationReport | null, + scrollOffset: number = 0, + contentWidth: number = SIMULATOR_MAX_CONTENT_WIDTH, +) { + const width = Math.max(36, contentWidth); + if (!report) { + return Box( + { flexDirection: 'column', width: '100%', flexGrow: 1, borderStyle: 'single', borderColor: COLORS.dimWhite, paddingLeft: 1 }, + Text({ content: ' Routing Simulator ', fg: COLORS.amber, attributes: BOLD }), + Text({ content: 'No event data available for routing simulation', fg: COLORS.dimWhite }), + ); + } + + const positiveCandidates = report.candidates.filter((candidate) => (candidate.savings ?? 
0) > 0); + const offset = clampOffset(scrollOffset, positiveCandidates.length); + const candidates = positiveCandidates.slice(offset, offset + SIMULATOR_VISIBLE_ROWS); + const below = positiveCandidates.length - offset - candidates.length; + return Box( + { flexDirection: 'column', width: '100%', flexGrow: 1, borderStyle: 'single', borderColor: COLORS.dimWhite, paddingLeft: 1, paddingRight: 1 }, + Text({ content: ' Routing Simulator ', fg: COLORS.amber, attributes: BOLD }), + Text({ + content: truncate(` Actual spend ${formatCost(report.currentCost)} -> Estimated with routing ${formatCost(report.simulatedCost)}`, width), + fg: COLORS.green, + attributes: BOLD, + }), + Text({ + content: truncate(` Savings ${formatCost(report.estimatedSavings)} (${(report.estimatedSavingsPercent * 100).toFixed(1)}%)`, width), + fg: COLORS.green, + attributes: BOLD, + }), + Text({ content: truncate(` Could reroute ${report.affectedEvents} events / ${formatTokens(report.affectedTokens)} tokens | Strategy ${report.strategy}`, width), fg: COLORS.dimWhite }), + Text({ content: '', fg: COLORS.dimWhite }), + Text({ content: ' Top Candidates ', fg: COLORS.amber, attributes: BOLD }), + ...(offset > 0 ? [Text({ content: ` ${offset} more above`, fg: COLORS.dimWhite })] : []), + ...(candidates.length > 0 + ? candidates.flatMap((candidate) => { + const detail = candidateDetail(candidate); + return [ + Text({ + content: truncate(` ${candidateAction(candidate)}`, width), + fg: COLORS.white, + }), + Text({ + content: truncate(` Save about ${formatCost(candidate.savings ?? 0)} on this event · confidence: ${candidate.confidence}`, width), + fg: COLORS.dimWhite, + }), + ...(detail + ? wrapText(detail, Math.max(16, width - 4), 1).map((line) => + Text({ content: truncate(` ${line}`, width), fg: COLORS.dimWhite }), + ) + : []), + ]; + }) + : [Text({ content: ' No positive routing candidates found', fg: COLORS.dimWhite })]), + ...(below > 0 ? 
[Text({ content: ` ${below} more below`, fg: COLORS.dimWhite })] : []), + ...(report.warnings.length > 0 + ? [ + Text({ content: '', fg: COLORS.dimWhite }), + ...wrapText(`Warnings: ${report.warnings.slice(0, 2).join(' | ')}`, width, 2).map((line) => + Text({ content: truncate(` ${line}`, width), fg: COLORS.dimWhite }), + ), + ] + : []), + ); +} diff --git a/packages/tui/src/panels/status-bar.ts b/packages/tui/src/panels/status-bar.ts index d7eb896..924da13 100644 --- a/packages/tui/src/panels/status-bar.ts +++ b/packages/tui/src/panels/status-bar.ts @@ -127,6 +127,9 @@ export function buildStatusBar(state: AppState) { state.selectedView === 'compare' || state.selectedView === 'wrapped' || state.selectedView === 'nutrition' + || state.selectedView === 'simulator' + || state.selectedView === 'waste' + || state.selectedView === 'behavior' ) { keys = `${nav} j/k:scroll r:refresh${cursorHint} ${helpHint} q:quit`; } else if (state.selectedView === 'export') { diff --git a/packages/tui/src/panels/waste.ts b/packages/tui/src/panels/waste.ts new file mode 100644 index 0000000..6709493 --- /dev/null +++ b/packages/tui/src/panels/waste.ts @@ -0,0 +1,94 @@ +import { Box, Text } from '@opentui/core'; +import type { AgentWasteReport, AgentWasteSignal } from '@tokenleak/core'; +import { COLORS, BOLD } from '../lib/theme.js'; +import { formatCost, formatTokens, truncate, wrapText } from '../lib/format.js'; + +export const WASTE_VISIBLE_ROWS = 8; +export const WASTE_MAX_CONTENT_WIDTH = 78; + +function severityColor(severity: 'high' | 'medium' | 'low'): string { + if (severity === 'high') return COLORS.red; + if (severity === 'medium') return COLORS.amber; + return COLORS.dimWhite; +} + +function shortSignalTitle(signal: AgentWasteSignal): string { + if (signal.title === 'Repeated prompt cluster') return 'Repeated similar asks'; + if (signal.title === 'High context drag') return 'Too much context'; + return signal.title; +} + +function shortRecipeTitle(title: string | undefined): 
string { + if (title === 'Break the retry loop') return 'Change approach'; + if (title === 'Start a compact follow-up session') return 'Start fresh'; + return title ?? 'Review signal'; +} + +function readableReason(signal: AgentWasteSignal): string { + const repeatMatch = signal.evidence.reason.match(/^(\d+)\s+similar prompts/i); + if (signal.kind === 'prompt-repeat' && repeatMatch) { + return `${repeatMatch[1]} similar asks repeated in this window.`; + } + return signal.evidence.reason; +} + +function clampOffset(offset: number, itemCount: number): number { + return Math.max(0, Math.min(offset, Math.max(0, itemCount - WASTE_VISIBLE_ROWS))); +} + +export function createWastePanel( + report: AgentWasteReport | null, + scrollOffset: number = 0, + contentWidth: number = WASTE_MAX_CONTENT_WIDTH, +) { + const width = Math.max(36, contentWidth); + if (!report) { + return Box( + { flexDirection: 'column', width: '100%', flexGrow: 1, borderStyle: 'single', borderColor: COLORS.dimWhite, paddingLeft: 1 }, + Text({ content: ' Waste Signals ', fg: COLORS.amber, attributes: BOLD }), + Text({ content: 'No event data available for waste detection', fg: COLORS.dimWhite }), + ); + } + + const offset = clampOffset(scrollOffset, report.signals.length); + const signals = report.signals.slice(offset, offset + WASTE_VISIBLE_ROWS); + const below = report.signals.length - offset - signals.length; + return Box( + { flexDirection: 'column', width: '100%', flexGrow: 1, borderStyle: 'single', borderColor: COLORS.dimWhite, paddingLeft: 1, paddingRight: 1 }, + Text({ content: ' Waste Signals ', fg: COLORS.amber, attributes: BOLD }), + Text({ + content: truncate(` Signals ${report.summary.totalSignals} High ${report.summary.highSeverity} Est. savings ${report.summary.estimatedSavings === null ? 
'-' : formatCost(report.summary.estimatedSavings)}`, width), + fg: COLORS.white, + attributes: BOLD, + }), + Text({ content: truncate(` Analyzed ${report.summary.analyzedEvents} events / ${report.summary.analyzedSessions} sessions`, width), fg: COLORS.dimWhite }), + Text({ content: '', fg: COLORS.dimWhite }), + ...(offset > 0 ? [Text({ content: ` ${offset} more above`, fg: COLORS.dimWhite })] : []), + ...(signals.length > 0 + ? signals.flatMap((signal) => { + const reasonLines = wrapText(readableReason(signal), Math.max(16, width - 4), 1); + return [ + Text({ + content: truncate(` [${signal.severity.toUpperCase()}] ${shortSignalTitle(signal)} ${signal.estimatedSavings === null ? '-' : formatCost(signal.estimatedSavings)} confidence: ${signal.confidence}`, width), + fg: severityColor(signal.severity), + attributes: BOLD, + }), + ...reasonLines.map((line) => Text({ content: truncate(` ${line}`, width), fg: COLORS.dimWhite })), + Text({ + content: truncate(` ${formatTokens(signal.evidence.tokens)} tok ${formatCost(signal.evidence.cost)} ${shortRecipeTitle(signal.recipes[0]?.title)}`, width), + fg: COLORS.cyan, + }), + ]; + }) + : [Text({ content: ' No deterministic waste signals detected', fg: COLORS.dimWhite })]), + ...(below > 0 ? [Text({ content: ` ${below} more below`, fg: COLORS.dimWhite })] : []), + ...(report.warnings.length > 0 + ? [ + Text({ content: '', fg: COLORS.dimWhite }), + ...wrapText(`Warnings: ${report.warnings.join(' | ')}`, width, 2).map((line) => + Text({ content: truncate(` ${line}`, width), fg: COLORS.dimWhite }), + ), + ] + : []), + ); +}