diff --git a/src/providers/cursor-agent.ts b/src/providers/cursor-agent.ts index 4cdc7d26..fbc0bfcc 100644 --- a/src/providers/cursor-agent.ts +++ b/src/providers/cursor-agent.ts @@ -139,12 +139,83 @@ function costModel(model: string): string { return model === 'cursor-agent-auto' ? CURSOR_AGENT_COST_MODEL : model } +function transcriptStem(transcriptPath: string): string { + const name = basename(transcriptPath) + if (name.endsWith('.jsonl')) return name.slice(0, -'.jsonl'.length) + if (name.endsWith('.txt')) return name.slice(0, -'.txt'.length) + return name +} + function toConversationId(transcriptPath: string): string { - const filename = basename(transcriptPath, '.txt') + const filename = transcriptStem(transcriptPath) if (filename.length === 36 && UUID_LIKE.test(filename)) return filename return createHash('sha1').update(transcriptPath).digest('hex').slice(0, 16) } +async function appendTranscriptSources( + scanDir: string, + projectId: string, + sources: SessionSource[], +): Promise { + const transcriptEntries = await readdir(scanDir, { withFileTypes: true }) + for (const transcript of transcriptEntries) { + // Legacy format: .txt files directly in the scan dir + if (transcript.isFile() && transcript.name.endsWith('.txt')) { + sources.push({ + path: join(scanDir, transcript.name), + project: projectId, + provider: 'cursor-agent', + }) + continue + } + + // Composer 2 format: UUID subdirectories with .jsonl files + if (transcript.isDirectory() && UUID_LIKE.test(transcript.name)) { + const subdir = join(scanDir, transcript.name) + const subEntries = await readdir(subdir, { withFileTypes: true }).catch(() => []) + const transcriptFilesByStem = new Map() + + for (const sub of subEntries) { + if (sub.isFile() && (sub.name.endsWith('.jsonl') || sub.name.endsWith('.txt'))) { + const stem = transcriptStem(sub.name) + const existing = transcriptFilesByStem.get(stem) ?? {} + if (sub.name.endsWith('.jsonl')) { + transcriptFilesByStem.set(stem, { ...existing, jsonl: sub.name }) + } else { + transcriptFilesByStem.set(stem, { ...existing, txt: sub.name }) + } + continue + } + + // Subagent transcripts inside a subagents/ directory + if (sub.isDirectory() && sub.name === 'subagents') { + const subagentEntries = await readdir(join(subdir, sub.name), { withFileTypes: true }).catch(() => []) + for (const sa of subagentEntries) { + if (!sa.isFile()) continue + if (!sa.name.endsWith('.jsonl') && !sa.name.endsWith('.txt')) continue + sources.push({ + path: join(subdir, sub.name, sa.name), + project: projectId, + provider: 'cursor-agent', + }) + } + } + } + + for (const files of transcriptFilesByStem.values()) { + const selectedName = files.jsonl ?? files.txt + if (selectedName) { + sources.push({ + path: join(subdir, selectedName), + project: projectId, + provider: 'cursor-agent', + }) + } + } + } + } +} + function extractUserQuery(userBlock: string): string { const chunks: string[] = [] let cursor = 0 @@ -241,7 +312,7 @@ function parseTranscript(raw: string): { turns: ParsedTurn[]; recognized: boolea let output = '' let reasoning = '' - const toolsByTurn: Record = Object.create(null) + const toolsByTurn = new Map() for (const line of assistantLines) { if (TOOL_RESULT_MARKER.test(line)) continue @@ -257,7 +328,7 @@ function parseTranscript(raw: string): { turns: ParsedTurn[]; recognized: boolea if (toolMatch) { const parsedTool = parseToolName(toolMatch[1] ?? '') const toolKey = `cursor:${parsedTool}` - toolsByTurn[toolKey] = true + toolsByTurn.set(toolKey, true) continue } @@ -266,7 +337,7 @@ function parseTranscript(raw: string): { turns: ParsedTurn[]; recognized: boolea if (pendingUsers.length > 0) { const userMessage = pendingUsers.shift()! - const tools = Object.keys(toolsByTurn) + const tools = Array.from(toolsByTurn.keys()) turns.push({ userMessage, assistant: { @@ -319,13 +390,13 @@ function createParser( source: SessionSource, seenKeys: Set, dbPath: string, - summariesByConversationId: Record, + summariesByConversationId: Map, ): SessionParser { return { async *parse(): AsyncGenerator { const conversationId = toConversationId(source.path) - let summary = summariesByConversationId[conversationId] + let summary = summariesByConversationId.get(conversationId) let db: SqliteDatabase | null = null try { @@ -348,7 +419,7 @@ function createParser( title: row.title, updatedAt: normalizeTimestamp(row.updatedAt), } - summariesByConversationId[conversationId] = summary + summariesByConversationId.set(conversationId, summary) } } catch { summary = undefined @@ -426,7 +497,7 @@ export function createCursorAgentProvider(baseDirOverride?: string): Provider { const baseDir = getCursorAgentBaseDir(baseDirOverride) const projectsDir = getProjectsDir(baseDir) const dbPath = getAttributionDbPath(baseDir) - const summariesByConversationId: Record = Object.create(null) + const summariesByConversationId = new Map() return { name: 'cursor-agent', @@ -452,50 +523,15 @@ export function createCursorAgentProvider(baseDirOverride?: string): Provider { if (!entry.isDirectory()) continue const projectId = prettifyProjectId(entry.name) - const transcriptDir = join(projectsDir, entry.name, 'agent-transcripts') - if (!existsSync(transcriptDir)) continue - - const transcriptEntries = await readdir(transcriptDir, { withFileTypes: true }) - for (const transcript of transcriptEntries) { - // Legacy format: .txt files directly in agent-transcripts/ - if (transcript.isFile() && transcript.name.endsWith('.txt')) { - const transcriptPath = join(transcriptDir, transcript.name) - sources.push({ - path: transcriptPath, - project: projectId, - provider: 'cursor-agent', - }) - continue - } - - // Composer 2 format: UUID subdirectories with .jsonl files - if (transcript.isDirectory() && UUID_LIKE.test(transcript.name)) { - const subdir = join(transcriptDir, transcript.name) - const subEntries = await readdir(subdir, { withFileTypes: true }).catch(() => []) - for (const sub of subEntries) { - if (sub.isFile() && (sub.name.endsWith('.jsonl') || sub.name.endsWith('.txt'))) { - sources.push({ - path: join(subdir, sub.name), - project: projectId, - provider: 'cursor-agent', - }) - } - // Subagent transcripts inside a subagents/ directory - if (sub.isDirectory() && sub.name === 'subagents') { - const subagentEntries = await readdir(join(subdir, sub.name), { withFileTypes: true }).catch(() => []) - for (const sa of subagentEntries) { - if (!sa.isFile()) continue - if (!sa.name.endsWith('.jsonl') && !sa.name.endsWith('.txt')) continue - sources.push({ - path: join(subdir, sub.name, sa.name), - project: projectId, - provider: 'cursor-agent', - }) - } - } - } - } + const projectDir = join(projectsDir, entry.name) + if (entry.name === 'agent-transcripts') { + await appendTranscriptSources(projectDir, projectId, sources) + continue } + + const transcriptDir = join(projectDir, 'agent-transcripts') + if (!existsSync(transcriptDir)) continue + await appendTranscriptSources(transcriptDir, projectId, sources) } return sources diff --git a/src/session-cache.ts b/src/session-cache.ts index 46b243c0..89dfe417 100644 --- a/src/session-cache.ts +++ b/src/session-cache.ts @@ -109,6 +109,7 @@ export const DURABLE_PROVIDER_NAMES: ReadonlySet = new Set(['copilot']) const PROVIDER_PARSE_VERSIONS: Record = { claude: 'cowork-space-grouping-v1', cline: 'worktree-project-grouping-v1', + 'cursor-agent': 'workspaceless-transcript-v1', copilot: 'otel-durable-v1', 'ibm-bob': 'worktree-project-grouping-v1', 'kilo-code': 'worktree-project-grouping-v1', diff --git a/tests/fixtures/cursor-agent/workspace-less/projects/agent-transcripts/1031d227-0c67-4e17-8954-0b6e2b3322f0/1031d227-0c67-4e17-8954-0b6e2b3322f0.jsonl b/tests/fixtures/cursor-agent/workspace-less/projects/agent-transcripts/1031d227-0c67-4e17-8954-0b6e2b3322f0/1031d227-0c67-4e17-8954-0b6e2b3322f0.jsonl new file mode 100644 index 00000000..d5a29e90 --- /dev/null +++ b/tests/fixtures/cursor-agent/workspace-less/projects/agent-transcripts/1031d227-0c67-4e17-8954-0b6e2b3322f0/1031d227-0c67-4e17-8954-0b6e2b3322f0.jsonl @@ -0,0 +1,2 @@ +{"role":"user","message":{"content":[{"type":"text","text":"\nRun a quick smoke test\n"}]}} +{"role":"assistant","message":{"content":[{"type":"text","text":"Smoke test passed."}]}} diff --git a/tests/providers/cursor-agent.test.ts b/tests/providers/cursor-agent.test.ts index 77559548..78b72828 100644 --- a/tests/providers/cursor-agent.test.ts +++ b/tests/providers/cursor-agent.test.ts @@ -1,5 +1,5 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises' +import { mkdtemp, mkdir, readFile, rm, writeFile } from 'fs/promises' import { existsSync } from 'fs' import { join } from 'path' import { tmpdir } from 'os' @@ -125,6 +125,40 @@ describe('cursor-agent provider', () => { expect(sources.every((s) => s.provider === 'cursor-agent')).toBe(true) }) + it('does not scan a workspace root when agent-transcripts is missing', async () => { + const baseDir = await makeBaseDir() + const workspaceRoot = join(baseDir, 'projects', 'workspace-without-transcripts') + await mkdir(workspaceRoot, { recursive: true }) + await writeFile( + join(workspaceRoot, 'extension-state.txt'), + 'user:\nnot a transcript\nA:\nnot a cursor-agent answer\n', + ) + + const provider = createCursorAgentProvider(baseDir) + const sources = await provider.discoverSessions() + + expect(sources).toEqual([]) + }) + + it('prefers jsonl over same-session txt inside UUID transcript dirs', async () => { + const baseDir = await makeBaseDir() + const sessionDir = join(baseDir, 'projects', 'proj-with-duplicates', 'agent-transcripts', FIXED_UUID) + const jsonlPath = join(sessionDir, `${FIXED_UUID}.jsonl`) + const txtPath = join(sessionDir, `${FIXED_UUID}.txt`) + await mkdir(sessionDir, { recursive: true }) + await writeFile( + jsonlPath, + '{"role":"user","message":{"content":[{"type":"text","text":"jsonl wins"}]}}\n{"role":"assistant","message":{"content":[{"type":"text","text":"jsonl answer"}]}}\n', + ) + await writeFile(txtPath, 'user:\ntxt duplicate\nA:\ntxt answer\n') + + const provider = createCursorAgentProvider(baseDir) + const sources = await provider.discoverSessions() + + expect(sources).toHaveLength(1) + expect(sources[0]!.path).toBe(jsonlPath) + }) + it('parses one user/assistant pair with estimated token counts', async () => { const baseDir = await makeBaseDir() const transcriptDir = join(baseDir, 'projects', 'my-proj', 'agent-transcripts') @@ -212,6 +246,36 @@ describe('cursor-agent provider', () => { stderrSpy.mockRestore() }) + it('discovers jsonl transcripts stored directly under project dir (workspace-less layout)', async () => { + const baseDir = await makeBaseDir() + const fixtureRoot = join(import.meta.dirname, '../fixtures/cursor-agent/workspace-less') + const sessionDir = join(baseDir, 'projects', 'agent-transcripts', '1031d227-0c67-4e17-8954-0b6e2b3322f0') + await mkdir(sessionDir, { recursive: true }) + await writeFile( + join(sessionDir, '1031d227-0c67-4e17-8954-0b6e2b3322f0.jsonl'), + await readFile( + join( + fixtureRoot, + 'projects/agent-transcripts/1031d227-0c67-4e17-8954-0b6e2b3322f0/1031d227-0c67-4e17-8954-0b6e2b3322f0.jsonl', + ), + 'utf-8', + ), + ) + + const provider = createCursorAgentProvider(baseDir) + const sources = await provider.discoverSessions() + + expect(sources).toHaveLength(1) + expect(sources[0]!.project).toBe('transcripts') + expect(sources[0]!.path.endsWith('.jsonl')).toBe(true) + + const calls = await collectCalls(provider, sources[0]!) + expect(calls).toHaveLength(1) + expect(calls[0]!.sessionId).toBe('1031d227-0c67-4e17-8954-0b6e2b3322f0') + expect(calls[0]!.userMessage).toBe('Run a quick smoke test') + expect(calls[0]!.costUSD).toBeGreaterThan(0) + }) + it('falls back to stable sha1 conversation id for non-uuid filenames', async () => { const baseDir = await makeBaseDir() const transcriptDir = join(baseDir, 'projects', 'sha-proj', 'agent-transcripts')