From 7241d13e577fd8b85513767abf4bb16f4d6d34b8 Mon Sep 17 00:00:00 2001 From: BookTranslator User Date: Tue, 7 Apr 2026 22:29:55 +0200 Subject: [PATCH 1/5] Harden archive paths against malformed message metadata Add shared message validation (getValidMessageInfo) and filter out malformed messages across all archive, resume, describe, transform, search-index, and capture code paths. Prevents crashes when message.info is missing required fields (id, sessionID, role, time.created). Logs warnings for dropped messages instead of throwing. Fixes #4 --- src/store.ts | 187 ++++++++++++++++++++++++++++++--- tests/store-transform.test.mjs | 158 ++++++++++++++++++++++++++++ 2 files changed, 329 insertions(+), 16 deletions(-) diff --git a/src/store.ts b/src/store.ts index 7b33f44..5f789e9 100644 --- a/src/store.ts +++ b/src/store.ts @@ -338,6 +338,66 @@ function extractTimestamp(event: unknown): number { return Date.now(); } +type MessageValidationContext = { + operation: string; + sessionID?: string; + eventType?: string; +}; + +function logMalformedMessage( + message: string, + context: MessageValidationContext, + extra?: Record, +): void { + getLogger().warn(message, { + operation: context.operation, + sessionID: context.sessionID, + eventType: context.eventType, + ...extra, + }); +} + +function getValidMessageInfo(info: unknown): Message | undefined { + const record = asRecord(info); + if (!record) return undefined; + + const time = asRecord(record.time); + if ( + typeof record.id !== 'string' || + typeof record.sessionID !== 'string' || + typeof record.role !== 'string' || + typeof time?.created !== 'number' || + !Number.isFinite(time.created) + ) { + return undefined; + } + + return info as Message; +} + +function filterValidConversationMessages( + messages: ConversationMessage[], + context?: MessageValidationContext, +): ConversationMessage[] { + const valid = messages.filter((message) => Boolean(getValidMessageInfo(message?.info))); + const dropped = messages.length - valid.length; + if (dropped > 0 && context) { + logMalformedMessage('Skipping malformed conversation messages', context, { dropped }); + } + return valid; +} + +function isValidMessagePartUpdate(event: Event): boolean { + if (event.type !== 'message.part.updated') return false; + const part = asRecord(event.properties.part); + if (!part) return false; + return ( + typeof part.id === 'string' && + typeof part.messageID === 'string' && + typeof part.sessionID === 'string' + ); +} + function normalizeEvent(event: unknown): CapturedEvent | null { const record = asRecord(event); if (!record || typeof record.type !== 'string') return null; @@ -357,7 +417,13 @@ function getDeferredPartUpdateKey(event: Event): string | undefined { } function compareMessages(a: ConversationMessage, b: ConversationMessage): number { - return a.info.time.created - b.info.time.created; + const aInfo = getValidMessageInfo(a.info); + const bInfo = getValidMessageInfo(b.info); + if (!aInfo && !bInfo) return 0; + if (!aInfo) return 1; + if (!bInfo) return -1; + + return aInfo.time.created - bInfo.time.created || aInfo.id.localeCompare(bInfo.id); } function emptySession(sessionID: string): NormalizedSession { @@ -513,7 +579,9 @@ function listFiles(message: ConversationMessage): string[] { function makeSessionTitle(session: NormalizedSession): string | undefined { if (session.title) return session.title; - const firstUser = session.messages.find((message) => message.info.role === 'user'); + const firstUser = session.messages.find( + (message) => getValidMessageInfo(message.info)?.role === 'user', + ); if (!firstUser) return undefined; return truncate(guessMessageText(firstUser, []), 80); @@ -1198,6 +1266,8 @@ export class SqliteLcmStore { const normalized = normalizeEvent(event); if (!normalized) return; + if (this.shouldSkipMalformedCapturedEvent(normalized)) return; + const shouldRecord = this.shouldRecordEvent(normalized.type); const shouldPersistSession = Boolean(normalized.sessionID) && this.shouldPersistSessionForEvent(normalized.type); @@ -1832,11 +1902,12 @@ export class SqliteLcmStore { session: NormalizedSession, preserveExistingResume = false, ): SummaryNodeData[] { + const sanitizedSession = this.sanitizeSessionMessages(session, 'syncDerivedSessionStateSync'); const roots = this.ensureSummaryGraphSync( - session.sessionID, - this.getArchivedMessages(session.messages), + sanitizedSession.sessionID, + this.getArchivedMessages(sanitizedSession.messages), ); - this.writeResumeSync(session, roots, preserveExistingResume); + this.writeResumeSync(sanitizedSession, roots, preserveExistingResume); return roots; } @@ -2989,9 +3060,12 @@ export class SqliteLcmStore { async transformMessages(messages: ConversationMessage[]): Promise { return this.withStoreActivity(async () => { - if (messages.length < this.options.minMessagesForTransform) return false; + const validMessages = filterValidConversationMessages(messages, { + operation: 'transformMessages', + }); + if (validMessages.length < this.options.minMessagesForTransform) return false; - const window = resolveArchiveTransformWindow(messages, this.options.freshTailMessages); + const window = resolveArchiveTransformWindow(validMessages, this.options.freshTailMessages); if (!window) return false; await this.prepareForRead(); @@ -3055,6 +3129,44 @@ export class SqliteLcmStore { ].join(' '); } + private sanitizeSessionMessages( + session: NormalizedSession, + operation: string, + ): NormalizedSession { + const messages = filterValidConversationMessages(session.messages, { + operation, + sessionID: session.sessionID, + }); + return messages.length === session.messages.length ? session : { ...session, messages }; + } + + private shouldSkipMalformedCapturedEvent(event: CapturedEvent): boolean { + const payload = event.payload as Event; + + switch (payload.type) { + case 'message.updated': { + if (getValidMessageInfo(payload.properties.info)) return false; + logMalformedMessage('Skipping malformed message.updated event', { + operation: 'capture', + sessionID: event.sessionID, + eventType: payload.type, + }); + return true; + } + case 'message.part.updated': { + if (isValidMessagePartUpdate(payload)) return false; + logMalformedMessage('Skipping malformed message.part.updated event', { + operation: 'capture', + sessionID: event.sessionID, + eventType: payload.type, + }); + return true; + } + default: + return false; + } + } + private async buildAutomaticRetrievalContext( sessionID: string, recent: ConversationMessage[], @@ -4184,10 +4296,22 @@ export class SqliteLcmStore { } private replaceMessageSearchRowsSync(session: NormalizedSession): void { - replaceMessageSearchRowsModule(this.searchDeps(), redactStructuredValue(session, this.privacy)); + const sanitizedSession = this.sanitizeSessionMessages(session, 'replaceMessageSearchRowsSync'); + replaceMessageSearchRowsModule( + this.searchDeps(), + redactStructuredValue(sanitizedSession, this.privacy), + ); } private replaceMessageSearchRowSync(sessionID: string, message: ConversationMessage): void { + if (!getValidMessageInfo(message.info)) { + logMalformedMessage('Skipping malformed message search row', { + operation: 'replaceMessageSearchRowSync', + sessionID, + }); + return; + } + replaceMessageSearchRowModule( this.searchDeps(), sessionID, @@ -4651,7 +4775,10 @@ export class SqliteLcmStore { // Build NormalizedSession results return sessionIDs.map((sessionID) => { const row = sessionMap.get(sessionID); - const messages = messagesBySession.get(sessionID) ?? []; + const messages = filterValidConversationMessages(messagesBySession.get(sessionID) ?? [], { + operation: 'readSessionsBatchSync', + sessionID, + }); if (!row) { return { ...emptySession(sessionID), messages }; } @@ -4693,10 +4820,13 @@ export class SqliteLcmStore { partsByMessage.set(partRow.message_id, parts); } - const messages = messageRows.map((messageRow) => ({ - info: parseJson(messageRow.info_json), - parts: partsByMessage.get(messageRow.message_id) ?? [], - })); + const messages = filterValidConversationMessages( + messageRows.map((messageRow) => ({ + info: parseJson(messageRow.info_json), + parts: partsByMessage.get(messageRow.message_id) ?? [], + })), + { operation: 'readSessionSync', sessionID }, + ); if (!row) { return { ...emptySession(sessionID), messages }; @@ -4789,8 +4919,21 @@ export class SqliteLcmStore { ) .all(sessionID, messageID) as PartRow[]; + const info = parseJson(row.info_json); + if (!getValidMessageInfo(info)) { + logMalformedMessage( + 'Skipping malformed stored message', + { + operation: 'readMessageSync', + sessionID, + }, + { messageID }, + ); + return undefined; + } + return { - info: parseJson(row.info_json), + info, parts: parts.map((partRow) => { const part = parseJson(partRow.part_json); hydratePartFromArtifacts(part, artifactsByPart.get(part.id) ?? []); @@ -4952,7 +5095,16 @@ export class SqliteLcmStore { } private upsertMessageInfoSync(sessionID: string, message: ConversationMessage): void { - const info = redactStructuredValue(message.info, this.privacy); + const validated = getValidMessageInfo(message.info); + if (!validated) { + logMalformedMessage('Skipping malformed message metadata', { + operation: 'upsertMessageInfoSync', + sessionID, + }); + return; + } + + const info = redactStructuredValue(validated, this.privacy); this.getDb() .prepare( `INSERT INTO messages (message_id, session_id, created_at, info_json) @@ -5004,7 +5156,10 @@ export class SqliteLcmStore { } private async externalizeSession(session: NormalizedSession): Promise { - return externalizeSessionModule(this.artifactDeps(), session); + return externalizeSessionModule( + this.artifactDeps(), + this.sanitizeSessionMessages(session, 'externalizeSession'), + ); } private writeEvent(event: CapturedEvent): void { diff --git a/tests/store-transform.test.mjs b/tests/store-transform.test.mjs index 1c28e35..f361aaa 100644 --- a/tests/store-transform.test.mjs +++ b/tests/store-transform.test.mjs @@ -1602,3 +1602,161 @@ test('session updates refuse parent cycles and keep lineage stable', async () => await cleanupWorkspace(workspace); } }); + +test('transformMessages skips malformed messages without required info fields', async () => { + const workspace = makeWorkspace('lcm-transform-malformed-info'); + let store; + + try { + store = new SqliteLcmStore( + workspace, + makeOptions({ freshTailMessages: 1, minMessagesForTransform: 4 }), + ); + await store.init(); + await createSession(store, workspace, 's1', 0); + + const malformed = { + info: { id: 'broken-message' }, + parts: [textPart('s1', 'broken-message', 'broken-p', 'broken metadata stays visible')], + }; + const messages = [ + conversationMessage({ + sessionID: 's1', + messageID: 'm1', + created: 1, + parts: [textPart('s1', 'm1', 'm1-p', 'archived one')], + }), + conversationMessage({ + sessionID: 's1', + messageID: 'm2', + created: 2, + parts: [textPart('s1', 'm2', 'm2-p', 'archived two')], + }), + malformed, + conversationMessage({ + sessionID: 's1', + messageID: 'm3', + created: 3, + parts: [textPart('s1', 'm3', 'm3-p', 'archived three')], + }), + conversationMessage({ + sessionID: 's1', + messageID: 'm4', + created: 4, + parts: [textPart('s1', 'm4', 'm4-p', 'fresh tail')], + }), + ]; + + const changed = await store.transformMessages(messages); + const summaryPart = messages[4].parts.find( + (part) => part.type === 'text' && part.metadata?.opencodeLcm === 'archive-summary', + ); + + assert.equal(changed, true); + assert.ok(summaryPart); + assert.match(messages[0].parts[0].text, /^\[Archived by opencode-lcm:/); + assert.equal(malformed.parts[0].text, 'broken metadata stays visible'); + } finally { + store?.close(); + await cleanupWorkspace(workspace); + } +}); + +test('resume and describe skip stored messages with malformed metadata', async () => { + const workspace = makeWorkspace('lcm-resume-malformed-info'); + const dbPath = path.join(workspace, '.lcm', 'lcm.db'); + let store; + let db; + + try { + store = new SqliteLcmStore( + workspace, + makeOptions({ freshTailMessages: 1, minMessagesForTransform: 4 }), + ); + await store.init(); + + await createSession(store, workspace, 's1', 1); + for (const [messageID, created, text] of [ + ['m1', 2, 'resume archived one'], + ['m2', 3, 'resume archived two'], + ['m3', 4, 'resume archived three'], + ['m4', 5, 'resume fresh tail'], + ]) { + await captureMessage(store, { + sessionID: 's1', + messageID, + created, + parts: [textPart('s1', messageID, `${messageID}-p`, text)], + }); + } + + db = new DatabaseSync(dbPath, { + enableForeignKeyConstraints: true, + timeout: 5000, + }); + db.prepare('UPDATE messages SET info_json = ? WHERE session_id = ? AND message_id = ?').run( + JSON.stringify({ id: 'm2' }), + 's1', + 'm2', + ); + db.close(); + db = undefined; + + const resumed = await store.resume('s1'); + const described = await store.describe({ sessionID: 's1' }); + + assert.match(resumed, /resume archived one/); + assert.doesNotMatch(resumed, /resume archived two/); + assert.match(described, /Messages: 3/); + } finally { + db?.close(); + store?.close(); + await cleanupWorkspace(workspace); + } +}); + +test('capture ignores malformed message and part update events', async () => { + const workspace = makeWorkspace('lcm-capture-malformed-events'); + let store; + + try { + store = new SqliteLcmStore(workspace, makeOptions()); + await store.init(); + + await createSession(store, workspace, 's1', 1); + + await assert.doesNotReject( + store.capture({ + type: 'message.updated', + properties: { + sessionID: 's1', + info: { id: 'broken' }, + }, + }), + ); + await assert.doesNotReject( + store.capture({ + type: 'message.part.updated', + properties: { + sessionID: 's1', + part: { id: 'broken-part' }, + }, + }), + ); + + await captureMessage(store, { + sessionID: 's1', + messageID: 'm1', + created: 2, + parts: [textPart('s1', 'm1', 'm1-p', 'good message survives')], + }); + + const described = await store.describe({ sessionID: 's1' }); + + assert.match(described, /Messages: 1/); + assert.match(described, /good message survives/); + } finally { + store?.close(); + await cleanupWorkspace(workspace); + } +}); From 431774dd2204da18b4fcf0446100193c901df2f1 Mon Sep 17 00:00:00 2001 From: BookTranslator User Date: Tue, 7 Apr 2026 22:47:21 +0200 Subject: [PATCH 2/5] Remove malformed messages from transform output array transformMessages() now splices malformed entries out of the caller's messages array so they never reach the opencode backend, preventing the Bad Request that followed the earlier TypeError fix. --- src/store.ts | 7 +++++-- tests/store-transform.test.mjs | 8 ++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/store.ts b/src/store.ts index 5f789e9..1dde676 100644 --- a/src/store.ts +++ b/src/store.ts @@ -3063,9 +3063,12 @@ export class SqliteLcmStore { const validMessages = filterValidConversationMessages(messages, { operation: 'transformMessages', }); - if (validMessages.length < this.options.minMessagesForTransform) return false; + if (validMessages.length !== messages.length) { + messages.splice(0, messages.length, ...validMessages); + } + if (messages.length < this.options.minMessagesForTransform) return false; - const window = resolveArchiveTransformWindow(validMessages, this.options.freshTailMessages); + const window = resolveArchiveTransformWindow(messages, this.options.freshTailMessages); if (!window) return false; await this.prepareForRead(); diff --git a/tests/store-transform.test.mjs b/tests/store-transform.test.mjs index f361aaa..2c579b6 100644 --- a/tests/store-transform.test.mjs +++ b/tests/store-transform.test.mjs @@ -1648,14 +1648,18 @@ test('transformMessages skips malformed messages without required info fields', ]; const changed = await store.transformMessages(messages); - const summaryPart = messages[4].parts.find( + const summaryPart = messages[3].parts.find( (part) => part.type === 'text' && part.metadata?.opencodeLcm === 'archive-summary', ); assert.equal(changed, true); + assert.equal(messages.length, 4); + assert.equal( + messages.some((message) => message.info?.id === 'broken-message'), + false, + ); assert.ok(summaryPart); assert.match(messages[0].parts[0].text, /^\[Archived by opencode-lcm:/); - assert.equal(malformed.parts[0].text, 'broken metadata stays visible'); } finally { store?.close(); await cleanupWorkspace(workspace); From 7b757595c0116b4d11ba7966585c6975d07073ec Mon Sep 17 00:00:00 2001 From: BookTranslator User Date: Tue, 7 Apr 2026 23:14:17 +0200 Subject: [PATCH 3/5] Release 0.13.1 --- CHANGELOG.md | 6 ++++++ package-lock.json | 4 ++-- package.json | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd3413c..b6b80d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.13.1] - 2026-04-07 + +### Fixed +- Archive transform now removes malformed messages from the outbound message array before returning control to OpenCode, preventing follow-on backend `Bad Request` failures +- Archive, resume, describe, search indexing, and capture paths now skip malformed `message.info` metadata defensively instead of throwing when required fields are missing + ### Added - Opt-in `perf:archive` harness for large-archive regression coverage across transform, grep, snapshot, reopen, resume, and retention paths - Separate advisory `Archive Performance` workflow for scheduled/manual perf runs with JSON artifact upload diff --git a/package-lock.json b/package-lock.json index 64d6963..9d07476 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "opencode-lcm", - "version": "0.12.0", + "version": "0.13.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "opencode-lcm", - "version": "0.12.0", + "version": "0.13.1", "devDependencies": { "@biomejs/biome": "^2.4.10", "@opencode-ai/plugin": "^1.3.3", diff --git a/package.json b/package.json index 42998ff..f9706fe 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-lcm", - "version": "0.13.0", + "version": "0.13.1", "description": "Long-memory plugin for OpenCode with context-mode interop", "type": "module", "main": "./dist/index.js", From 4680645025f95d823e0ba9231675cc9ba4cdd918 Mon Sep 17 00:00:00 2001 From: BookTranslator User Date: Wed, 8 Apr 2026 01:05:01 +0200 Subject: [PATCH 4/5] Release 0.13.2 --- CHANGELOG.md | 5 +++++ package.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6b80d2..b2e1bb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.13.2] - 2026-04-08 + +### Fixed +- Republish of 0.13.1 malformed-message hardening through CI with provenance + ## [0.13.1] - 2026-04-07 ### Fixed diff --git a/package.json b/package.json index f9706fe..32628ca 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-lcm", - "version": "0.13.1", + "version": "0.13.2", "description": "Long-memory plugin for OpenCode with context-mode interop", "type": "module", "main": "./dist/index.js", From 11aad29db6a8cc934c11b8a68694ecb32cfdebd8 Mon Sep 17 00:00:00 2001 From: Milofax <2537423+Milofax@users.noreply.github.com> Date: Sun, 12 Apr 2026 08:58:10 +0200 Subject: [PATCH 5/5] fix(store): skip corrupted JSON rows instead of crashing the plugin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single corrupted `parts.part_json`, `messages.info_json`, `summary_nodes.message_ids_json`, `summary_state.root_node_ids_json`, or `artifacts.metadata_json` row in the SQLite archive currently kills the entire grep / describe / transform / summary pipeline via an uncaught `Failed to parse JSON` error from `parseJson`. Add a non-throwing `parseJsonSafe()` to utils.ts and replace every stored-row `parseJson` call site with it: Store read paths (14 call sites): - readSessionsBatchSync: part_json + info_json + artifact metadata_json - readSessionSync: part_json + info_json - readMessageSync: info_json + part_json - diagnoseSummarySession: root_node_ids_json - ensureSummaryGraphSync: root_node_ids_json - readSummaryNodeSync: message_ids_json - materializeArtifactRow: metadata_json Legacy migration paths (2 call sites): - migrateLegacyArtifacts: per-file try/catch so one bad sessions/*.json no longer kills the remaining files, and corrupted resume.json is gracefully skipped Each skipped row is logged with operation/sessionID/messageID/partID context and a 120-char preview of the offending blob, so the damage is auditable without crashing the plugin. The three in-memory `parseJson(JSON.stringify(...))` round-trips in store-artifacts.ts and the already-try/catch-guarded events.jsonl line parser stay as throwing `parseJson` — they cannot fail from disk corruption. Regression test: - Capture a session, corrupt one part_json row in SQLite, clear all FTS indexes to force the scan path, call grep and assert the surviving messages are returned instead of throwing. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/store-artifacts.ts | 3 +- src/store.ts | 173 ++++++++++++++++++++++++++------- src/utils.ts | 29 ++++++ tests/store-transform.test.mjs | 70 +++++++++++++ 4 files changed, 238 insertions(+), 37 deletions(-) diff --git a/src/store-artifacts.ts b/src/store-artifacts.ts index 2a4faea..f2f4a1c 100644 --- a/src/store-artifacts.ts +++ b/src/store-artifacts.ts @@ -23,6 +23,7 @@ import { inferFileExtension, inferUrlScheme, parseJson, + parseJsonSafe, sanitizeAutomaticRetrievalSourceText, truncate, } from './utils.js'; @@ -661,6 +662,6 @@ export function materializeArtifactRow( contentHash: row.content_hash ?? hashContent(contentText), charCount: blob?.char_count ?? row.char_count, createdAt: row.created_at, - metadata: parseJson>(row.metadata_json || '{}'), + metadata: parseJsonSafe>(row.metadata_json || '{}') ?? {}, }; } diff --git a/src/store.ts b/src/store.ts index 1dde676..d058db2 100644 --- a/src/store.ts +++ b/src/store.ts @@ -82,6 +82,7 @@ import { hashContent, isAutomaticRetrievalNoise, parseJson, + parseJsonSafe, sanitizeAutomaticRetrievalSourceText, shortNodeID, shouldSuppressLowSignalAutomaticRetrievalAnchor, @@ -1697,7 +1698,15 @@ export class SqliteLcmStore { const latestMessageCreated = archived.at(-1)?.info.time.created ?? 0; const archivedSignature = this.buildArchivedSignature(archived); - const rootIDs = state ? parseJson(state.root_node_ids_json) : []; + const rootIDs = state + ? (parseJsonSafe(state.root_node_ids_json, (error) => { + getLogger().warn('Corrupted root_node_ids_json in summary_state', { + operation: 'diagnoseSummarySession', + sessionID: session.sessionID, + error: error.message, + }); + }) ?? []) + : []; const roots = rootIDs .map((nodeID) => this.readSummaryNodeSync(nodeID)) .filter((node): node is SummaryNodeData => Boolean(node)); @@ -3664,7 +3673,14 @@ export class SqliteLcmStore { state.latest_message_created === latestMessageCreated && state.archived_signature === archivedSignature ) { - const rootIDs = parseJson(state.root_node_ids_json); + const rootIDs = + parseJsonSafe(state.root_node_ids_json, (error) => { + getLogger().warn('Corrupted root_node_ids_json in summary_state', { + operation: 'ensureSummaryGraphSync', + sessionID, + error: error.message, + }); + }) ?? []; const roots = rootIDs .map((nodeID) => this.readSummaryNodeSync(nodeID)) .filter((node): node is SummaryNodeData => Boolean(node)); @@ -3915,7 +3931,15 @@ export class SqliteLcmStore { nodeKind: row.node_kind === 'leaf' ? 'leaf' : 'internal', startIndex: row.start_index, endIndex: row.end_index, - messageIDs: parseJson(row.message_ids_json), + messageIDs: + parseJsonSafe(row.message_ids_json, (error) => { + getLogger().warn('Corrupted message_ids_json in summary_nodes', { + operation: 'readSummaryNodeSync', + nodeID: row.node_id, + sessionID: row.session_id, + error: error.message, + }); + }) ?? [], summaryText: row.summary_text, createdAt: row.created_at, }; @@ -4739,7 +4763,7 @@ export class SqliteLcmStore { contentHash: contentHash ?? hashContent(contentText), charCount: blob?.char_count ?? row.char_count, createdAt: row.created_at, - metadata: parseJson>(row.metadata_json || '{}'), + metadata: parseJsonSafe>(row.metadata_json || '{}') ?? {}, }; const list = artifactsByPart.get(artifact.partID) ?? []; list.push(artifact); @@ -4755,7 +4779,17 @@ export class SqliteLcmStore { partsByMessage = new Map(); partsBySessionMessage.set(partRow.session_id, partsByMessage); } - const part = parseJson(partRow.part_json); + const part = parseJsonSafe(partRow.part_json, (error, preview) => { + getLogger().warn('Skipping corrupted part row', { + operation: 'readSessionsBatchSync', + sessionID: partRow.session_id, + messageID: partRow.message_id, + partID: partRow.part_id, + error: error.message, + preview, + }); + }); + if (!part) continue; const artifacts = artifactsByPart.get(part.id) ?? []; hydratePartFromArtifacts(part, artifacts); const parts = partsByMessage.get(partRow.message_id) ?? []; @@ -4767,9 +4801,19 @@ export class SqliteLcmStore { const messagesBySession = new Map>(); for (const messageRow of messageRows) { const sessionParts = partsBySessionMessage.get(messageRow.session_id); + const info = parseJsonSafe(messageRow.info_json, (error, preview) => { + getLogger().warn('Skipping corrupted message row', { + operation: 'readSessionsBatchSync', + sessionID: messageRow.session_id, + messageID: messageRow.message_id, + error: error.message, + preview, + }); + }); + if (!info) continue; const messages = messagesBySession.get(messageRow.session_id) ?? []; messages.push({ - info: parseJson(messageRow.info_json), + info, parts: sessionParts?.get(messageRow.message_id) ?? [], }); messagesBySession.set(messageRow.session_id, messages); @@ -4816,7 +4860,17 @@ export class SqliteLcmStore { const partsByMessage = new Map(); for (const partRow of partRows) { const parts = partsByMessage.get(partRow.message_id) ?? []; - const part = parseJson(partRow.part_json); + const part = parseJsonSafe(partRow.part_json, (error, preview) => { + getLogger().warn('Skipping corrupted part row', { + operation: 'readSessionSync', + sessionID: partRow.session_id, + messageID: partRow.message_id, + partID: partRow.part_id, + error: error.message, + preview, + }); + }); + if (!part) continue; const artifacts = artifactsByPart.get(part.id) ?? []; hydratePartFromArtifacts(part, artifacts); parts.push(part); @@ -4824,10 +4878,24 @@ export class SqliteLcmStore { } const messages = filterValidConversationMessages( - messageRows.map((messageRow) => ({ - info: parseJson(messageRow.info_json), - parts: partsByMessage.get(messageRow.message_id) ?? [], - })), + messageRows + .map((messageRow) => { + const info = parseJsonSafe(messageRow.info_json, (error, preview) => { + getLogger().warn('Skipping corrupted message row', { + operation: 'readSessionSync', + sessionID, + messageID: messageRow.message_id, + error: error.message, + preview, + }); + }); + if (!info) return undefined; + return { + info, + parts: partsByMessage.get(messageRow.message_id) ?? [], + }; + }) + .filter((entry): entry is { info: Message; parts: Part[] } => entry !== undefined), { operation: 'readSessionSync', sessionID }, ); @@ -4922,25 +4990,45 @@ export class SqliteLcmStore { ) .all(sessionID, messageID) as PartRow[]; - const info = parseJson(row.info_json); - if (!getValidMessageInfo(info)) { - logMalformedMessage( - 'Skipping malformed stored message', - { - operation: 'readMessageSync', - sessionID, - }, - { messageID }, - ); + const info = parseJsonSafe(row.info_json, (error, preview) => { + getLogger().warn('Skipping corrupted message row', { + operation: 'readMessageSync', + sessionID, + messageID, + error: error.message, + preview, + }); + }); + if (!info || !getValidMessageInfo(info)) { + if (info) { + logMalformedMessage( + 'Skipping malformed stored message', + { + operation: 'readMessageSync', + sessionID, + }, + { messageID }, + ); + } return undefined; } return { info, - parts: parts.map((partRow) => { - const part = parseJson(partRow.part_json); + parts: parts.flatMap((partRow) => { + const part = parseJsonSafe(partRow.part_json, (error, preview) => { + getLogger().warn('Skipping corrupted part row', { + operation: 'readMessageSync', + sessionID, + messageID, + partID: partRow.part_id, + error: error.message, + preview, + }); + }); + if (!part) return []; hydratePartFromArtifacts(part, artifactsByPart.get(part.id) ?? []); - return part; + return [part]; }), }; } @@ -5206,9 +5294,18 @@ export class SqliteLcmStore { try { const entries = await readdir(sessionsDir); for (const entry of entries.filter((item) => item.endsWith('.json'))) { - const content = await readFile(path.join(sessionsDir, entry), 'utf8'); - const session = parseJson(content); - await this.persistSession(session); + try { + const content = await readFile(path.join(sessionsDir, entry), 'utf8'); + const session = parseJsonSafe(content, (error) => { + getLogger().debug('Corrupted legacy session file skipped', { + entry, + error: error.message, + }); + }); + if (session) await this.persistSession(session); + } catch (error) { + getLogger().debug('Legacy session file migration failed', { entry, error }); + } } } catch (error) { if (!hasErrorCode(error, 'ENOENT')) { @@ -5219,15 +5316,19 @@ export class SqliteLcmStore { const resumePath = path.join(this.baseDir, 'resume.json'); try { const content = await readFile(resumePath, 'utf8'); - const resumes = parseJson(content); - const insertResume = db.prepare( - `INSERT INTO resumes (session_id, note, updated_at) - VALUES (?, ?, ?) - ON CONFLICT(session_id) DO UPDATE SET note = excluded.note, updated_at = excluded.updated_at`, - ); - const now = Date.now(); - for (const [sessionID, note] of Object.entries(resumes)) { - insertResume.run(sessionID, note, now); + const resumes = parseJsonSafe(content, (error) => { + getLogger().debug('Corrupted legacy resume.json skipped', { error: error.message }); + }); + if (resumes) { + const insertResume = db.prepare( + `INSERT INTO resumes (session_id, note, updated_at) + VALUES (?, ?, ?) + ON CONFLICT(session_id) DO UPDATE SET note = excluded.note, updated_at = excluded.updated_at`, + ); + const now = Date.now(); + for (const [sessionID, note] of Object.entries(resumes)) { + insertResume.run(sessionID, note, now); + } } } catch (error) { if (!hasErrorCode(error, 'ENOENT')) { diff --git a/src/utils.ts b/src/utils.ts index 5da1b12..713eab1 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -44,6 +44,35 @@ export function parseJson(value: string): T { } } +/** + * Parses JSON without throwing. + * + * Returns `undefined` if `value` is not valid JSON. Use this for stored rows + * (parts, messages, artifacts) where a single corrupted blob must not crash + * the whole batch load — callers are expected to skip the affected row and + * continue. For structural invariants where a failure should abort, use + * `parseJson` instead. + * + * The optional `onError` callback is invoked with the parse error and a + * truncated preview of the offending input so callers can log a warning + * with their own operation/session context. + */ +export function parseJsonSafe( + value: string, + onError?: (error: Error, preview: string) => void, +): T | undefined { + try { + return JSON.parse(value) as T; + } catch (error) { + if (onError) { + const normalized = error instanceof Error ? error : new Error(String(error)); + const preview = `${value.slice(0, 120)}${value.length > 120 ? '...' : ''}`; + onError(normalized, preview); + } + return undefined; + } +} + // --- Number utilities --- export function clamp(value: number, min: number, max: number): number { diff --git a/tests/store-transform.test.mjs b/tests/store-transform.test.mjs index 2c579b6..1fc0457 100644 --- a/tests/store-transform.test.mjs +++ b/tests/store-transform.test.mjs @@ -1764,3 +1764,73 @@ test('capture ignores malformed message and part update events', async () => { await cleanupWorkspace(workspace); } }); + +test('grep scan survives a single corrupted part_json row', async () => { + const workspace = makeWorkspace('lcm-grep-corrupted-part-json'); + const dbPath = path.join(workspace, '.lcm', 'lcm.db'); + let store; + let db; + + try { + store = new SqliteLcmStore(workspace, makeOptions()); + await store.init(); + + await createSession(store, workspace, 's1', 1); + for (const [messageID, created, text] of [ + ['m1', 2, 'cosmic-ray keep alpha'], + ['m2', 3, 'cosmic-ray corrupt beta'], + ['m3', 4, 'cosmic-ray keep gamma'], + ]) { + await captureMessage(store, { + sessionID: 's1', + messageID, + created, + parts: [textPart('s1', messageID, `${messageID}-p`, text)], + }); + } + + store.close(); + store = undefined; + + db = new DatabaseSync(dbPath, { + enableForeignKeyConstraints: true, + timeout: 5000, + }); + // Simulate an on-disk corruption: truncate m2's part_json mid-string. + // This is the exact failure mode from the real crash report: + // Failed to parse JSON: Unterminated string + // at parseJson → readSessionsBatchSync → readScopedSessionsSync → searchByScan + const truncated = '{"id": "m2-p", "type": "text", "text": "cosmic-ray corrupt b'; + const updated = db + .prepare('UPDATE parts SET part_json = ? WHERE session_id = ? AND message_id = ?') + .run(truncated, 's1', 'm2'); + assert.equal(updated.changes, 1); + // Clear all FTS indexes so grep is forced down the scan path — that is + // the exact code path the user crashed on in production (searchByScan → + // readScopedSessionsSync → readSessionsBatchSync → parseJson). + db.prepare('DELETE FROM message_fts').run(); + db.prepare('DELETE FROM summary_fts').run(); + db.prepare('DELETE FROM artifact_fts').run(); + db.close(); + db = undefined; + + store = new SqliteLcmStore(workspace, makeOptions()); + await store.init(); + // Warm up the lazy DB handle the way real plugin usage does before grep. + await store.stats(); + + const results = await store.grep({ query: 'cosmic-ray', sessionID: 's1', limit: 10 }); + + const ids = results.map((result) => result.id).sort(); + assert.ok(ids.includes('m1'), 'm1 should still be searchable after corruption'); + assert.ok(ids.includes('m3'), 'm3 should still be searchable after corruption'); + // m2's part JSON is unparseable, so m2 has no searchable content left and + // must be silently skipped — but the scan itself must complete without + // throwing, which is what the fix protects. + assert.ok(!ids.includes('m2'), 'corrupted m2 should be skipped, not crash the scan'); + } finally { + db?.close(); + store?.close(); + await cleanupWorkspace(workspace); + } +});