From b567f4cc924b041d893932f21c81917f5b3126d4 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 20:39:07 +0200 Subject: [PATCH 01/14] fix(openclaw): coordinate chat turn persistence durably --- .../adapter-openclaw/src/ChatTurnWriter.ts | 700 +++++++++++++++--- .../adapter-openclaw/src/DkgChannelPlugin.ts | 81 +- .../adapter-openclaw/src/DkgNodePlugin.ts | 3 + packages/adapter-openclaw/src/types.ts | 2 + .../test/ChatTurnWriter.test.ts | 410 ++++++++++ .../adapter-openclaw/test/dkg-channel.test.ts | 109 +++ packages/adapter-openclaw/test/plugin.test.ts | 1 + 7 files changed, 1216 insertions(+), 90 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index ba27fd3b8..3ddb67791 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -12,6 +12,9 @@ interface Logger { export interface ChatTurnMessage { role: "user" | "assistant" | "system" | "tool"; content: string | Array<{ type: string; text?: string }>; + context?: Record; + metadata?: Record; + [k: string]: unknown; /** * Optional list of tool invocations the model issued in this assistant * step. Present on intermediate assistant messages that exist solely to @@ -29,6 +32,25 @@ export interface AgentEndContext { messages: ChatTurnMessage[]; } +interface ComputedChatTurnPair { + user: string; + assistant: string; + pairIndex: number; + externalTurnIds: string[]; + externalDirect: boolean; +} + +interface ExternalMarkerAction { + skip: boolean; + markers: string[]; +} + +interface WatermarkStateSnapshot { + cachedHad: boolean; + cachedIndex?: number; + pendingIndex?: number; +} + /** * Canonical shape mirrors `InternalHookEvent` from * `@openclaw/openclaw/src/hooks/internal-hook-types.ts`: @@ -114,6 +136,12 @@ export class ChatTurnWriter { // worst case is W4a skipping pairs that W4b actually wrote — same // failure mode as the lastIdx peek hit, no new data loss. private w4bSessionCounts: Map = new Map(); + // Direct-channel persists (Node-UI through DkgChannelPlugin) bypass + // ChatTurnWriter's daemon write path but append to the same OpenClaw + // transcript. These durable correlation markers let later W4a backfill + // skip exactly those already-persisted UI pairs across restarts without + // confusing two legitimate same-content turns. + private externalTurnMarkers: Map> = new Map(); // In-flight persist tracking — `resetSessionState()` awaits these so a // pre-reset persist can't advance the just-reset watermark afterward. // Both W4a (`onAgentEnd`) and W4b (`onMessageSent`) MUST register their @@ -230,6 +258,7 @@ export class ChatTurnWriter { // concurrent persists keep their advances; nothing got wiped. const mergedWm = new Map(this.cachedWatermarks); const mergedBc = new Map(this.w4bSessionCounts); + const mergedMarkers = this.cloneExternalTurnMarkers(this.externalTurnMarkers); try { if (fs.existsSync(newWatermarkFilePath)) { const raw = fs.readFileSync(newWatermarkFilePath, "utf-8"); @@ -240,9 +269,16 @@ export class ChatTurnWriter { if (typeof val === "number") { w = val; } else if (val && typeof val === "object") { - const obj = val as { w?: unknown; b?: unknown }; + const obj = val as { w?: unknown; b?: unknown; m?: unknown }; if (typeof obj.w === "number") w = obj.w; if (typeof obj.b === "number") b = obj.b; + if (obj.m && typeof obj.m === "object" && !Array.isArray(obj.m)) { + this.mergeExternalTurnMarkers( + mergedMarkers, + key, + obj.m as Record, + ); + } } mergedWm.set(key, Math.max(mergedWm.get(key) ?? -1, w)); mergedBc.set(key, Math.max(mergedBc.get(key) ?? 0, b)); @@ -265,7 +301,11 @@ export class ChatTurnWriter { if (!fs.existsSync(newDir)) fs.mkdirSync(newDir, { recursive: true }); // T45 — Pass the merged temp maps explicitly so live state // stays untouched if the write fails. - wrote = this.writeWatermarkFile(newWatermarkFilePath, { wm: mergedWm, bc: mergedBc }); + wrote = this.writeWatermarkFile(newWatermarkFilePath, { + wm: mergedWm, + bc: mergedBc, + markers: mergedMarkers, + }); } catch (err) { // T23 — Surface BOTH mkdirSync failures (ENOTDIR / ENOENT on // an unwritable parent) AND writeWatermarkFile failures @@ -288,6 +328,13 @@ export class ChatTurnWriter { for (const [key, val] of mergedBc) { this.w4bSessionCounts.set(key, Math.max(this.w4bSessionCounts.get(key) ?? 0, val)); } + for (const [key, markers] of mergedMarkers) { + const live = this.externalTurnMarkers.get(key) ?? new Map(); + for (const [marker, count] of markers) { + live.set(marker, Math.max(live.get(marker) ?? 0, count)); + } + if (live.size > 0) this.externalTurnMarkers.set(key, live); + } } // T45 — On failure, live state is already untouched. No restore // needed; concurrent persists' advances during the failed merge @@ -364,13 +411,24 @@ export class ChatTurnWriter { if (typeof val === "number") { this.cachedWatermarks.set(key, val); } else if (val && typeof val === "object") { - const obj = val as { w?: unknown; b?: unknown }; + const obj = val as { w?: unknown; b?: unknown; m?: unknown }; if (typeof obj.w === "number") { this.cachedWatermarks.set(key, obj.w); } if (typeof obj.b === "number") { this.w4bSessionCounts.set(key, obj.b); } + if (obj.m && typeof obj.m === "object" && !Array.isArray(obj.m)) { + const markers = new Map(); + for (const [hash, count] of Object.entries(obj.m as Record)) { + if (typeof count === "number" && count > 0) { + markers.set(hash, count); + } + } + if (markers.size > 0) { + this.externalTurnMarkers.set(key, markers); + } + } } } } @@ -387,6 +445,7 @@ export class ChatTurnWriter { if (ctx?.channelId === "dkg-ui") return; const sessionId = this.deriveSessionId(ctx); if (!sessionId) return; + const externalCursorKey = this.externalCursorKeyFromHookPayload(undefined, ctx); // T4 — Serialize agent_end calls per session via a Promise chain. // The full computeDelta + per-pair persist loop runs INSIDE the // chain so a later fire's `computeDelta` reads the earlier fire's @@ -399,11 +458,15 @@ export class ChatTurnWriter { // ensures the NEXT fire's work runs only after this fire's work // settles. `flush()` still drains the persist via `inFlightPersists` // tracked inside `runAgentEndPersist` → `trackPersistJob`. + const resetAtSchedule = this.pendingResets.get(sessionId); const previous = this.w4aSessionChains.get(sessionId) ?? Promise.resolve(); const work = previous // Never block the next fire on the previous fire's failure. .catch(() => undefined) - .then(() => this.runAgentEndPersist(event, sessionId)); + .then(async () => { + if (resetAtSchedule) await resetAtSchedule; + await this.runAgentEndPersist(event, sessionId, externalCursorKey); + }); this.w4aSessionChains.set(sessionId, work); work.finally(() => { // Cleanup so idle sessions don't accumulate empty chains. Only @@ -417,13 +480,8 @@ export class ChatTurnWriter { // ordering; flush() drains via inFlightPersists. } - private async runAgentEndPersist(event: AgentEndContext, sessionId: string): Promise { + private async runAgentEndPersist(event: AgentEndContext, sessionId: string, externalCursorKey?: string): Promise { try { - // If a compaction/reset is mid-flight for this session, wait for it - // before reading the watermark. Otherwise we'd compute the delta - // against stale state. - const pendingReset = this.pendingResets.get(sessionId); - if (pendingReset) await pendingReset; // R18.2 — Take the MAX of W4a's pair-indexed watermark and W4b's // session count (minus 1, because count is 1-based). When typed // hooks were unavailable for a stretch (e.g., the `setup-runtime` @@ -442,10 +500,35 @@ export class ChatTurnWriter { // point. Without sequencing, a failed middle pair could be skipped // when the tail succeeds. const lastIdx = pairs.length - 1; + const externalContentMatchCounts = externalCursorKey + ? this.externalContentMatchCounts(externalCursorKey, pairs) + : new Map(); const job = this.trackPersistJob(sessionId, async () => { for (let i = 0; i < pairs.length; i++) { - const { user, assistant, pairIndex } = pairs[i]; + const { user, assistant, pairIndex, externalTurnIds, externalDirect } = pairs[i]; if (!user && !assistant) continue; + const externalMarkerAction = externalCursorKey + ? this.consumeExternalTurnMarkersForPair( + externalCursorKey, + user, + assistant, + externalTurnIds, + externalDirect, + externalContentMatchCounts, + ) + : { skip: false, markers: [] }; + if (externalCursorKey && externalMarkerAction.markers.length > 0) { + const watermarkSnapshot = this.snapshotWatermarkState(sessionId); + if (externalMarkerAction.skip) this.bumpWatermark(sessionId, pairIndex); + if (!this.commitWatermarkStateSync(sessionId)) { + for (const marker of externalMarkerAction.markers) { + this.restoreExternalTurnMarker(externalCursorKey, marker); + } + this.restoreWatermarkState(sessionId, watermarkSnapshot); + throw new Error("Failed to write external chat-turn marker consumption"); + } + if (externalMarkerAction.skip) continue; + } // W4a turnId mixes pair position into the hash so backfill of // two same-text pairs (e.g. user said "hi" twice) produces // distinct turnIds and BOTH persist. @@ -576,7 +659,7 @@ export class ChatTurnWriter { // Reset is SESSION-SCOPED. The hook returns the reset promise so // OpenClaw's typed-hook dispatcher awaits it — the next `agent_end` // for this session can't race past the in-flight cleanup. - await this.runReset(this.deriveSessionId(ctx)); + await this.runReset(this.resetIdentityFromHookPayload(event, ctx)); } catch (err) { this.logger.error?.("[ChatTurnWriter.onBeforeCompaction] Error", { err }); } @@ -585,43 +668,83 @@ export class ChatTurnWriter { async onBeforeReset(event: any, ctx?: any): Promise { try { this.flushSync(); - await this.runReset(this.deriveSessionId(ctx)); + await this.runReset(this.resetIdentityFromHookPayload(event, ctx)); } catch (err) { this.logger.error?.("[ChatTurnWriter.onBeforeReset] Error", { err }); } } + async markExternalTurnPersistedDurable(opts: { + sessionKey?: string; + turnId?: string; + user: string; + assistant: string; + }): Promise { + const externalCursorKey = this.externalCursorKeyFromSessionKey(opts.sessionKey); + const assistant = this.stripRecalledMemory(opts.assistant); + const markers = [ + this.externalTurnMarkerId(opts.turnId), + this.externalTurnContentMarkerKey(opts.user, assistant), + ].filter(Boolean); + if (!externalCursorKey || markers.length === 0) return; + for (const marker of markers) { + this.restoreExternalTurnMarker(externalCursorKey, marker); + } + if (!this.commitWatermarkStateSync()) { + for (const marker of markers) { + this.consumeExternalTurnMarker(externalCursorKey, marker); + } + throw new Error("Failed to write external chat-turn marker"); + } + } + /** * Track the reset promise on `pendingResets` so `onAgentEnd` / * `onMessageSent` can `await` it before processing a turn that arrived * mid-reset. Without this gate, a fast post-compaction `agent_end` * could read the stale watermark before the reset finishes draining. */ - private async runReset(sessionId: string): Promise { - if (!sessionId) return; - // T4 — Drain any queued (but not yet started) agent_end chain - // work BEFORE registering this reset in `pendingResets`. The - // chain's `.then(() => runAgentEndPersist(...))` delays - // `trackPersistJob` registration by one microtask, so a chained- - // but-not-yet-running fire wouldn't appear in `inFlightPersists` - // (which `resetSessionState` awaits). Critically, this drain - // happens BEFORE `pendingResets.set` — otherwise the chained - // `runAgentEndPersist` would see our pending reset, await it, - // and deadlock against itself. Draining first lets the chained - // work see no-pending-reset and run with the pre-compaction - // state; the reset then wipes after the work completes. - const chain = this.w4aSessionChains.get(sessionId); - if (chain) { - await chain.catch(() => undefined); + private async runReset(identity: { + sessionId: string; + channelId?: string; + accountId?: string; + conversationId?: string; + sessionKey?: string; + externalCursorKey?: string; + }): Promise { + const sessionIds = this.collectResetSessionIds(identity); + if (sessionIds.length === 0 && !identity.externalCursorKey) return; + let startReset!: () => void; + const reset = new Promise((resolve, reject) => { + startReset = () => { + void (async () => { + // T4/T81 — Set the pending reset gate before draining older + // W4a chain work. onAgentEnd captures the reset promise at + // scheduling time, so chain entries queued before this reset do + // not wait on themselves, while new W4a/W4b/internal-hook work + // that arrives after the gate is installed waits or replays. + for (const sessionId of sessionIds) { + const chain = this.w4aSessionChains.get(sessionId); + if (chain) { + await chain.catch(() => undefined); + } + } + await this.resetSessionState(sessionIds, identity.externalCursorKey); + })().then(resolve, reject); + }; + }); + for (const sessionId of sessionIds) { + this.pendingResets.set(sessionId, reset); } - const reset = this.resetSessionState(sessionId); - this.pendingResets.set(sessionId, reset); + startReset(); try { await reset; } finally { // Only delete if no newer reset replaced ours. - if (this.pendingResets.get(sessionId) === reset) { - this.pendingResets.delete(sessionId); + for (const sessionId of sessionIds) { + if (this.pendingResets.get(sessionId) === reset) { + this.pendingResets.delete(sessionId); + } } } } @@ -637,37 +760,44 @@ export class ChatTurnWriter { * `persistOne` calls `saveWatermark(0)`, leaving stale state for the next * `agent_end` against a smaller post-compaction array. */ - private async resetSessionState(sessionId: string): Promise { - if (!sessionId) return; - const inFlight = this.inFlightPersists.get(sessionId); - if (inFlight && inFlight.size > 0) { - // Snapshot the set — settle every job (success or failure) before - // wiping watermark state so a late completion can't reintroduce it. - const pending = Array.from(inFlight); - await Promise.allSettled(pending); + private async resetSessionState(sessionIds: string[] | string, externalCursorKey?: string): Promise { + const ids = Array.isArray(sessionIds) ? sessionIds : [sessionIds].filter(Boolean); + if (ids.length === 0 && !externalCursorKey) return; + for (const sessionId of ids) { + const inFlight = this.inFlightPersists.get(sessionId); + if (inFlight && inFlight.size > 0) { + // Snapshot the set — settle every job (success or failure) before + // wiping watermark state so a late completion can't reintroduce it. + const pending = Array.from(inFlight); + await Promise.allSettled(pending); + } } - this.inFlightPersists.delete(sessionId); - this.w4aSessionChains.delete(sessionId); - this.cachedWatermarks.delete(sessionId); - const entry = this.debounceTimers.get(sessionId); - if (entry) { - clearTimeout(entry.timer); - this.debounceTimers.delete(sessionId); + for (const sessionId of ids) { + this.inFlightPersists.delete(sessionId); + this.cachedWatermarks.delete(sessionId); + const entry = this.debounceTimers.get(sessionId); + if (entry) { + clearTimeout(entry.timer); + this.debounceTimers.delete(sessionId); + } + // `conversationKeyFromInternalEvent` and `composeSessionId` produce the + // same string shape (`openclaw::::`), + // so a session reset deletes its pending entry by exact key — no + // sessionKey suffix matching, which would falsely clear unrelated + // conversations whose sessionKey shares a trailing fragment OR contains + // raw `:` (e.g. the `agent::` keys created in + // `DkgChannelPlugin`). + this.pendingUserMessages.delete(sessionId); + this.clearSessionTurnIds(sessionId); + // R18.2 — Reset the W4b session count too. After compaction the + // `messages[]` array is rewritten, so the W4b count's "I persisted + // N turns" no longer maps to the new pair indices. Leaving stale + // count would skip new pairs in `computeDelta`. + this.w4bSessionCounts.delete(sessionId); + } + if (externalCursorKey) { + this.externalTurnMarkers.delete(externalCursorKey); } - // `conversationKeyFromInternalEvent` and `composeSessionId` produce the - // same string shape (`openclaw::::`), - // so a session reset deletes its pending entry by exact key — no - // sessionKey suffix matching, which would falsely clear unrelated - // conversations whose sessionKey shares a trailing fragment OR contains - // raw `:` (e.g. the `agent::` keys created in - // `DkgChannelPlugin`). - this.pendingUserMessages.delete(sessionId); - this.clearSessionTurnIds(sessionId); - // R18.2 — Reset the W4b session count too. After compaction the - // `messages[]` array is rewritten, so the W4b count's "I persisted - // N turns" no longer maps to the new pair indices. Leaving stale - // count would skip new pairs in `computeDelta`. - this.w4bSessionCounts.delete(sessionId); this.writeWatermarkFile(); } @@ -678,6 +808,11 @@ export class ChatTurnWriter { if (channelId === "dkg-ui") return; const conversationKey = this.conversationKeyFromInternalEvent(ev); if (!conversationKey) return; + const pendingReset = this.pendingResets.get(conversationKey); + if (pendingReset) { + void pendingReset.then(() => this.onMessageReceived(ev)).catch(() => undefined); + return; + } const text = readEventText(ev); // R15.2 — Skip attachment-only / non-text inbound events. `readEventText` // returns "" when the envelope carries no text payload (e.g. an image @@ -823,8 +958,10 @@ export class ChatTurnWriter { // it. Without tracking, a `message:sent` write mid-compaction // could land its `saveWatermark()` after the reset clears state. this.trackPersistJob(sessionId, async () => { + let daemonPersisted = false; try { await this.persistOne(sessionId, userText, assistantText, turnId); + daemonPersisted = true; // Post-success: stamp the content-only `w4bOrigin` key on // the SHORT-TTL cross-path map (T5) so a later W4a // `agent_end` last-pair peek can see that W4b already @@ -859,9 +996,18 @@ export class ChatTurnWriter { // backfill (count resets to 0, watermark file is // still -1, savedUpTo computes to -1, computeDelta // emits everything). - this.scheduleWatermarkFlush(sessionId); + if (!this.commitWatermarkStateSync(sessionId)) { + throw new Error("Failed to write W4b chat-turn watermark"); + } } } catch (err) { + if (daemonPersisted) { + this.logger.error?.( + "[ChatTurnWriter.onMessageSent] Persist succeeded but durable W4b state write failed", + { err }, + ); + return; + } // W4b is the ONLY path with a copy of `userText` (it lives // ephemerally in the FIFO queue). On a hard persist failure // there's no `agent_end` backfill — the messages array doesn't @@ -1095,15 +1241,48 @@ export class ChatTurnWriter { } flushSync(): void { + const applied = this.applyPendingWatermarks(); + if (applied) { + this.writeWatermarkFile(); + } + } + + private applyPendingWatermarks(sessionId?: string): boolean { let applied = false; - for (const [sessionId, entry] of this.debounceTimers.entries()) { + for (const [key, entry] of Array.from(this.debounceTimers.entries())) { + if (sessionId && key !== sessionId) continue; clearTimeout(entry.timer); - this.cachedWatermarks.set(sessionId, entry.pendingIndex); + this.cachedWatermarks.set(key, entry.pendingIndex); + this.debounceTimers.delete(key); applied = true; } - this.debounceTimers.clear(); - if (applied) { - this.writeWatermarkFile(); + return applied; + } + + private commitWatermarkStateSync(sessionId?: string): boolean { + this.applyPendingWatermarks(sessionId); + return this.writeWatermarkFile(); + } + + private snapshotWatermarkState(sessionId: string): WatermarkStateSnapshot { + return { + cachedHad: this.cachedWatermarks.has(sessionId), + cachedIndex: this.cachedWatermarks.get(sessionId), + pendingIndex: this.debounceTimers.get(sessionId)?.pendingIndex, + }; + } + + private restoreWatermarkState(sessionId: string, snapshot: WatermarkStateSnapshot): void { + const existing = this.debounceTimers.get(sessionId); + if (existing) clearTimeout(existing.timer); + this.debounceTimers.delete(sessionId); + if (snapshot.cachedHad) { + this.cachedWatermarks.set(sessionId, snapshot.cachedIndex ?? -1); + } else { + this.cachedWatermarks.delete(sessionId); + } + if (snapshot.pendingIndex !== undefined) { + this.saveWatermark(sessionId, snapshot.pendingIndex); } } @@ -1119,8 +1298,8 @@ export class ChatTurnWriter { private computeDelta( messages: ChatTurnMessage[], savedUpTo: number, - ): Array<{ user: string; assistant: string; pairIndex: number }> { - const pairs: Array<{ user: string; assistant: string; pairIndex: number }> = []; + ): ComputedChatTurnPair[] { + const pairs: ComputedChatTurnPair[] = []; // R19.1 — Queue of unmatched user messages. Two transcript shapes // were previously mis-parsed: // * `[user1, user2, assistant]` — the prior single-slot @@ -1136,7 +1315,11 @@ export class ChatTurnWriter { // non-tool-call assistant turn. Any assistant carrying tool calls // is treated as intermediate regardless of whether it also has // text content. - const pendingUsers: string[] = []; + const pendingUsers: Array<{ + text: string; + externalTurnIds: string[]; + externalDirect: boolean; + }> = []; let pairIndex = 0; for (const msg of messages) { if (msg.role === "user") { @@ -1149,7 +1332,13 @@ export class ChatTurnWriter { // assistant-only pair (`{ user: "", assistant: reply }`) // for any image-only user message followed by a reply. const userText = this.extractText(msg.content); - if (userText) pendingUsers.push(userText); + if (userText) { + pendingUsers.push({ + text: userText, + externalTurnIds: this.extractExternalTurnIds(msg), + externalDirect: this.hasExternalDirectChannelMetadata(msg), + }); + } } else if (msg.role === "assistant") { const text = this.extractText(msg.content); const hasToolCalls = Array.isArray(msg.toolCalls) ? msg.toolCalls.length > 0 @@ -1190,13 +1379,19 @@ export class ChatTurnWriter { // put so a later real reply gets the same index. continue; } - const userText = pendingUsers.join("\n"); + const userText = pendingUsers.map((pending) => pending.text).join("\n"); + const externalDirect = pendingUsers.length === 1 && pendingUsers[0].externalDirect; + const externalTurnIds = externalDirect + ? Array.from(new Set(pendingUsers.flatMap((pending) => pending.externalTurnIds))) + : []; pendingUsers.length = 0; if (pairIndex > savedUpTo) { pairs.push({ user: userText, assistant: this.stripRecalledMemory(text), pairIndex, + externalTurnIds, + externalDirect, }); } pairIndex++; @@ -1251,6 +1446,69 @@ export class ChatTurnWriter { return out.trim(); } + private extractExternalTurnIds(msg: ChatTurnMessage): string[] { + const ids = new Set(); + const add = (value: unknown): void => { + if (typeof value === "string" && value.trim()) ids.add(value.trim()); + }; + + add((msg as any).dkgTurnId); + add((msg as any).DkgTurnId); + add((msg as any).turnId); + add((msg as any).correlationId); + + const context = msg.context; + if (context && typeof context === "object") { + add((context as any).dkgTurnId); + add((context as any).DkgTurnId); + add((context as any).turnId); + add((context as any).correlationId); + add((context as any).CorrelationId); + } + + const metadata = msg.metadata; + if (metadata && typeof metadata === "object") { + add((metadata as any).dkgTurnId); + add((metadata as any).DkgTurnId); + add((metadata as any).turnId); + add((metadata as any).correlationId); + add((metadata as any).CorrelationId); + } + + return Array.from(ids); + } + + private hasExternalDirectChannelMetadata(msg: ChatTurnMessage): boolean { + const values: unknown[] = [ + (msg as any).channelId, + (msg as any).provider, + (msg as any).Provider, + (msg as any).surface, + (msg as any).Surface, + ]; + const context = msg.context; + if (context && typeof context === "object") { + values.push( + (context as any).channelId, + (context as any).provider, + (context as any).Provider, + (context as any).surface, + (context as any).Surface, + ); + } + const metadata = msg.metadata; + if (metadata && typeof metadata === "object") { + values.push( + (metadata as any).channelId, + (metadata as any).provider, + (metadata as any).Provider, + (metadata as any).surface, + (metadata as any).Surface, + ); + } + return values.some((value) => typeof value === "string" && value === "dkg-ui"); + } + /** * Strip control chars and bound length without dropping the * distinguishing suffix. R13.2 — naive `substring(0, 64)` collapsed @@ -1315,6 +1573,135 @@ export class ChatTurnWriter { return `w4b-content::${this.contentHash(user, assistant)}`; } + private externalTurnMarkerId(turnId?: unknown): string { + if (typeof turnId !== "string" || turnId.trim().length === 0) return ""; + return `external-id::${createHash("sha256").update(turnId.trim()).digest("hex").slice(0, 16)}`; + } + + private externalTurnContentMarkerKey(user: string, assistant: string): string { + if (!user && !assistant) return ""; + return `external-content::${this.contentHash(user, assistant)}`; + } + + private externalContentMatchCounts( + sessionKeyCursor: string, + pairs: ComputedChatTurnPair[], + ): Map { + const bucket = this.externalTurnMarkers.get(sessionKeyCursor); + const counts = new Map(); + if (!bucket) return counts; + for (const pair of pairs) { + if (!pair.externalDirect) continue; + const marker = this.externalTurnContentMarkerKey(pair.user, pair.assistant); + if (marker && bucket.has(marker)) { + counts.set(marker, (counts.get(marker) ?? 0) + 1); + } + } + return counts; + } + + private consumeExternalTurnMarkersForPair( + sessionKeyCursor: string, + user: string, + assistant: string, + turnIds: string[], + externalDirect: boolean, + contentMatchCounts: Map, + ): ExternalMarkerAction { + const consumed: string[] = []; + for (const turnId of turnIds) { + const marker = this.externalTurnMarkerId(turnId); + if (marker && this.consumeExternalTurnMarker(sessionKeyCursor, marker)) { + consumed.push(marker); + const contentMarker = this.externalTurnContentMarkerKey(user, assistant); + if (contentMarker && this.consumeExternalTurnMarker(sessionKeyCursor, contentMarker)) { + consumed.push(contentMarker); + } + return { skip: true, markers: consumed }; + } + } + + const contentMarker = this.externalTurnContentMarkerKey(user, assistant); + if ( + externalDirect + && turnIds.length === 0 + && contentMarker + && contentMatchCounts.get(contentMarker) === 1 + && this.consumeExternalTurnMarker(sessionKeyCursor, contentMarker) + ) { + consumed.push(contentMarker); + return { skip: true, markers: consumed }; + } + + if ( + externalDirect + && contentMarker + && (turnIds.length > 0 || (contentMatchCounts.get(contentMarker) ?? 0) > 1) + ) { + consumed.push(...this.retireExternalTurnMarker(sessionKeyCursor, contentMarker)); + } + return { skip: false, markers: consumed }; + } + + private consumeExternalTurnMarker(sessionKeyCursor: string, marker: string): boolean { + const bucket = this.externalTurnMarkers.get(sessionKeyCursor); + if (!bucket) return false; + const count = bucket.get(marker) ?? 0; + if (count <= 0) return false; + if (count === 1) { + bucket.delete(marker); + } else { + bucket.set(marker, count - 1); + } + if (bucket.size === 0) { + this.externalTurnMarkers.delete(sessionKeyCursor); + } + return true; + } + + private retireExternalTurnMarker(sessionKeyCursor: string, marker: string): string[] { + const bucket = this.externalTurnMarkers.get(sessionKeyCursor); + if (!bucket) return []; + const count = bucket.get(marker) ?? 0; + if (count <= 0) return []; + bucket.delete(marker); + if (bucket.size === 0) { + this.externalTurnMarkers.delete(sessionKeyCursor); + } + return Array.from({ length: count }, () => marker); + } + + private restoreExternalTurnMarker(sessionKeyCursor: string, marker: string): void { + if (!marker) return; + const bucket = this.externalTurnMarkers.get(sessionKeyCursor) ?? new Map(); + bucket.set(marker, (bucket.get(marker) ?? 0) + 1); + this.externalTurnMarkers.set(sessionKeyCursor, bucket); + } + + private cloneExternalTurnMarkers( + source: Map>, + ): Map> { + const clone = new Map>(); + for (const [key, markers] of source) { + clone.set(key, new Map(markers)); + } + return clone; + } + + private mergeExternalTurnMarkers( + target: Map>, + key: string, + markers: Record, + ): void { + const bucket = target.get(key) ?? new Map(); + for (const [marker, count] of Object.entries(markers)) { + if (typeof count === "number" && count > 0) { + bucket.set(marker, Math.max(bucket.get(marker) ?? 0, count)); + } + } + if (bucket.size > 0) target.set(key, bucket); + } + /** * R15.1 — Per-turn in-flight reservation key for the W4b path. * Distinct from the cross-path `w4bOrigin` (which is content-only and @@ -1414,13 +1801,63 @@ export class ChatTurnWriter { * `deriveSessionIdFromEvent` for dedup. */ private deriveSessionId(ctx?: any): string { - if (!ctx || !ctx.channelId || !ctx.sessionKey) return ""; - return this.composeSessionId({ - channelId: ctx.channelId, - accountId: ctx.accountId, - conversationId: ctx.conversationId, - sessionKey: ctx.sessionKey, - }); + const identity = this.identityFieldsFromPayload(ctx); + if (!identity.channelId || !identity.sessionKey) return ""; + return this.composeSessionId(identity); + } + + private identityFieldsFromPayload(payload?: any): { + channelId?: string; + accountId?: string; + conversationId?: string; + sessionKey?: string; + } { + if (!payload || typeof payload !== "object") return {}; + const nested = typeof payload.context === "object" && payload.context ? payload.context : {}; + const pick = (key: "channelId" | "accountId" | "conversationId" | "sessionKey"): string | undefined => { + const direct = payload[key]; + if (typeof direct === "string") return direct; + const nestedValue = (nested as any)[key]; + return typeof nestedValue === "string" ? nestedValue : undefined; + }; + return { + channelId: pick("channelId"), + accountId: pick("accountId"), + conversationId: pick("conversationId"), + sessionKey: pick("sessionKey"), + }; + } + + private resetIdentityFromHookPayload(event?: any, ctx?: any): { + sessionId: string; + channelId?: string; + accountId?: string; + conversationId?: string; + sessionKey?: string; + externalCursorKey?: string; + } { + const ctxFields = this.identityFieldsFromPayload(ctx); + const eventFields = this.identityFieldsFromPayload(event); + const identity = { + channelId: ctxFields.channelId ?? eventFields.channelId, + accountId: ctxFields.accountId ?? eventFields.accountId, + conversationId: ctxFields.conversationId ?? eventFields.conversationId, + sessionKey: ctxFields.sessionKey ?? eventFields.sessionKey, + }; + const sessionId = identity.channelId && identity.sessionKey + ? this.composeSessionId(identity) + : ""; + return { + ...identity, + sessionId, + externalCursorKey: this.externalCursorKeyFromSessionKey(identity.sessionKey), + }; + } + + private externalCursorKeyFromHookPayload(event?: any, ctx?: any): string { + const ctxFields = this.identityFieldsFromPayload(ctx); + const eventFields = this.identityFieldsFromPayload(event); + return this.externalCursorKeyFromSessionKey(ctxFields.sessionKey ?? eventFields.sessionKey); } /** @@ -1471,6 +1908,90 @@ export class ChatTurnWriter { return `openclaw:${ids.join(":")}`; } + private externalCursorKeyFromSessionKey(sessionKey?: unknown): string { + if (typeof sessionKey !== "string" || sessionKey.trim().length === 0) return ""; + return `openclaw:transcript:${this.encodeIdField(this.sanitize(sessionKey))}`; + } + + private collectResetSessionIds(identity: { + sessionId: string; + channelId?: string; + accountId?: string; + conversationId?: string; + sessionKey?: string; + }): string[] { + const ids = new Set(); + if (identity.sessionId) ids.add(identity.sessionId); + if (!identity.channelId || !identity.sessionKey) return Array.from(ids); + const expected = { + channelId: this.encodeIdField(this.sanitize(identity.channelId)), + accountId: typeof identity.accountId === "string" + ? this.encodeIdField(this.sanitize(identity.accountId)) + : undefined, + conversationId: typeof identity.conversationId === "string" + ? this.encodeIdField(this.sanitize(identity.conversationId)) + : undefined, + sessionKey: this.encodeIdField(this.sanitize(identity.sessionKey)), + }; + for (const candidate of this.collectKnownSessionIds()) { + const parsed = this.parseComposedSessionId(candidate); + if (!parsed) continue; + if (parsed.channelId !== expected.channelId) continue; + if (parsed.sessionKey !== expected.sessionKey) continue; + if (expected.accountId !== undefined && parsed.accountId !== expected.accountId) continue; + if (expected.conversationId !== undefined && parsed.conversationId !== expected.conversationId) continue; + ids.add(candidate); + } + return Array.from(ids); + } + + private collectKnownSessionIds(): Set { + const ids = new Set(); + const add = (key: string): void => { + if (this.parseComposedSessionId(key)) ids.add(key); + }; + for (const key of this.cachedWatermarks.keys()) add(key); + for (const key of this.w4bSessionCounts.keys()) add(key); + for (const key of this.debounceTimers.keys()) add(key); + for (const key of this.pendingUserMessages.keys()) add(key); + for (const key of this.inFlightPersists.keys()) add(key); + for (const key of this.w4aSessionChains.keys()) add(key); + for (const key of this.recentTurnIds.keys()) { + add(this.sessionIdFromCompositeDedupKey(key)); + } + for (const key of this.crossPathStamps.keys()) { + add(this.sessionIdFromCompositeDedupKey(key)); + } + for (const key of this.crossPathInflight.keys()) { + add(this.sessionIdFromCompositeDedupKey(key)); + } + return ids; + } + + private sessionIdFromCompositeDedupKey(key: string): string { + if (!key.startsWith("openclaw:")) return ""; + const parts = key.split(":"); + if (parts.length < 5) return ""; + const sessionId = parts.slice(0, 5).join(":"); + return this.parseComposedSessionId(sessionId) ? sessionId : ""; + } + + private parseComposedSessionId(sessionId: string): { + channelId: string; + accountId: string; + conversationId: string; + sessionKey: string; + } | null { + const parts = sessionId.split(":"); + if (parts.length !== 5 || parts[0] !== "openclaw") return null; + return { + channelId: parts[1], + accountId: parts[2], + conversationId: parts[3], + sessionKey: parts[4], + }; + } + /** * Pending-message lookup key. Must distinguish every in-flight conversation * the gateway is juggling, so it includes channel + account + conversation + @@ -1630,7 +2151,11 @@ export class ChatTurnWriter { private writeWatermarkFile( targetPath: string = this.watermarkFilePath, - overrideMaps?: { wm: Map; bc: Map }, + overrideMaps?: { + wm: Map; + bc: Map; + markers?: Map>; + }, ): boolean { try { // T17 — Emit the new `{ w: , b: }` shape so @@ -1654,13 +2179,22 @@ export class ChatTurnWriter { // only become "the source of truth" once the write succeeded. const wm = overrideMaps?.wm ?? this.cachedWatermarks; const bc = overrideMaps?.bc ?? this.w4bSessionCounts; - const allKeys = new Set([...wm.keys(), ...bc.keys()]); - const data: Record = {}; + const markersByKey = overrideMaps?.markers ?? this.externalTurnMarkers; + const allKeys = new Set([ + ...wm.keys(), + ...bc.keys(), + ...markersByKey.keys(), + ]); + const data: Record }> = {}; for (const key of allKeys) { + const markers = markersByKey.get(key); data[key] = { w: wm.get(key) ?? -1, b: bc.get(key) ?? 0, }; + if (markers && markers.size > 0) { + data[key].m = Object.fromEntries(markers.entries()); + } } const tmpPath = `${targetPath}.tmp`; fs.writeFileSync(tmpPath, JSON.stringify(data, null, 2), "utf-8"); diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index 546cd4841..9e297391a 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -31,6 +31,7 @@ import type { OpenClawPluginApi, } from './types.js'; import type { DkgDaemonClient, OpenClawAttachmentRef } from './dkg-client.js'; +import type { ChatTurnWriter } from './ChatTurnWriter.js'; export const CHANNEL_NAME = 'dkg-ui'; const DEFAULT_CHANNEL_ACCOUNT_ID = 'default'; @@ -214,6 +215,8 @@ interface PersistTurnOptions { persistenceState?: 'stored' | 'failed' | 'pending'; failureReason?: string | null; attachmentRefs?: OpenClawAttachmentRef[]; + sessionKey?: string; + turnId?: string; } interface InboundChatOptions { @@ -387,6 +390,7 @@ export class DkgChannelPlugin { private readonly gatewayLifecyclePendingOwnersByAccount = new Map(); private readonly gatewayLifecycleOwnersByContext = new WeakMap(); private readonly gatewayLifecycleOwnersBySignal = new WeakMap(); + private chatTurnWriter: ChatTurnWriter | null = null; /** * Pre-dispatch memory-slot re-assert callback. Set by `DkgNodePlugin` * to `memoryPlugin.reAssertCapability.bind(memoryPlugin)`. Called @@ -410,6 +414,10 @@ export class DkgChannelPlugin { this.preDispatchReAssert = cb; } + setChatTurnWriter(writer: ChatTurnWriter | null): void { + this.chatTurnWriter = writer; + } + /** * Read the UI-selected project context graph for the currently-running * dispatch. Used by `DkgMemorySessionResolver` inside `DkgNodePlugin` @@ -870,6 +878,15 @@ export class DkgChannelPlugin { this.notifyStopIdle(); } + private reservePendingTurnPersistence(correlationId: string, allowDuringShutdown: boolean): void { + if (this.pendingTurnPersistence.has(correlationId)) return; + this.pendingTurnPersistence.set(correlationId, { + attempt: 0, + timer: null, + allowDuringShutdown, + }); + } + private clearPendingTurnPersistence(): void { for (const job of this.pendingTurnPersistence.values()) { if (job.timer) clearTimeout(job.timer); @@ -1093,11 +1110,13 @@ export class DkgChannelPlugin { api.logger.info?.(`[dkg-channel] Dispatching for: ${correlationId}`); try { const reply = await this.dispatchViaPluginSdk(text, correlationId, identity, contextAttachmentRefs, sanitizedContextEntries, uiContextGraphId); + const { sessionKey, ...replyForCaller } = reply; // Fire-and-forget: persist turn to DKG graph for Agent Hub visualization this.queueTurnPersistence(text, reply.text, correlationId, identity, { attachmentRefs, + sessionKey, }, true); - return reply; + return replyForCaller; } catch (err: any) { api.logger.warn?.(`[dkg-channel] dispatchViaPluginSdk failed: ${err.message}`); throw err; @@ -1130,10 +1149,12 @@ export class DkgChannelPlugin { correlationId, } as any), ); + const { sessionKey, ...replyForCaller } = reply; this.queueTurnPersistence(text, reply.text, correlationId, identity || 'owner', { attachmentRefs, + sessionKey, }, true); - return reply; + return replyForCaller; } throw new Error( @@ -1209,6 +1230,8 @@ export class DkgChannelPlugin { CommandBody: commandBody, BodyForCommands: commandBody, ...(commandBody !== text ? { OriginalRawBody: text } : {}), + CorrelationId: correlationId, + DkgTurnId: correlationId, From: identity || 'Owner', To: route.agentId, SessionKey: route.sessionKey, @@ -1303,7 +1326,7 @@ export class DkgChannelPlugin { clearTimeout(timer); const replyText = finalizeAgentReplyText(replyChunks.join('\n')); log.info?.(`[dkg-channel] Reply dispatched (${replyText.length} chars) for ${correlationId}`); - resolve({ text: replyText, correlationId }); + resolve({ text: replyText, correlationId, sessionKey: route.sessionKey }); }).catch((err: any) => { clearTimeout(timer); log.warn?.(`[dkg-channel] dispatchInboundReplyWithBase failed: ${err.message}`); @@ -1352,7 +1375,7 @@ export class DkgChannelPlugin { clearTimeout(timer); const replyText = finalizeAgentReplyText(replyChunks.join('\n')); log.info?.(`[dkg-channel] Reply dispatched (${replyText.length} chars) for ${correlationId}`); - resolve({ text: replyText, correlationId }); + resolve({ text: replyText, correlationId, sessionKey: route.sessionKey }); }) .catch((err: any) => { clearTimeout(timer); @@ -1438,6 +1461,8 @@ export class DkgChannelPlugin { Body: formattedBody, BodyForAgent: agentBody, RawBody: commandBody, CommandBody: commandBody, BodyForCommands: commandBody, ...(commandBody !== text ? { OriginalRawBody: text } : {}), + CorrelationId: correlationId, + DkgTurnId: correlationId, From: identity || 'Owner', To: route.agentId, SessionKey: route.sessionKey, AccountId: 'default', Provider: CHANNEL_NAME, Surface: CHANNEL_NAME, ChatType: 'direct', @@ -1547,6 +1572,7 @@ export class DkgChannelPlugin { if (resolvedTerminalState === 'completed' && resolvedFinalText) { this.queueTurnPersistence(text, resolvedFinalText, correlationId, identity, { attachmentRefs, + sessionKey: route.sessionKey, }, true); } else if (resolvedTerminalState === 'failed') { this.queueTurnPersistence( @@ -1554,7 +1580,7 @@ export class DkgChannelPlugin { this.buildFailedAssistantReply(resolvedFailureReason), correlationId, identity, - { persistenceState: 'failed', failureReason: resolvedFailureReason, attachmentRefs }, + { persistenceState: 'failed', failureReason: resolvedFailureReason, attachmentRefs, sessionKey: route.sessionKey }, true, ); } else { @@ -1563,7 +1589,7 @@ export class DkgChannelPlugin { CANCELLED_TURN_MESSAGE, correlationId, identity, - { persistenceState: 'failed', failureReason: 'cancelled', attachmentRefs }, + { persistenceState: 'failed', failureReason: 'cancelled', attachmentRefs, sessionKey: route.sessionKey }, true, ); } @@ -1603,6 +1629,7 @@ export class DkgChannelPlugin { aborted = true; // Stop dangling deliver() callbacks from queuing if (terminalState === 'cancelled' && dispatchTerminal == null) { + this.reservePendingTurnPersistence(correlationId, true); void dispatchCompletion.finally(() => { persistResolvedTerminalState(); }); @@ -1767,9 +1794,45 @@ export class DkgChannelPlugin { ...(opts?.failureReason != null ? { failureReason: opts.failureReason } : {}), }, ); + await this.markExternalTurnPersistedAfterStore({ + sessionKey: opts?.sessionKey, + turnId: opts?.turnId ?? correlationId, + user: userMessage, + assistant: assistantReply, + correlationId, + }); this.api?.logger.info?.(`[dkg-channel] Turn persisted to DKG graph: ${correlationId}`); } + private async markExternalTurnPersistedAfterStore(opts: { + sessionKey?: string; + turnId: string; + user: string; + assistant: string; + correlationId: string; + }): Promise { + if (!this.chatTurnWriter) return; + for (let attempt = 0; attempt < 2; attempt++) { + try { + await this.chatTurnWriter.markExternalTurnPersistedDurable({ + sessionKey: opts.sessionKey, + turnId: opts.turnId, + user: opts.user, + assistant: opts.assistant, + }); + return; + } catch (err: any) { + if (attempt === 0) { + await new Promise((resolve) => setTimeout(resolve, TURN_PERSIST_RETRY_DELAYS_MS[0])); + continue; + } + this.api?.logger.warn?.( + `[dkg-channel] Turn persisted but ChatTurnWriter marker failed for ${opts.correlationId}: ${err?.message ?? err}`, + ); + } + } + } + private queueTurnPersistence( userMessage: string, assistantReply: string, @@ -1778,7 +1841,11 @@ export class DkgChannelPlugin { opts?: PersistTurnOptions, allowDuringShutdown = false, ): void { - if (!this.canContinuePersistenceAttempt(allowDuringShutdown) || this.pendingTurnPersistence.has(correlationId)) return; + const existing = this.pendingTurnPersistence.get(correlationId); + if ( + !this.canContinuePersistenceAttempt(allowDuringShutdown) + || (existing && existing.attempt > 0) + ) return; const attemptPersist = (attempt: number): void => { if (!this.canContinuePersistenceAttempt(allowDuringShutdown)) return; diff --git a/packages/adapter-openclaw/src/DkgNodePlugin.ts b/packages/adapter-openclaw/src/DkgNodePlugin.ts index 79ec48282..d6f17bf57 100644 --- a/packages/adapter-openclaw/src/DkgNodePlugin.ts +++ b/packages/adapter-openclaw/src/DkgNodePlugin.ts @@ -706,6 +706,7 @@ export class DkgNodePlugin { this.chatTurnWriter = new ChatTurnWriter({ client: this.client, logger: api.logger, stateDir }); this.chatTurnWriterStateDir = stateDir; this.chatTurnWriterStateDirSource = stateDirSource; + this.channelPlugin?.setChatTurnWriter(this.chatTurnWriter); } @@ -1115,6 +1116,7 @@ export class DkgNodePlugin { if (!this.channelPlugin) { this.channelPlugin = new DkgChannelPlugin(channelConfig, this.client); } + this.channelPlugin.setChatTurnWriter(this.chatTurnWriter); this.channelPlugin.register(api); api.logger.info?.('[dkg] Channel module enabled — DKG UI bridge active'); } @@ -1516,6 +1518,7 @@ export class DkgNodePlugin { this.peerIdDeferredRetryTimer = null; } await this.channelPlugin?.stop(); + try { await this.chatTurnWriter?.flush(); } catch { /* best effort */ } } getClient(): DkgDaemonClient { diff --git a/packages/adapter-openclaw/src/types.ts b/packages/adapter-openclaw/src/types.ts index 5dd622e5a..20a88bb7f 100644 --- a/packages/adapter-openclaw/src/types.ts +++ b/packages/adapter-openclaw/src/types.ts @@ -136,6 +136,8 @@ export interface ChannelOutboundReply { text: string; /** Session-internal turn ID. */ turnId?: string; + /** OpenClaw transcript session key resolved for this channel dispatch. */ + sessionKey?: string; /** Tool calls made during this turn. */ toolCalls?: Array<{ name: string; args: Record; result: unknown }>; } diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 3fc83205e..b6a555579 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -447,6 +447,416 @@ describe("ChatTurnWriter", () => { newWriter.flushSync(); }); + it("T80 — W4b success durably writes the skip floor before the debounce window", async () => { + writer.onMessageReceived({ + sessionKey: "sk", + context: { channelId: "tg", content: "u1", messageId: "in-1" }, + } as any); + await writer.onMessageSent({ + sessionKey: "sk", + context: { channelId: "tg", content: "a1", success: true, messageId: "out-1" }, + } as any); + await flushMicrotasks(); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + expect((restarted as any).w4bSessionCounts.get("openclaw:tg:::sk")).toBe(1); + + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "u1" }, + { role: "assistant", content: "a1" }, + ], + }, { channelId: "tg", sessionKey: "sk" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(0); + restarted.flushSync(); + }); + + it("T81 — before_reset can use event payload identity and clears stale W4b state", async () => { + writer.onMessageReceived({ + sessionKey: "sk", + context: { channelId: "tg", content: "before reset", messageId: "in-1" }, + } as any); + await writer.onMessageSent({ + sessionKey: "sk", + context: { channelId: "tg", content: "old reply", success: true, messageId: "out-1" }, + } as any); + await flushMicrotasks(); + expect((writer as any).w4bSessionCounts.get("openclaw:tg:::sk")).toBe(1); + + mockClient.storeChatTurn.mockClear(); + await writer.onBeforeReset({ channelId: "tg", sessionKey: "sk" }); + expect((writer as any).w4bSessionCounts.get("openclaw:tg:::sk")).toBeUndefined(); + + writer.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "after reset" }, + { role: "assistant", content: "new reply" }, + ], + }, { channelId: "tg", sessionKey: "sk" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("after reset"); + }); + + it("T82 — durable external direct-channel marker prevents restart backfill by W4a", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-1", + user: "node ui question", + assistant: "node ui answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "node ui question", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-1" } }, + { role: "assistant", content: "node ui answer" }, + { role: "user", content: "telegram question" }, + { role: "assistant", content: "telegram answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("telegram question"); + restarted.flushSync(); + }); + + it("T83 — external marker write failure rolls back counts before retry", async () => { + const writeSpy = vi.spyOn(writer as any, "writeWatermarkFile") + .mockReturnValueOnce(false) + .mockReturnValueOnce(true); + + await expect(writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-rollback", + user: "rollback question", + assistant: "rollback answer", + })).rejects.toThrow("Failed to write external chat-turn marker"); + + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-rollback", + user: "rollback question", + assistant: "rollback answer", + }); + + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const bucket: Map | undefined = (writer as any).externalTurnMarkers.get(externalCursorKey); + expect(Array.from(bucket?.values() ?? [])).toEqual([1, 1]); + writeSpy.mockRestore(); + }); + + it("T84 — external markers are correlation-bound, not content-only", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-2", + user: "same question", + assistant: "same answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "same question" }, + { role: "assistant", content: "same answer" }, + { role: "user", content: "same question", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-2" } }, + { role: "assistant", content: "same answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("same question"); + const expectedFirstPairTurnId = (restarted as any).deterministicTurnId( + (restarted as any).deriveSessionId({ channelId: "telegram", sessionKey: "agent:main:main" }), + "same question", + "same answer", + 0, + ); + const skippedSecondPairTurnId = (restarted as any).deterministicTurnId( + (restarted as any).deriveSessionId({ channelId: "telegram", sessionKey: "agent:main:main" }), + "same question", + "same answer", + 1, + ); + expect(mockClient.storeChatTurn.mock.calls[0][3]).toEqual({ turnId: expectedFirstPairTurnId }); + expect(mockClient.storeChatTurn.mock.calls[0][3]).not.toEqual({ turnId: skippedSecondPairTurnId }); + restarted.flushSync(); + }); + + it("T85 — external markers fall back to unique content only with direct-channel metadata", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-unique-content", + user: "unique ui question", + assistant: "unique ui answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "unique ui question", context: { Provider: "dkg-ui" } }, + { role: "assistant", content: "unique ui answer" }, + { role: "user", content: "telegram question" }, + { role: "assistant", content: "telegram answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("telegram question"); + restarted.flushSync(); + }); + + it("T86 — content fallback does not consume a unique non-direct channel pair", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-stale-content", + user: "shared text", + assistant: "shared answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "shared text" }, + { role: "assistant", content: "shared answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("shared text"); + restarted.flushSync(); + }); + + it("T91 — content fallback does not consume a direct pair with a mismatched explicit ID", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-stale-id", + user: "same direct text", + assistant: "same direct answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "same direct text", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-new-id" } }, + { role: "assistant", content: "same direct answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("same direct text"); + restarted.flushSync(); + }); + + it("T92 — content fallback is ambiguous when any same-content direct pair has an explicit ID", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-ambiguous-content", + user: "ambiguous direct text", + assistant: "ambiguous direct answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "ambiguous direct text", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-new-id" } }, + { role: "assistant", content: "ambiguous direct answer" }, + { role: "user", content: "ambiguous direct text", context: { Provider: "dkg-ui" } }, + { role: "assistant", content: "ambiguous direct answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(2); + expect(mockClient.storeChatTurn.mock.calls.map((call) => call[1])).toEqual([ + "ambiguous direct text", + "ambiguous direct text", + ]); + restarted.flushSync(); + }); + + it("T93 — blocked content fallback retires the stale content marker for later windows", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-retired-content", + user: "retired direct text", + assistant: "retired direct answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "retired direct text", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-new-id" } }, + { role: "assistant", content: "retired direct answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "retired direct text", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-new-id" } }, + { role: "assistant", content: "retired direct answer" }, + { role: "user", content: "retired direct text", context: { Provider: "dkg-ui" } }, + { role: "assistant", content: "retired direct answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("retired direct text"); + restarted.flushSync(); + }); + + it("T87 — ID marker does not skip a mixed direct and non-direct joined user side", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-mixed", + user: "ui part", + assistant: "combined answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "ui part", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-mixed" } }, + { role: "user", content: "telegram part" }, + { role: "assistant", content: "combined answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("ui part\ntelegram part"); + restarted.flushSync(); + }); + + it("T88 — one direct marker does not skip multiple collapsed direct users", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-direct-collapse", + user: "first ui", + assistant: "shared ui answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "first ui", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-direct-collapse" } }, + { role: "user", content: "second ui", context: { Provider: "dkg-ui" } }, + { role: "assistant", content: "shared ui answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("first ui\nsecond ui"); + restarted.flushSync(); + }); + + it("T89 — reset gate replays W4b inbound that arrives while pre-reset W4a work drains", async () => { + let releaseFirstPersist!: () => void; + let firstPersist = true; + mockClient.storeChatTurn.mockImplementation(async () => { + if (firstPersist) { + firstPersist = false; + await new Promise((resolve) => { releaseFirstPersist = resolve; }); + } + return undefined; + }); + + writer.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "before reset" }, + { role: "assistant", content: "old reply" }, + ], + }, { channelId: "tg", sessionKey: "sk" }); + await flushMicrotasks(); + + const resetPromise = writer.onBeforeReset({ channelId: "tg", sessionKey: "sk" }); + await flushMicrotasks(); + writer.onMessageReceived({ + sessionKey: "sk", + context: { channelId: "tg", content: "after reset", messageId: "in-after" }, + } as any); + + releaseFirstPersist(); + await resetPromise; + await flushMicrotasks(); + + mockClient.storeChatTurn.mockClear(); + await writer.onMessageSent({ + sessionKey: "sk", + context: { channelId: "tg", content: "new reply", success: true, messageId: "out-after" }, + } as any); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("after reset"); + }); + + it("T90 — setStateDir preserves destination external markers", async () => { + const destinationStateDir = fs.mkdtempSync(path.join(os.tmpdir(), "chatturnwriter-dest-")); + try { + const destination = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir: destinationStateDir }); + await destination.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-migrate", + user: "migrated ui question", + assistant: "migrated ui answer", + }); + destination.flushSync(); + + await writer.setStateDir(destinationStateDir); + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir: destinationStateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "migrated ui question", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-migrate" } }, + { role: "assistant", content: "migrated ui answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(0); + restarted.flushSync(); + } finally { + fs.rmSync(destinationStateDir, { recursive: true, force: true }); + } + }); + it("T17 — disk file accepts the legacy number format for backward compat", async () => { // The pre-fix file contained `{ "sid": }` (watermark only). // Existing on-disk files MUST still load correctly to avoid losing diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index c12038005..a3eb8616a 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1381,6 +1381,8 @@ describe('DkgChannelPlugin', () => { api.cfg = mockCfg; const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); plugin.register(api); const reply = await plugin.processInbound('Hello', 'corr-1', 'owner'); @@ -1390,6 +1392,8 @@ describe('DkgChannelPlugin', () => { expect(dispatched).toMatchObject({ ctx: expect.objectContaining({ BodyForAgent: 'Hello', + DkgTurnId: 'corr-1', + CorrelationId: 'corr-1', SessionKey: 'session-1', }), cfg: mockCfg, @@ -1404,6 +1408,8 @@ describe('DkgChannelPlugin', () => { sessionKey: 'session-1', ctx: expect.objectContaining({ BodyForAgent: 'Hello', + DkgTurnId: 'corr-1', + CorrelationId: 'corr-1', From: 'owner', }), })); @@ -1493,6 +1499,8 @@ describe('DkgChannelPlugin', () => { api.cfg = mockCfg; const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); plugin.register(api); await plugin.processInbound('User message', 'corr-persist', 'owner'); @@ -1505,6 +1513,12 @@ describe('DkgChannelPlugin', () => { 'Agent reply', { turnId: 'corr-persist' }, ]); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'session-1', + turnId: 'corr-persist', + user: 'User message', + assistant: 'Agent reply', + }); }); it('processInbound should carry attachment refs into the runtime prompt and persist them with the turn', async () => { @@ -1674,6 +1688,42 @@ describe('DkgChannelPlugin', () => { } }); + it('processInbound should not retry the daemon write when only the ChatTurnWriter marker fails', async () => { + vi.useFakeTimers(); + try { + const { runtime } = makeMockRuntime({ + dispatchImpl: async (params) => { + await params.dispatcherOptions.deliver({ text: 'Persisted reply' }); + }, + }); + const mockCfg = { session: { dmScope: 'main' }, agents: {} }; + + const api = makeApi({ + logger: { info: trackFn(), warn: trackFn(), debug: trackFn() }, + } as any) as any; + api.runtime = runtime; + api.cfg = mockCfg; + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockRejectedValue(new Error('marker disk outage')); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + plugin.register(api); + + await plugin.processInbound('Already stored', 'corr-marker-fail', 'owner'); + await vi.advanceTimersByTimeAsync(10); + expect(storeCalls).toHaveLength(1); + + await vi.advanceTimersByTimeAsync(250); + expect(storeCalls).toHaveLength(1); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + expect(api.logger.warn.calls.some((call: unknown[]) => + String(call[0]).includes('Turn persisted but ChatTurnWriter marker failed for corr-marker-fail'), + )).toBe(true); + } finally { + vi.useRealTimers(); + } + }); + it('persistTurn should use separate sessionId for non-owner identities', async () => { const { runtime } = makeMockRuntime({ resolveAgentRouteImpl: () => ({ agentId: 'agent-1', sessionKey: 'session-1' }), @@ -1689,6 +1739,8 @@ describe('DkgChannelPlugin', () => { api.cfg = mockCfg; const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); plugin.register(api); await plugin.processInbound('decide', 'corr-game', 'background-worker'); @@ -1930,6 +1982,8 @@ describe('DkgChannelPlugin', () => { api.cfg = mockCfg; const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); plugin.register(api); const events: Array<{ type: string; delta?: string; text?: string; correlationId?: string }> = []; @@ -1944,6 +1998,8 @@ describe('DkgChannelPlugin', () => { CommandBody: 'Hello', BodyForCommands: 'Hello', AttachmentRefs: attachmentRefs, + DkgTurnId: 'corr-stream-runtime', + CorrelationId: 'corr-stream-runtime', SessionKey: 'session-1', }), cfg: mockCfg, @@ -1964,6 +2020,12 @@ describe('DkgChannelPlugin', () => { 'Streamed reply', { turnId: 'corr-stream-runtime', attachmentRefs }, ]); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'session-1', + turnId: 'corr-stream-runtime', + user: 'Hello', + assistant: 'Streamed reply', + }); }); it('processInboundStream should wait for a still-running dispatch to settle before persisting a closed stream', async () => { @@ -2005,6 +2067,53 @@ describe('DkgChannelPlugin', () => { ]); }); + it('stop should drain a disconnected stream whose dispatch has not settled yet', async () => { + let resumeDispatch!: () => void; + const { runtime } = makeMockRuntime({ + dispatchImpl: async (params) => { + await params.dispatcherOptions.deliver({ text: 'Partial ' }); + await new Promise((resolve) => { resumeDispatch = resolve; }); + await params.dispatcherOptions.deliver({ text: 'reply' }); + }, + }); + const mockCfg = { session: { dmScope: 'main' }, agents: {} }; + + const api = makeApi() as any; + api.runtime = runtime; + api.cfg = mockCfg; + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + plugin.register(api); + + const stream = plugin.processInboundStream('Hello', 'corr-stream-cancel-stop', 'owner'); + await expect(stream.next()).resolves.toEqual({ + done: false, + value: { type: 'text_delta', delta: 'Partial ' }, + }); + await expect(stream.return(undefined)).resolves.toEqual({ + done: true, + value: undefined, + }); + + const stopPromise = plugin.stop(); + let stopSettled = false; + void stopPromise.then(() => { stopSettled = true; }); + await Promise.resolve(); + expect(stopSettled).toBe(false); + + resumeDispatch(); + await stopPromise; + + expect(storeCalls[0]).toEqual([ + 'openclaw:dkg-ui', + 'Hello', + 'Partial reply', + { turnId: 'corr-stream-cancel-stop' }, + ]); + }); + it('processInboundStream should persist the completed reply when final completion was already queued before the consumer stopped iterating', async () => { const { runtime } = makeMockRuntime({ dispatchImpl: async (params) => { diff --git a/packages/adapter-openclaw/test/plugin.test.ts b/packages/adapter-openclaw/test/plugin.test.ts index 02127bf60..3ac741bf3 100644 --- a/packages/adapter-openclaw/test/plugin.test.ts +++ b/packages/adapter-openclaw/test/plugin.test.ts @@ -4341,6 +4341,7 @@ describe('DkgNodePlugin', () => { const channelPlugin = (plugin as any).channelPlugin as any; expect(channelPlugin).toBeDefined(); + expect(channelPlugin.chatTurnWriter).toBe((plugin as any).chatTurnWriter); // Simulate a dispatch scope by running the memorySessionResolver // lookup inside `channelPlugin.dispatchContext.run`, the same From c1df0c69df02645d8a56c2a53f366d09b0a9fd72 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 20:53:48 +0200 Subject: [PATCH 02/14] fix(openclaw): scope reset and marker commits --- .../adapter-openclaw/src/ChatTurnWriter.ts | 17 +++--- .../test/ChatTurnWriter.test.ts | 55 +++++++++++++++++++ 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 3ddb67791..83a089677 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -690,7 +690,7 @@ export class ChatTurnWriter { for (const marker of markers) { this.restoreExternalTurnMarker(externalCursorKey, marker); } - if (!this.commitWatermarkStateSync()) { + if (!this.commitWatermarkStateSync(externalCursorKey)) { for (const marker of markers) { this.consumeExternalTurnMarker(externalCursorKey, marker); } @@ -1923,14 +1923,13 @@ export class ChatTurnWriter { const ids = new Set(); if (identity.sessionId) ids.add(identity.sessionId); if (!identity.channelId || !identity.sessionKey) return Array.from(ids); + if (typeof identity.accountId !== "string" || typeof identity.conversationId !== "string") { + return Array.from(ids); + } const expected = { channelId: this.encodeIdField(this.sanitize(identity.channelId)), - accountId: typeof identity.accountId === "string" - ? this.encodeIdField(this.sanitize(identity.accountId)) - : undefined, - conversationId: typeof identity.conversationId === "string" - ? this.encodeIdField(this.sanitize(identity.conversationId)) - : undefined, + accountId: this.encodeIdField(this.sanitize(identity.accountId)), + conversationId: this.encodeIdField(this.sanitize(identity.conversationId)), sessionKey: this.encodeIdField(this.sanitize(identity.sessionKey)), }; for (const candidate of this.collectKnownSessionIds()) { @@ -1938,8 +1937,8 @@ export class ChatTurnWriter { if (!parsed) continue; if (parsed.channelId !== expected.channelId) continue; if (parsed.sessionKey !== expected.sessionKey) continue; - if (expected.accountId !== undefined && parsed.accountId !== expected.accountId) continue; - if (expected.conversationId !== undefined && parsed.conversationId !== expected.conversationId) continue; + if (parsed.accountId !== expected.accountId) continue; + if (parsed.conversationId !== expected.conversationId) continue; ids.add(candidate); } return Array.from(ids); diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index b6a555579..74926916c 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -504,6 +504,36 @@ describe("ChatTurnWriter", () => { expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("after reset"); }); + it("T95 — partial reset identity does not clear sibling thread state", async () => { + writer.onMessageReceived({ + sessionKey: "sk", + context: { + channelId: "tg", + accountId: "acct", + conversationId: "thread-2", + content: "sibling question", + messageId: "sibling-in", + }, + } as any); + + await writer.onBeforeReset({ channelId: "tg", sessionKey: "sk" }); + await writer.onMessageSent({ + sessionKey: "sk", + context: { + channelId: "tg", + accountId: "acct", + conversationId: "thread-2", + content: "sibling answer", + success: true, + messageId: "sibling-out", + }, + } as any); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("sibling question"); + }); + it("T82 — durable external direct-channel marker prevents restart backfill by W4a", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", @@ -555,6 +585,31 @@ describe("ChatTurnWriter", () => { writeSpy.mockRestore(); }); + it("T94 — external marker write failure preserves unrelated debounce timers", async () => { + writer.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "pending question" }, + { role: "assistant", content: "pending answer" }, + ], + }, { channelId: "tg", sessionKey: "sk" }); + await flushMicrotasks(); + + const sessionId = (writer as any).deriveSessionId({ channelId: "tg", sessionKey: "sk" }); + expect((writer as any).debounceTimers.has(sessionId)).toBe(true); + + const writeSpy = vi.spyOn(writer as any, "writeWatermarkFile").mockReturnValueOnce(false); + await expect(writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-debounce", + user: "external question", + assistant: "external answer", + })).rejects.toThrow("Failed to write external chat-turn marker"); + + expect((writer as any).debounceTimers.has(sessionId)).toBe(true); + writeSpy.mockRestore(); + }); + it("T84 — external markers are correlation-bound, not content-only", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", From 417afa73bbeea38d21f110139fd1ef16ddd9c5b8 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 21:23:00 +0200 Subject: [PATCH 03/14] fix(openclaw): address durable marker review gaps --- .../adapter-openclaw/src/ChatTurnWriter.ts | 55 ++++++--- .../test/ChatTurnWriter.test.ts | 104 +++++++++++++++++- 2 files changed, 140 insertions(+), 19 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 83a089677..90f43bcca 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -258,6 +258,7 @@ export class ChatTurnWriter { // concurrent persists keep their advances; nothing got wiped. const mergedWm = new Map(this.cachedWatermarks); const mergedBc = new Map(this.w4bSessionCounts); + const baseMarkers = this.cloneExternalTurnMarkers(this.externalTurnMarkers); const mergedMarkers = this.cloneExternalTurnMarkers(this.externalTurnMarkers); try { if (fs.existsSync(newWatermarkFilePath)) { @@ -320,8 +321,10 @@ export class ChatTurnWriter { // T45 — Commit by union-merging back into live. If a concurrent // persist advanced live's watermark during the write window, // its increment is preserved (max takes the higher of merged- - // from-destination and post-flush-live). If no concurrent - // persist arrived, live ends up exactly equal to mergedWm/Bc. + // from-destination and post-flush-live). External markers are + // multiplicities, so only add the destination delta beyond the + // baseline snapshot; adding the whole merged snapshot would + // double-count markers that were already live before migration. for (const [key, val] of mergedWm) { this.cachedWatermarks.set(key, Math.max(this.cachedWatermarks.get(key) ?? -1, val)); } @@ -331,7 +334,9 @@ export class ChatTurnWriter { for (const [key, markers] of mergedMarkers) { const live = this.externalTurnMarkers.get(key) ?? new Map(); for (const [marker, count] of markers) { - live.set(marker, Math.max(live.get(marker) ?? 0, count)); + const baseCount = baseMarkers.get(key)?.get(marker) ?? 0; + const delta = count - baseCount; + if (delta > 0) live.set(marker, (live.get(marker) ?? 0) + delta); } if (live.size > 0) this.externalTurnMarkers.set(key, live); } @@ -681,10 +686,8 @@ export class ChatTurnWriter { assistant: string; }): Promise { const externalCursorKey = this.externalCursorKeyFromSessionKey(opts.sessionKey); - const assistant = this.stripRecalledMemory(opts.assistant); const markers = [ this.externalTurnMarkerId(opts.turnId), - this.externalTurnContentMarkerKey(opts.user, assistant), ].filter(Boolean); if (!externalCursorKey || markers.length === 0) return; for (const marker of markers) { @@ -997,6 +1000,7 @@ export class ChatTurnWriter { // still -1, savedUpTo computes to -1, computeDelta // emits everything). if (!this.commitWatermarkStateSync(sessionId)) { + this.scheduleWatermarkFlush(sessionId, { retryOnFailure: true, attempts: 3 }); throw new Error("Failed to write W4b chat-turn watermark"); } } @@ -1623,7 +1627,8 @@ export class ChatTurnWriter { const contentMarker = this.externalTurnContentMarkerKey(user, assistant); if ( - externalDirect + this.allowsExternalContentFallback(sessionKeyCursor) + && externalDirect && turnIds.length === 0 && contentMarker && contentMatchCounts.get(contentMarker) === 1 @@ -1634,7 +1639,8 @@ export class ChatTurnWriter { } if ( - externalDirect + this.allowsExternalContentFallback(sessionKeyCursor) + && externalDirect && contentMarker && (turnIds.length > 0 || (contentMatchCounts.get(contentMarker) ?? 0) > 1) ) { @@ -1643,6 +1649,10 @@ export class ChatTurnWriter { return { skip: false, markers: consumed }; } + private allowsExternalContentFallback(sessionKeyCursor: string): boolean { + return !sessionKeyCursor.startsWith("openclaw:transcript:"); + } + private consumeExternalTurnMarker(sessionKeyCursor: string, marker: string): boolean { const bucket = this.externalTurnMarkers.get(sessionKeyCursor); if (!bucket) return false; @@ -1696,7 +1706,7 @@ export class ChatTurnWriter { const bucket = target.get(key) ?? new Map(); for (const [marker, count] of Object.entries(markers)) { if (typeof count === "number" && count > 0) { - bucket.set(marker, Math.max(bucket.get(marker) ?? 0, count)); + bucket.set(marker, (bucket.get(marker) ?? 0) + count); } } if (bucket.size > 0) target.set(key, bucket); @@ -2055,15 +2065,32 @@ export class ChatTurnWriter { * T17 — Schedule a debounced watermark-file flush WITHOUT changing * the pending watermark value. Used by W4b's `w4bSessionCounts` * increment so the new count lands on disk via the same file write - * that watermark updates use. If a flush is already scheduled, no-op - * — it will pick up the new w4bCount when it fires. + * that watermark updates use. Retry flushes may take over an existing + * non-retry debounce timer while preserving that timer's pending + * watermark index. */ - private scheduleWatermarkFlush(sessionId: string): void { - if (this.debounceTimers.has(sessionId)) return; - const currentWatermark = this.cachedWatermarks.get(sessionId) ?? -1; + private scheduleWatermarkFlush( + sessionId: string, + opts: { retryOnFailure?: boolean; attempts?: number; pendingIndex?: number } = {}, + ): void { + const existing = this.debounceTimers.get(sessionId); + if (existing) { + if (!opts.retryOnFailure) return; + clearTimeout(existing.timer); + this.debounceTimers.delete(sessionId); + opts = { ...opts, pendingIndex: existing.pendingIndex }; + } + const currentWatermark = opts.pendingIndex ?? this.cachedWatermarks.get(sessionId) ?? -1; const timer = setTimeout(() => { - this.writeWatermarkFile(); this.debounceTimers.delete(sessionId); + this.cachedWatermarks.set(sessionId, currentWatermark); + const wrote = this.writeWatermarkFile(); + if (!wrote && opts.retryOnFailure && (opts.attempts ?? 1) > 1) { + this.scheduleWatermarkFlush(sessionId, { + retryOnFailure: true, + attempts: (opts.attempts ?? 1) - 1, + }); + } }, 50); this.debounceTimers.set(sessionId, { timer, pendingIndex: currentWatermark }); } diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 74926916c..42c1bb602 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -475,6 +475,55 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); + it("T96 - W4b durable write failure retries state flush after daemon success", async () => { + const writeSpy = vi.spyOn(writer as any, "writeWatermarkFile") + .mockImplementationOnce(() => false); + + writer.onMessageReceived({ + sessionKey: "sk", + context: { channelId: "tg", content: "retry q", messageId: "in-retry" }, + } as any); + await writer.onMessageSent({ + sessionKey: "sk", + context: { channelId: "tg", content: "retry a", success: true, messageId: "out-retry" }, + } as any); + await new Promise((resolve) => setTimeout(resolve, 90)); + + expect(writeSpy).toHaveBeenCalledTimes(2); + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + expect((restarted as any).w4bSessionCounts.get("openclaw:tg:::sk")).toBe(1); + writeSpy.mockRestore(); + restarted.flushSync(); + }); + + it("T98 - W4b durable retry upgrades an existing normal debounce flush", async () => { + const sessionId = "openclaw:tg:::sk"; + (writer as any).saveWatermark(sessionId, 0); + const commitSpy = vi.spyOn(writer as any, "commitWatermarkStateSync") + .mockReturnValue(false); + const writeSpy = vi.spyOn(writer as any, "writeWatermarkFile") + .mockImplementationOnce(() => false); + + writer.onMessageReceived({ + sessionKey: "sk", + context: { channelId: "tg", content: "pending retry q", messageId: "in-retry-pending" }, + } as any); + await writer.onMessageSent({ + sessionKey: "sk", + context: { channelId: "tg", content: "pending retry a", success: true, messageId: "out-retry-pending" }, + } as any); + await new Promise((resolve) => setTimeout(resolve, 130)); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(writeSpy).toHaveBeenCalledTimes(2); + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + expect((restarted as any).cachedWatermarks.get(sessionId)).toBe(0); + expect((restarted as any).w4bSessionCounts.get(sessionId)).toBe(1); + commitSpy.mockRestore(); + writeSpy.mockRestore(); + restarted.flushSync(); + }); + it("T81 — before_reset can use event payload identity and clears stale W4b state", async () => { writer.onMessageReceived({ sessionKey: "sk", @@ -581,7 +630,7 @@ describe("ChatTurnWriter", () => { const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); const bucket: Map | undefined = (writer as any).externalTurnMarkers.get(externalCursorKey); - expect(Array.from(bucket?.values() ?? [])).toEqual([1, 1]); + expect(Array.from(bucket?.values() ?? [])).toEqual([1]); writeSpy.mockRestore(); }); @@ -650,13 +699,17 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T85 — external markers fall back to unique content only with direct-channel metadata", async () => { + it("T85 - session-key external markers do not content-fallback without an exact ID", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-unique-content", user: "unique ui question", assistant: "unique ui answer", }); + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const contentMarker = (writer as any).externalTurnContentMarkerKey("unique ui question", "unique ui answer"); + (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); + (writer as any).writeWatermarkFile(); const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); mockClient.storeChatTurn.mockClear(); @@ -671,8 +724,11 @@ describe("ChatTurnWriter", () => { }, { channelId: "telegram", sessionKey: "agent:main:main" }); await flushMicrotasks(); - expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); - expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("telegram question"); + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(2); + expect(mockClient.storeChatTurn.mock.calls.map((call) => call[1])).toEqual([ + "unique ui question", + "telegram question", + ]); restarted.flushSync(); }); @@ -707,6 +763,10 @@ describe("ChatTurnWriter", () => { user: "same direct text", assistant: "same direct answer", }); + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const contentMarker = (writer as any).externalTurnContentMarkerKey("same direct text", "same direct answer"); + (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); + (writer as any).writeWatermarkFile(); const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); mockClient.storeChatTurn.mockClear(); @@ -731,6 +791,10 @@ describe("ChatTurnWriter", () => { user: "ambiguous direct text", assistant: "ambiguous direct answer", }); + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const contentMarker = (writer as any).externalTurnContentMarkerKey("ambiguous direct text", "ambiguous direct answer"); + (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); + (writer as any).writeWatermarkFile(); const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); mockClient.storeChatTurn.mockClear(); @@ -753,13 +817,17 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T93 — blocked content fallback retires the stale content marker for later windows", async () => { + it("T93 - session-key content markers cannot skip later ID-less windows", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-retired-content", user: "retired direct text", assistant: "retired direct answer", }); + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const contentMarker = (writer as any).externalTurnContentMarkerKey("retired direct text", "retired direct answer"); + (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); + (writer as any).writeWatermarkFile(); const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); restarted.onAgentEnd({ @@ -912,6 +980,32 @@ describe("ChatTurnWriter", () => { } }); + it("T97 - setStateDir adds external marker multiplicities", async () => { + const destinationStateDir = fs.mkdtempSync(path.join(os.tmpdir(), "chatturnwriter-dest-counts-")); + try { + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const marker = (writer as any).externalTurnMarkerId("node-ui-corr-counted"); + (writer as any).restoreExternalTurnMarker(externalCursorKey, marker); + (writer as any).writeWatermarkFile(); + + const destination = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir: destinationStateDir }); + (destination as any).restoreExternalTurnMarker(externalCursorKey, marker); + (destination as any).writeWatermarkFile(); + + await writer.setStateDir(destinationStateDir); + + const bucket: Map | undefined = (writer as any).externalTurnMarkers.get(externalCursorKey); + expect(bucket?.get(marker)).toBe(2); + const persisted = JSON.parse(fs.readFileSync( + path.join(destinationStateDir, "dkg-adapter", "chat-turn-watermarks.json"), + "utf-8", + )); + expect(persisted[externalCursorKey].m[marker]).toBe(2); + } finally { + fs.rmSync(destinationStateDir, { recursive: true, force: true }); + } + }); + it("T17 — disk file accepts the legacy number format for backward compat", async () => { // The pre-fix file contained `{ "sid": }` (watermark only). // Existing on-disk files MUST still load correctly to avoid losing From c090a81f91d47825f37d48aec43f977d9c723d1e Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 21:42:35 +0200 Subject: [PATCH 04/14] fix(openclaw): harden direct marker retry semantics --- .../adapter-openclaw/src/ChatTurnWriter.ts | 14 +- .../adapter-openclaw/src/DkgChannelPlugin.ts | 122 ++++++++++++++---- .../test/ChatTurnWriter.test.ts | 35 +++++ .../adapter-openclaw/test/dkg-channel.test.ts | 17 ++- 4 files changed, 156 insertions(+), 32 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 90f43bcca..c998d8b9c 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -716,7 +716,7 @@ export class ChatTurnWriter { externalCursorKey?: string; }): Promise { const sessionIds = this.collectResetSessionIds(identity); - if (sessionIds.length === 0 && !identity.externalCursorKey) return; + if (sessionIds.length === 0) return; let startReset!: () => void; const reset = new Promise((resolve, reject) => { startReset = () => { @@ -732,7 +732,7 @@ export class ChatTurnWriter { await chain.catch(() => undefined); } } - await this.resetSessionState(sessionIds, identity.externalCursorKey); + await this.resetSessionState(sessionIds); })().then(resolve, reject); }; }); @@ -763,9 +763,9 @@ export class ChatTurnWriter { * `persistOne` calls `saveWatermark(0)`, leaving stale state for the next * `agent_end` against a smaller post-compaction array. */ - private async resetSessionState(sessionIds: string[] | string, externalCursorKey?: string): Promise { + private async resetSessionState(sessionIds: string[] | string): Promise { const ids = Array.isArray(sessionIds) ? sessionIds : [sessionIds].filter(Boolean); - if (ids.length === 0 && !externalCursorKey) return; + if (ids.length === 0) return; for (const sessionId of ids) { const inFlight = this.inFlightPersists.get(sessionId); if (inFlight && inFlight.size > 0) { @@ -798,9 +798,9 @@ export class ChatTurnWriter { // count would skip new pairs in `computeDelta`. this.w4bSessionCounts.delete(sessionId); } - if (externalCursorKey) { - this.externalTurnMarkers.delete(externalCursorKey); - } + // External markers record daemon-success facts from direct-channel + // persists. Preserve them across reset/compaction so the reset W4a replay + // can still consume the marker instead of duplicating the stored UI turn. this.writeWatermarkFile(); } diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index 9e297391a..6d188b340 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -361,6 +361,11 @@ export class DkgChannelPlugin { timer: ReturnType | null; allowDuringShutdown: boolean; }>(); + private readonly pendingMarkerPersistence = new Map | null; + allowDuringShutdown: boolean; + }>(); /** * Per-dispatch AsyncLocalStorage holding the UI-selected project * context graph for the currently-running turn. Populated by @@ -634,6 +639,12 @@ export class DkgChannelPlugin { clearTimeout(job.timer); this.deletePendingTurnPersistence(id); } + for (const [id, job] of this.pendingMarkerPersistence) { + if (job.allowDuringShutdown) continue; + if (!job.timer) continue; + clearTimeout(job.timer); + this.deletePendingMarkerPersistence(id); + } if (this.serverStart) { await this.serverStart.catch(() => {}); @@ -652,6 +663,7 @@ export class DkgChannelPlugin { `[dkg-channel] Channel stop timed out after ${STOP_DRAIN_TIMEOUT_MS}ms waiting for turn persistence to drain; continuing shutdown`, ); this.clearPendingTurnPersistence(); + this.clearPendingMarkerPersistence(); } this.stopDrainDeadlineAt = null; if (updateGatewayStatus) { @@ -895,15 +907,39 @@ export class DkgChannelPlugin { this.notifyStopIdle(); } + private deletePendingMarkerPersistence(correlationId: string): void { + const job = this.pendingMarkerPersistence.get(correlationId); + if (job?.timer) clearTimeout(job.timer); + this.pendingMarkerPersistence.delete(correlationId); + this.notifyStopIdle(); + } + + private clearPendingMarkerPersistence(): void { + for (const job of this.pendingMarkerPersistence.values()) { + if (job.timer) clearTimeout(job.timer); + } + this.pendingMarkerPersistence.clear(); + this.notifyStopIdle(); + } + private notifyStopIdle(): void { - if (!this.stopping || this.inFlight > 0 || this.pendingTurnPersistence.size > 0) return; + if ( + !this.stopping + || this.inFlight > 0 + || this.pendingTurnPersistence.size > 0 + || this.pendingMarkerPersistence.size > 0 + ) return; while (this.stopWaiters.length > 0) { this.stopWaiters.shift()?.(); } } private waitForStopDrain(timeoutMs: number): Promise { - if (this.inFlight === 0 && this.pendingTurnPersistence.size === 0) { + if ( + this.inFlight === 0 + && this.pendingTurnPersistence.size === 0 + && this.pendingMarkerPersistence.size === 0 + ) { return Promise.resolve(true); } return new Promise((resolve) => { @@ -1777,6 +1813,7 @@ export class DkgChannelPlugin { correlationId: string, identity: string, opts?: PersistTurnOptions, + allowDuringShutdown = false, ): Promise { // Non-owner identities (e.g. background workers) get their own session // so they don't pollute the user's DKG UI chat history. @@ -1800,7 +1837,7 @@ export class DkgChannelPlugin { user: userMessage, assistant: assistantReply, correlationId, - }); + }, allowDuringShutdown); this.api?.logger.info?.(`[dkg-channel] Turn persisted to DKG graph: ${correlationId}`); } @@ -1810,27 +1847,68 @@ export class DkgChannelPlugin { user: string; assistant: string; correlationId: string; - }): Promise { + }, allowDuringShutdown: boolean): Promise { if (!this.chatTurnWriter) return; - for (let attempt = 0; attempt < 2; attempt++) { - try { - await this.chatTurnWriter.markExternalTurnPersistedDurable({ - sessionKey: opts.sessionKey, - turnId: opts.turnId, - user: opts.user, - assistant: opts.assistant, - }); + try { + await this.writeExternalTurnMarker(opts); + this.deletePendingMarkerPersistence(opts.correlationId); + } catch (err: any) { + this.scheduleExternalTurnMarkerRetry(opts, 1, allowDuringShutdown, err); + } + } + + private async writeExternalTurnMarker(opts: { + sessionKey?: string; + turnId: string; + user: string; + assistant: string; + }): Promise { + await this.chatTurnWriter?.markExternalTurnPersistedDurable({ + sessionKey: opts.sessionKey, + turnId: opts.turnId, + user: opts.user, + assistant: opts.assistant, + }); + } + + private scheduleExternalTurnMarkerRetry( + opts: { + sessionKey?: string; + turnId: string; + user: string; + assistant: string; + correlationId: string; + }, + attempt: number, + allowDuringShutdown: boolean, + err: any, + ): void { + if (!this.chatTurnWriter || !this.canContinuePersistenceAttempt(allowDuringShutdown)) { + this.deletePendingMarkerPersistence(opts.correlationId); + return; + } + const retryDelayMs = TURN_PERSIST_RETRY_DELAYS_MS[Math.min(attempt - 1, TURN_PERSIST_RETRY_DELAYS_MS.length - 1)]; + this.api?.logger.warn?.( + `[dkg-channel] Turn persisted but ChatTurnWriter marker failed for ${opts.correlationId}; retrying marker in ${retryDelayMs}ms: ${err?.message ?? err}`, + ); + const existing = this.pendingMarkerPersistence.get(opts.correlationId); + if (existing?.timer) clearTimeout(existing.timer); + const timer = setTimeout(() => { + if (!this.chatTurnWriter || !this.canContinuePersistenceAttempt(allowDuringShutdown)) { + this.deletePendingMarkerPersistence(opts.correlationId); return; - } catch (err: any) { - if (attempt === 0) { - await new Promise((resolve) => setTimeout(resolve, TURN_PERSIST_RETRY_DELAYS_MS[0])); - continue; - } - this.api?.logger.warn?.( - `[dkg-channel] Turn persisted but ChatTurnWriter marker failed for ${opts.correlationId}: ${err?.message ?? err}`, - ); } - } + this.pendingMarkerPersistence.set(opts.correlationId, { attempt: attempt + 1, timer: null, allowDuringShutdown }); + void this.writeExternalTurnMarker(opts) + .then(() => { + this.deletePendingMarkerPersistence(opts.correlationId); + }) + .catch((nextErr: any) => { + this.scheduleExternalTurnMarkerRetry(opts, attempt + 1, allowDuringShutdown, nextErr); + }); + }, retryDelayMs); + this.pendingMarkerPersistence.set(opts.correlationId, { attempt, timer, allowDuringShutdown }); + this.notifyStopIdle(); } private queueTurnPersistence( @@ -1850,7 +1928,7 @@ export class DkgChannelPlugin { const attemptPersist = (attempt: number): void => { if (!this.canContinuePersistenceAttempt(allowDuringShutdown)) return; this.pendingTurnPersistence.set(correlationId, { attempt, timer: null, allowDuringShutdown }); - void this.persistTurn(userMessage, assistantReply, correlationId, identity, opts) + void this.persistTurn(userMessage, assistantReply, correlationId, identity, opts, allowDuringShutdown) .then(() => { this.deletePendingTurnPersistence(correlationId); }) diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 42c1bb602..c127501f1 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -609,6 +609,41 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); + for (const hookName of ["onBeforeReset", "onBeforeCompaction"] as const) { + it(`T99 - ${hookName} preserves durable external markers for replay dedupe`, async () => { + const turnId = `node-ui-corr-${hookName}`; + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId, + user: "reset ui question", + assistant: "reset ui answer", + }); + await writer[hookName]({ channelId: "telegram", sessionKey: "agent:main:main" }); + + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const marker = (writer as any).externalTurnMarkerId(turnId); + const persisted = JSON.parse(fs.readFileSync( + path.join(stateDir, "dkg-adapter", "chat-turn-watermarks.json"), + "utf-8", + )); + expect(persisted[externalCursorKey].m[marker]).toBe(1); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "reset ui question", context: { Provider: "dkg-ui", DkgTurnId: turnId } }, + { role: "assistant", content: "reset ui answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(0); + restarted.flushSync(); + }); + } + it("T83 — external marker write failure rolls back counts before retry", async () => { const writeSpy = vi.spyOn(writer as any, "writeWatermarkFile") .mockReturnValueOnce(false) diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index a3eb8616a..63116d50c 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1688,7 +1688,7 @@ describe('DkgChannelPlugin', () => { } }); - it('processInbound should not retry the daemon write when only the ChatTurnWriter marker fails', async () => { + it('processInbound should retry only the ChatTurnWriter marker after daemon write succeeds', async () => { vi.useFakeTimers(); try { const { runtime } = makeMockRuntime({ @@ -1705,19 +1705,30 @@ describe('DkgChannelPlugin', () => { api.cfg = mockCfg; const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; - const markExternalTurnPersistedDurable = vi.fn().mockRejectedValue(new Error('marker disk outage')); + const markExternalTurnPersistedDurable = vi.fn() + .mockRejectedValueOnce(new Error('marker disk outage')) + .mockRejectedValueOnce(new Error('marker disk outage again')) + .mockRejectedValueOnce(new Error('marker disk outage third')) + .mockResolvedValueOnce(undefined); plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); plugin.register(api); await plugin.processInbound('Already stored', 'corr-marker-fail', 'owner'); await vi.advanceTimersByTimeAsync(10); expect(storeCalls).toHaveLength(1); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(1); await vi.advanceTimersByTimeAsync(250); expect(storeCalls).toHaveLength(1); expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + await vi.advanceTimersByTimeAsync(1_000); + expect(storeCalls).toHaveLength(1); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(3); + await vi.advanceTimersByTimeAsync(1_000); + expect(storeCalls).toHaveLength(1); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(4); expect(api.logger.warn.calls.some((call: unknown[]) => - String(call[0]).includes('Turn persisted but ChatTurnWriter marker failed for corr-marker-fail'), + String(call[0]).includes('retrying marker'), )).toBe(true); } finally { vi.useRealTimers(); From 85384992b3d517a560e15abacc24e073681bd320 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 21:57:22 +0200 Subject: [PATCH 05/14] fix(openclaw): keep external id markers durable --- .../adapter-openclaw/src/ChatTurnWriter.ts | 91 ++----------------- .../test/ChatTurnWriter.test.ts | 42 ++++----- 2 files changed, 27 insertions(+), 106 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index c998d8b9c..04359e421 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -43,6 +43,7 @@ interface ComputedChatTurnPair { interface ExternalMarkerAction { skip: boolean; markers: string[]; + rollbackMarkers: string[]; } interface WatermarkStateSnapshot { @@ -505,28 +506,21 @@ export class ChatTurnWriter { // point. Without sequencing, a failed middle pair could be skipped // when the tail succeeds. const lastIdx = pairs.length - 1; - const externalContentMatchCounts = externalCursorKey - ? this.externalContentMatchCounts(externalCursorKey, pairs) - : new Map(); const job = this.trackPersistJob(sessionId, async () => { for (let i = 0; i < pairs.length; i++) { - const { user, assistant, pairIndex, externalTurnIds, externalDirect } = pairs[i]; + const { user, assistant, pairIndex, externalTurnIds } = pairs[i]; if (!user && !assistant) continue; const externalMarkerAction = externalCursorKey ? this.consumeExternalTurnMarkersForPair( externalCursorKey, - user, - assistant, externalTurnIds, - externalDirect, - externalContentMatchCounts, ) - : { skip: false, markers: [] }; + : { skip: false, markers: [], rollbackMarkers: [] }; if (externalCursorKey && externalMarkerAction.markers.length > 0) { const watermarkSnapshot = this.snapshotWatermarkState(sessionId); if (externalMarkerAction.skip) this.bumpWatermark(sessionId, pairIndex); if (!this.commitWatermarkStateSync(sessionId)) { - for (const marker of externalMarkerAction.markers) { + for (const marker of externalMarkerAction.rollbackMarkers) { this.restoreExternalTurnMarker(externalCursorKey, marker); } this.restoreWatermarkState(sessionId, watermarkSnapshot); @@ -1582,75 +1576,22 @@ export class ChatTurnWriter { return `external-id::${createHash("sha256").update(turnId.trim()).digest("hex").slice(0, 16)}`; } - private externalTurnContentMarkerKey(user: string, assistant: string): string { - if (!user && !assistant) return ""; - return `external-content::${this.contentHash(user, assistant)}`; - } - - private externalContentMatchCounts( - sessionKeyCursor: string, - pairs: ComputedChatTurnPair[], - ): Map { - const bucket = this.externalTurnMarkers.get(sessionKeyCursor); - const counts = new Map(); - if (!bucket) return counts; - for (const pair of pairs) { - if (!pair.externalDirect) continue; - const marker = this.externalTurnContentMarkerKey(pair.user, pair.assistant); - if (marker && bucket.has(marker)) { - counts.set(marker, (counts.get(marker) ?? 0) + 1); - } - } - return counts; - } - private consumeExternalTurnMarkersForPair( sessionKeyCursor: string, - user: string, - assistant: string, turnIds: string[], - externalDirect: boolean, - contentMatchCounts: Map, ): ExternalMarkerAction { - const consumed: string[] = []; for (const turnId of turnIds) { const marker = this.externalTurnMarkerId(turnId); - if (marker && this.consumeExternalTurnMarker(sessionKeyCursor, marker)) { - consumed.push(marker); - const contentMarker = this.externalTurnContentMarkerKey(user, assistant); - if (contentMarker && this.consumeExternalTurnMarker(sessionKeyCursor, contentMarker)) { - consumed.push(contentMarker); - } - return { skip: true, markers: consumed }; + if (marker && this.hasExternalTurnMarker(sessionKeyCursor, marker)) { + return { skip: true, markers: [marker], rollbackMarkers: [] }; } } - - const contentMarker = this.externalTurnContentMarkerKey(user, assistant); - if ( - this.allowsExternalContentFallback(sessionKeyCursor) - && externalDirect - && turnIds.length === 0 - && contentMarker - && contentMatchCounts.get(contentMarker) === 1 - && this.consumeExternalTurnMarker(sessionKeyCursor, contentMarker) - ) { - consumed.push(contentMarker); - return { skip: true, markers: consumed }; - } - - if ( - this.allowsExternalContentFallback(sessionKeyCursor) - && externalDirect - && contentMarker - && (turnIds.length > 0 || (contentMatchCounts.get(contentMarker) ?? 0) > 1) - ) { - consumed.push(...this.retireExternalTurnMarker(sessionKeyCursor, contentMarker)); - } - return { skip: false, markers: consumed }; + return { skip: false, markers: [], rollbackMarkers: [] }; } - private allowsExternalContentFallback(sessionKeyCursor: string): boolean { - return !sessionKeyCursor.startsWith("openclaw:transcript:"); + private hasExternalTurnMarker(sessionKeyCursor: string, marker: string): boolean { + const bucket = this.externalTurnMarkers.get(sessionKeyCursor); + return (bucket?.get(marker) ?? 0) > 0; } private consumeExternalTurnMarker(sessionKeyCursor: string, marker: string): boolean { @@ -1669,18 +1610,6 @@ export class ChatTurnWriter { return true; } - private retireExternalTurnMarker(sessionKeyCursor: string, marker: string): string[] { - const bucket = this.externalTurnMarkers.get(sessionKeyCursor); - if (!bucket) return []; - const count = bucket.get(marker) ?? 0; - if (count <= 0) return []; - bucket.delete(marker); - if (bucket.size === 0) { - this.externalTurnMarkers.delete(sessionKeyCursor); - } - return Array.from({ length: count }, () => marker); - } - private restoreExternalTurnMarker(sessionKeyCursor: string, marker: string): void { if (!marker) return; const bucket = this.externalTurnMarkers.get(sessionKeyCursor) ?? new Map(); diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index c127501f1..28a09de85 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -639,6 +639,18 @@ describe("ChatTurnWriter", () => { }, { channelId: "telegram", sessionKey: "agent:main:main" }); await flushMicrotasks(); + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(0); + await restarted[hookName]({ channelId: "telegram", sessionKey: "agent:main:main" }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "reset ui question", context: { Provider: "dkg-ui", DkgTurnId: turnId } }, + { role: "assistant", content: "reset ui answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(0); restarted.flushSync(); }); @@ -734,18 +746,13 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T85 - session-key external markers do not content-fallback without an exact ID", async () => { + it("T85 - session-key external markers require an exact ID", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-unique-content", user: "unique ui question", assistant: "unique ui answer", }); - const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); - const contentMarker = (writer as any).externalTurnContentMarkerKey("unique ui question", "unique ui answer"); - (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); - (writer as any).writeWatermarkFile(); - const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); mockClient.storeChatTurn.mockClear(); restarted.onAgentEnd({ @@ -767,7 +774,7 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T86 — content fallback does not consume a unique non-direct channel pair", async () => { + it("T86 — ID-less non-direct channel pair is not skipped by an external marker", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-stale-content", @@ -791,18 +798,13 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T91 — content fallback does not consume a direct pair with a mismatched explicit ID", async () => { + it("T91 — exact external marker does not skip a direct pair with a mismatched explicit ID", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-stale-id", user: "same direct text", assistant: "same direct answer", }); - const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); - const contentMarker = (writer as any).externalTurnContentMarkerKey("same direct text", "same direct answer"); - (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); - (writer as any).writeWatermarkFile(); - const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); mockClient.storeChatTurn.mockClear(); restarted.onAgentEnd({ @@ -819,18 +821,13 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T92 — content fallback is ambiguous when any same-content direct pair has an explicit ID", async () => { + it("T92 — ID-less direct pair is not skipped without an exact external ID", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-ambiguous-content", user: "ambiguous direct text", assistant: "ambiguous direct answer", }); - const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); - const contentMarker = (writer as any).externalTurnContentMarkerKey("ambiguous direct text", "ambiguous direct answer"); - (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); - (writer as any).writeWatermarkFile(); - const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); mockClient.storeChatTurn.mockClear(); restarted.onAgentEnd({ @@ -852,18 +849,13 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); - it("T93 - session-key content markers cannot skip later ID-less windows", async () => { + it("T93 - exact external marker does not skip later ID-less windows", async () => { await writer.markExternalTurnPersistedDurable({ sessionKey: "agent:main:main", turnId: "node-ui-corr-retired-content", user: "retired direct text", assistant: "retired direct answer", }); - const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); - const contentMarker = (writer as any).externalTurnContentMarkerKey("retired direct text", "retired direct answer"); - (writer as any).restoreExternalTurnMarker(externalCursorKey, contentMarker); - (writer as any).writeWatermarkFile(); - const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); restarted.onAgentEnd({ sessionId: "test", From a7e1ced6a3cf7324fc1bdfdba933a0c006895d29 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 22:14:06 +0200 Subject: [PATCH 06/14] fix(openclaw): snapshot pending watermarks on writes --- .../adapter-openclaw/src/ChatTurnWriter.ts | 14 ++++++++- .../test/ChatTurnWriter.test.ts | 31 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 04359e421..d97f7925b 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -1262,6 +1262,14 @@ export class ChatTurnWriter { return this.writeWatermarkFile(); } + private snapshotWatermarksForWrite(): Map { + const wm = new Map(this.cachedWatermarks); + for (const [key, entry] of this.debounceTimers.entries()) { + wm.set(key, entry.pendingIndex); + } + return wm; + } + private snapshotWatermarkState(sessionId: string): WatermarkStateSnapshot { return { cachedHad: this.cachedWatermarks.has(sessionId), @@ -2132,7 +2140,11 @@ export class ChatTurnWriter { // concurrent persist arriving during the merge+write window // doesn't get wiped on write failure, and the merged values // only become "the source of truth" once the write succeeded. - const wm = overrideMaps?.wm ?? this.cachedWatermarks; + // T100 - Normal writes serialize every pending debounce watermark + // into the durable snapshot without clearing unrelated timers. A + // scoped sync commit for one session must not write a stale cached + // watermark for another session that is still waiting on debounce. + const wm = overrideMaps?.wm ?? this.snapshotWatermarksForWrite(); const bc = overrideMaps?.bc ?? this.w4bSessionCounts; const markersByKey = overrideMaps?.markers ?? this.externalTurnMarkers; const allKeys = new Set([ diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 28a09de85..3db6531c1 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -524,6 +524,37 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); + it("T100 - sync watermark writes include unrelated pending debounce snapshots", async () => { + const pendingSession = "openclaw:tg:::pending"; + const syncSession = "openclaw:tg:::sync"; + (writer as any).saveWatermark(pendingSession, 4); + (writer as any).saveWatermark(syncSession, 2); + (writer as any).w4bSessionCounts.set(syncSession, 1); + + expect((writer as any).debounceTimers.has(pendingSession)).toBe(true); + expect((writer as any).debounceTimers.has(syncSession)).toBe(true); + expect((writer as any).cachedWatermarks.has(pendingSession)).toBe(false); + expect((writer as any).commitWatermarkStateSync(syncSession)).toBe(true); + + const persisted = JSON.parse(fs.readFileSync( + path.join(stateDir, "dkg-adapter", "chat-turn-watermarks.json"), + "utf-8", + )); + expect(persisted[pendingSession].w).toBe(4); + expect(persisted[syncSession].w).toBe(2); + expect(persisted[syncSession].b).toBe(1); + expect((writer as any).debounceTimers.has(pendingSession)).toBe(true); + expect((writer as any).debounceTimers.has(syncSession)).toBe(false); + expect((writer as any).cachedWatermarks.has(pendingSession)).toBe(false); + expect((writer as any).cachedWatermarks.get(syncSession)).toBe(2); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + expect((restarted as any).cachedWatermarks.get(pendingSession)).toBe(4); + expect((restarted as any).cachedWatermarks.get(syncSession)).toBe(2); + expect((restarted as any).w4bSessionCounts.get(syncSession)).toBe(1); + restarted.flushSync(); + }); + it("T81 — before_reset can use event payload identity and clears stale W4b state", async () => { writer.onMessageReceived({ sessionKey: "sk", From cd147826079ae0f8518f0942660375d318014ee9 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 22:28:44 +0200 Subject: [PATCH 07/14] fix(openclaw): avoid reset gate self-wait --- .../adapter-openclaw/src/ChatTurnWriter.ts | 12 +++++++- .../test/ChatTurnWriter.test.ts | 29 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index d97f7925b..1ba89e1bf 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -711,6 +711,11 @@ export class ChatTurnWriter { }): Promise { const sessionIds = this.collectResetSessionIds(identity); if (sessionIds.length === 0) return; + const preResetChains = new Map>(); + for (const sessionId of sessionIds) { + const chain = this.w4aSessionChains.get(sessionId); + if (chain) preResetChains.set(sessionId, chain); + } let startReset!: () => void; const reset = new Promise((resolve, reject) => { startReset = () => { @@ -720,8 +725,10 @@ export class ChatTurnWriter { // scheduling time, so chain entries queued before this reset do // not wait on themselves, while new W4a/W4b/internal-hook work // that arrives after the gate is installed waits or replays. + // T101 - Await the pre-gate snapshot only; post-gate W4a work + // waits on this reset and must not become something reset awaits. for (const sessionId of sessionIds) { - const chain = this.w4aSessionChains.get(sessionId); + const chain = preResetChains.get(sessionId); if (chain) { await chain.catch(() => undefined); } @@ -1591,6 +1598,9 @@ export class ChatTurnWriter { for (const turnId of turnIds) { const marker = this.externalTurnMarkerId(turnId); if (marker && this.hasExternalTurnMarker(sessionKeyCursor, marker)) { + // Exact external markers are durable daemon-success facts, not + // one-shot tickets. Keep them for later reset/compaction replays + // until a future transcript-retention cursor can prove safe GC. return { skip: true, markers: [marker], rollbackMarkers: [] }; } } diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 3db6531c1..236d5d97f 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -584,6 +584,35 @@ describe("ChatTurnWriter", () => { expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("after reset"); }); + it("T101 - reset awaits only pre-gate W4a chains", async () => { + const sessionId = "openclaw:tg:::sk"; + const chains = (writer as any).w4aSessionChains as Map>; + const originalGet = chains.get.bind(chains); + let lookedUpPostGateChain = false; + (chains as any).get = (key: string) => { + if (key === sessionId && (writer as any).pendingResets.has(sessionId)) { + lookedUpPostGateChain = true; + const reset = (writer as any).pendingResets.get(sessionId) as Promise; + const postGateChain = reset.then(() => undefined); + chains.set(sessionId, postGateChain); + return postGateChain; + } + return originalGet(key); + }; + + try { + const result = await Promise.race([ + (writer as any).runReset({ sessionId }).then(() => "done"), + new Promise((resolve) => setTimeout(() => resolve("timeout"), 80)), + ]); + + expect(result).toBe("done"); + expect(lookedUpPostGateChain).toBe(false); + } finally { + delete (chains as any).get; + } + }); + it("T95 — partial reset identity does not clear sibling thread state", async () => { writer.onMessageReceived({ sessionKey: "sk", From c7fc08446f05b5b257a3f0478df0e62beddc94f7 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 22:49:55 +0200 Subject: [PATCH 08/14] fix(openclaw): harden marker durability edges --- .../adapter-openclaw/src/ChatTurnWriter.ts | 40 +++++------ .../adapter-openclaw/src/DkgChannelPlugin.ts | 9 ++- .../test/ChatTurnWriter.test.ts | 72 +++++++++++++++++++ .../adapter-openclaw/test/dkg-channel.test.ts | 46 +++++++++++- 4 files changed, 139 insertions(+), 28 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 1ba89e1bf..03b3372a3 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -341,10 +341,23 @@ export class ChatTurnWriter { } if (live.size > 0) this.externalTurnMarkers.set(key, live); } + const finalDiskWm = this.snapshotWatermarksForWrite(); + wrote = this.writeWatermarkFile(newWatermarkFilePath, { + wm: finalDiskWm, + bc: this.w4bSessionCounts, + markers: this.externalTurnMarkers, + }); + if (!wrote) { + this.logger.warn?.( + "[ChatTurnWriter.setStateDir] Final post-commit rewrite at new path failed; preserving old path for retry.", + { newWatermarkFilePath }, + ); + } } - // T45 — On failure, live state is already untouched. No restore - // needed; concurrent persists' advances during the failed merge - // are preserved automatically. + // T45 - If the initial new-path write failed, live state is still + // untouched. If only the final post-union rewrite failed, live may + // hold merged state but the old path is preserved so a retry or + // normal flush can serialize it without switching to a stale file. if (wrote) { // Only NOW commit the swap. Subsequent normal writes via // `writeWatermarkFile()` (no explicit target) will hit the new @@ -362,27 +375,6 @@ export class ChatTurnWriter { // depends on them. this.stateDir = newStateDir; this.watermarkFilePath = newWatermarkFilePath; - // T54 — Final rewrite at the new path with the post-union live - // state. The earlier `writeWatermarkFile(newWatermarkFilePath, - // { wm: mergedWm, bc: mergedBc })` wrote a SNAPSHOT taken - // before the union; any late persist that fired between - // `flush()` returning and the union step landed in live but - // not in the file. Without this rewrite, a process crash - // before the next debounce flush would leave the new file - // stale, and the restarted writer would load the snapshot - // and replay turns the daemon already has (daemon does not - // dedup — ADR-002). Best-effort; if this write fails, live - // still has the unioned state in memory and the next debounce - // flush at this path catches up. Race window narrows from - // "merge+write+union" (multi-step) to "between this final - // rewrite and the next persist's debounce" (~50ms cap, same - // shape as the writer's normal durability gap). - try { this.writeWatermarkFile(); } catch (err) { - this.logger.warn?.( - "[ChatTurnWriter.setStateDir] Final post-commit rewrite at new path failed; next debounce flush will retry.", - { err, newWatermarkFilePath }, - ); - } } else { // T23/T27 — Internal state stays at the OLD path so a future // setStateDir(newStateDir) retry re-attempts the write. The diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index 6d188b340..eb499aa18 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -1887,7 +1887,14 @@ export class DkgChannelPlugin { this.deletePendingMarkerPersistence(opts.correlationId); return; } - const retryDelayMs = TURN_PERSIST_RETRY_DELAYS_MS[Math.min(attempt - 1, TURN_PERSIST_RETRY_DELAYS_MS.length - 1)]; + const retryDelayMs = TURN_PERSIST_RETRY_DELAYS_MS[attempt - 1]; + if (retryDelayMs == null) { + this.deletePendingMarkerPersistence(opts.correlationId); + this.api?.logger.warn?.( + `[dkg-channel] ChatTurnWriter marker failed permanently after ${attempt} retry attempt(s) for ${opts.correlationId}: ${err?.message ?? err}`, + ); + return; + } this.api?.logger.warn?.( `[dkg-channel] Turn persisted but ChatTurnWriter marker failed for ${opts.correlationId}; retrying marker in ${retryDelayMs}ms: ${err?.message ?? err}`, ); diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 236d5d97f..bb83e48ad 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -1093,6 +1093,78 @@ describe("ChatTurnWriter", () => { } }); + it("T102 - setStateDir final rewrite preserves concurrent external markers", async () => { + const destinationStateDir = fs.mkdtempSync(path.join(os.tmpdir(), "chatturnwriter-dest-marker-race-")); + try { + const newDir = path.join(destinationStateDir, "dkg-adapter"); + fs.mkdirSync(newDir, { recursive: true }); + const newFile = path.join(newDir, "chat-turn-watermarks.json"); + fs.writeFileSync(newFile, JSON.stringify({})); + + const dkw = writer as any; + const externalCursorKey = dkw.externalCursorKeyFromSessionKey("agent:main:main"); + const marker = dkw.externalTurnMarkerId("node-ui-corr-marker-race"); + const realWrite = dkw.writeWatermarkFile.bind(dkw); + const writeSpy = vi.spyOn(dkw, "writeWatermarkFile").mockImplementationOnce((target: string, override: any) => { + dkw.restoreExternalTurnMarker(externalCursorKey, marker); + return realWrite(target, override); + }); + + await writer.setStateDir(destinationStateDir); + + const persisted = JSON.parse(fs.readFileSync(newFile, "utf-8")); + expect(persisted[externalCursorKey].m[marker]).toBe(1); + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir: destinationStateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "migrated ui question", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-marker-race" } }, + { role: "assistant", content: "migrated ui answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(0); + writeSpy.mockRestore(); + restarted.flushSync(); + } finally { + fs.rmSync(destinationStateDir, { recursive: true, force: true }); + } + }); + + it("T103 - setStateDir does not swap to a stale file when final marker rewrite fails", async () => { + const destinationStateDir = fs.mkdtempSync(path.join(os.tmpdir(), "chatturnwriter-dest-marker-fail-")); + try { + const newDir = path.join(destinationStateDir, "dkg-adapter"); + fs.mkdirSync(newDir, { recursive: true }); + fs.writeFileSync(path.join(newDir, "chat-turn-watermarks.json"), JSON.stringify({})); + + const dkw = writer as any; + const originalStateDir = dkw.stateDir; + const originalWatermarkFilePath = dkw.watermarkFilePath; + const externalCursorKey = dkw.externalCursorKeyFromSessionKey("agent:main:main"); + const marker = dkw.externalTurnMarkerId("node-ui-corr-marker-final-fail"); + const realWrite = dkw.writeWatermarkFile.bind(dkw); + const writeSpy = vi.spyOn(dkw, "writeWatermarkFile") + .mockImplementationOnce((target: string, override: any) => { + dkw.restoreExternalTurnMarker(externalCursorKey, marker); + return realWrite(target, override); + }) + .mockImplementationOnce(() => false); + + await writer.setStateDir(destinationStateDir); + + expect(dkw.stateDir).toBe(originalStateDir); + expect(dkw.watermarkFilePath).toBe(originalWatermarkFilePath); + expect(dkw.externalTurnMarkers.get(externalCursorKey)?.get(marker)).toBe(1); + expect(writeSpy).toHaveBeenCalledTimes(2); + writeSpy.mockRestore(); + } finally { + fs.rmSync(destinationStateDir, { recursive: true, force: true }); + } + }); + it("T17 — disk file accepts the legacy number format for backward compat", async () => { // The pre-fix file contained `{ "sid": }` (watermark only). // Existing on-disk files MUST still load correctly to avoid losing diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index 63116d50c..74426b6f8 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1708,7 +1708,6 @@ describe('DkgChannelPlugin', () => { const markExternalTurnPersistedDurable = vi.fn() .mockRejectedValueOnce(new Error('marker disk outage')) .mockRejectedValueOnce(new Error('marker disk outage again')) - .mockRejectedValueOnce(new Error('marker disk outage third')) .mockResolvedValueOnce(undefined); plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); plugin.register(api); @@ -1724,11 +1723,52 @@ describe('DkgChannelPlugin', () => { await vi.advanceTimersByTimeAsync(1_000); expect(storeCalls).toHaveLength(1); expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(3); + expect(api.logger.warn.calls.some((call: unknown[]) => + String(call[0]).includes('retrying marker'), + )).toBe(true); + } finally { + vi.useRealTimers(); + } + }); + + it('processInbound caps ChatTurnWriter marker-only retries after daemon write succeeds', async () => { + vi.useFakeTimers(); + try { + const { runtime } = makeMockRuntime({ + dispatchImpl: async (params) => { + await params.dispatcherOptions.deliver({ text: 'Persisted reply' }); + }, + }); + const mockCfg = { session: { dmScope: 'main' }, agents: {} }; + + const api = makeApi({ + logger: { info: trackFn(), warn: trackFn(), debug: trackFn() }, + } as any) as any; + api.runtime = runtime; + api.cfg = mockCfg; + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn() + .mockRejectedValue(new Error('marker disk outage')); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + plugin.register(api); + + await plugin.processInbound('Already stored', 'corr-marker-permanent-fail', 'owner'); + await vi.advanceTimersByTimeAsync(10); + expect(storeCalls).toHaveLength(1); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(1); + + await vi.advanceTimersByTimeAsync(250); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); await vi.advanceTimersByTimeAsync(1_000); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(3); + await vi.advanceTimersByTimeAsync(5_000); + expect(storeCalls).toHaveLength(1); - expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(4); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(3); + expect((plugin as any).pendingMarkerPersistence.size).toBe(0); expect(api.logger.warn.calls.some((call: unknown[]) => - String(call[0]).includes('retrying marker'), + String(call[0]).includes('failed permanently'), )).toBe(true); } finally { vi.useRealTimers(); From b29352eefcc911460dadd437342a20ee0de675bd Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 23:15:55 +0200 Subject: [PATCH 09/14] fix(openclaw): preserve fallback marker state --- .../adapter-openclaw/src/ChatTurnWriter.ts | 6 + .../adapter-openclaw/src/DkgChannelPlugin.ts | 23 ++- packages/adapter-openclaw/src/types.ts | 6 + .../test/ChatTurnWriter.test.ts | 4 +- .../adapter-openclaw/test/dkg-channel.test.ts | 137 +++++++++++++++++- 5 files changed, 168 insertions(+), 8 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 03b3372a3..38a12a3df 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -352,6 +352,12 @@ export class ChatTurnWriter { "[ChatTurnWriter.setStateDir] Final post-commit rewrite at new path failed; preserving old path for retry.", { newWatermarkFilePath }, ); + if (!this.writeWatermarkFile()) { + this.logger.warn?.( + "[ChatTurnWriter.setStateDir] Failed to preserve post-commit state at old path after migration rewrite failure.", + { oldWatermarkFilePath: this.watermarkFilePath, newWatermarkFilePath }, + ); + } } } // T45 - If the initial new-path write failed, live state is still diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index eb499aa18..088f42565 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -47,6 +47,11 @@ function sanitizeIdentity(raw: string): string { return raw.replace(/[^a-zA-Z0-9_-]/g, '').slice(0, 64) || 'unknown'; } +function fallbackRouteInboundSessionKey(identity: string | undefined): string { + const rawIdentity = identity || 'owner'; + return rawIdentity === 'owner' ? 'agent:main:main' : `agent:main:${sanitizeIdentity(rawIdentity)}`; +} + function finalizeAgentReplyText(text: string): string { if (text.trim().length === 0) { throw new Error(NO_TEXT_RESPONSE_ERROR); @@ -1169,12 +1174,14 @@ export class DkgChannelPlugin { // used when `runtime.channel` is unavailable must do the same, or // tool calls fired during this dispatch will read an empty ALS store // and silently degrade recall to `agent-context` only. We don't have - // a resolved sessionKey on this path (routing lives in - // runtime.channel), so the context carries only `uiContextGraphId` - // and `correlationId`. + // runtime.channel route metadata here, so stamp a deterministic + // transcript key that matches the DKG UI owner transcript marker + // bucket used by ChatTurnWriter replay dedupe. + const fallbackSessionKey = fallbackRouteInboundSessionKey(identity || 'owner'); const dispatchContext: DkgDispatchContext = { uiContextGraphId, correlationId, + sessionKey: fallbackSessionKey, }; const reply = await this.runWithDispatchContext(dispatchContext, () => api.routeInboundMessage!({ @@ -1183,9 +1190,15 @@ export class DkgChannelPlugin { senderIsOwner: true, text: buildAgentBody(text, { attachmentRefs: contextAttachmentRefs, contextEntries: sanitizedContextEntries }), correlationId, - } as any), + sessionKey: fallbackSessionKey, + SessionKey: fallbackSessionKey, + }), ); - const { sessionKey, ...replyForCaller } = reply; + const { sessionKey: replySessionKey, SessionKey: replyOpenClawSessionKey, ...replyForCaller } = reply; + const sessionKey = + (typeof replySessionKey === 'string' ? replySessionKey.trim() : '') + || (typeof replyOpenClawSessionKey === 'string' ? replyOpenClawSessionKey.trim() : '') + || fallbackSessionKey; this.queueTurnPersistence(text, reply.text, correlationId, identity || 'owner', { attachmentRefs, sessionKey, diff --git a/packages/adapter-openclaw/src/types.ts b/packages/adapter-openclaw/src/types.ts index 20a88bb7f..0474ace1e 100644 --- a/packages/adapter-openclaw/src/types.ts +++ b/packages/adapter-openclaw/src/types.ts @@ -126,6 +126,10 @@ export interface ChannelInboundMessage { text: string; /** Correlation ID for request-reply tracking. */ correlationId?: string; + /** Optional transcript session key for legacy route fallbacks that can honor it. */ + sessionKey?: string; + /** OpenClaw context-style alias for `sessionKey`. */ + SessionKey?: string; } /** Outbound reply from OpenClaw to an external channel. */ @@ -138,6 +142,8 @@ export interface ChannelOutboundReply { turnId?: string; /** OpenClaw transcript session key resolved for this channel dispatch. */ sessionKey?: string; + /** OpenClaw context-style alias for `sessionKey` returned by legacy routes. */ + SessionKey?: string; /** Tool calls made during this turn. */ toolCalls?: Array<{ name: string; args: Record; result: unknown }>; } diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index bb83e48ad..fb243909e 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -1158,7 +1158,9 @@ describe("ChatTurnWriter", () => { expect(dkw.stateDir).toBe(originalStateDir); expect(dkw.watermarkFilePath).toBe(originalWatermarkFilePath); expect(dkw.externalTurnMarkers.get(externalCursorKey)?.get(marker)).toBe(1); - expect(writeSpy).toHaveBeenCalledTimes(2); + expect(writeSpy).toHaveBeenCalledTimes(3); + const persistedOldPath = JSON.parse(fs.readFileSync(originalWatermarkFilePath, "utf-8")); + expect(persistedOldPath[externalCursorKey].m[marker]).toBe(1); writeSpy.mockRestore(); } finally { fs.rmSync(destinationStateDir, { recursive: true, force: true }); diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index 74426b6f8..12512d640 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1904,6 +1904,8 @@ describe('DkgChannelPlugin', () => { senderIsOwner: true, text: 'Hello', correlationId: 'corr-2', + sessionKey: 'agent:main:main', + SessionKey: 'agent:main:main', }); expect(reply.text).toBe('Reply!'); expect(reply.correlationId).toBe('corr-2'); @@ -1917,6 +1919,123 @@ describe('DkgChannelPlugin', () => { ]); }); + it('processInbound routeInboundMessage fallback marks direct-channel persists with a stable session key', async () => { + const routeInboundMessage = trackAsyncFn(async () => ({ + correlationId: 'corr-route-marker', + text: 'Reply!', + })); + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + const api = makeApi({ routeInboundMessage }); + plugin.register(api); + + await plugin.processInbound('Hello', 'corr-route-marker', 'owner'); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ + sessionKey: 'agent:main:main', + })); + expect(storeCalls[0]).toEqual([ + 'openclaw:dkg-ui', + 'Hello', + 'Reply!', + { turnId: 'corr-route-marker' }, + ]); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'agent:main:main', + turnId: 'corr-route-marker', + user: 'Hello', + assistant: 'Reply!', + }); + }); + + it('processInbound routeInboundMessage fallback does not collapse owner-like identities into the owner marker bucket', async () => { + const routeInboundMessage = trackAsyncFn(async () => ({ + correlationId: 'corr-route-ownerish', + text: 'Reply!', + })); + client.storeChatTurn = async () => undefined as any; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + const api = makeApi({ routeInboundMessage }); + plugin.register(api); + + await plugin.processInbound('Hello', 'corr-route-ownerish', 'owner!'); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ + senderId: 'owner!', + sessionKey: 'agent:main:owner', + SessionKey: 'agent:main:owner', + })); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'agent:main:owner', + turnId: 'corr-route-ownerish', + user: 'Hello', + assistant: 'Reply!', + }); + }); + + it('processInbound routeInboundMessage fallback marks non-owner direct-channel persists with the non-owner session key', async () => { + const routeInboundMessage = trackAsyncFn(async () => ({ + correlationId: 'corr-route-worker', + text: 'Worker reply', + })); + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + const api = makeApi({ routeInboundMessage }); + plugin.register(api); + + await plugin.processInbound('Work item', 'corr-route-worker', 'background-worker'); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ + senderId: 'background-worker', + sessionKey: 'agent:main:background-worker', + SessionKey: 'agent:main:background-worker', + })); + expect(storeCalls[0]).toEqual([ + 'openclaw:dkg-ui:background-worker', + 'Work item', + 'Worker reply', + { turnId: 'corr-route-worker' }, + ]); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'agent:main:background-worker', + turnId: 'corr-route-worker', + user: 'Work item', + assistant: 'Worker reply', + }); + }); + + it('processInbound routeInboundMessage fallback accepts uppercase reply SessionKey for marker persistence', async () => { + const routeInboundMessage = trackAsyncFn(async () => ({ + correlationId: 'corr-route-uppercase-session', + text: 'Reply!', + SessionKey: 'agent:legacy:actual', + })); + client.storeChatTurn = async () => undefined as any; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + const api = makeApi({ routeInboundMessage }); + plugin.register(api); + + const reply = await plugin.processInbound('Hello', 'corr-route-uppercase-session', 'owner'); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect((reply as any).SessionKey).toBeUndefined(); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'agent:legacy:actual', + turnId: 'corr-route-uppercase-session', + user: 'Hello', + assistant: 'Reply!', + }); + }); + it('processInbound wraps the routeInboundMessage fallback in an ALS dispatch scope so slot-backed recall sees the UI-selected CG (Codex B13)', async () => { // B13 regression guard. When the gateway has no `runtime.channel` and // the adapter falls back to `api.routeInboundMessage`, the fallback @@ -1928,9 +2047,19 @@ describe('DkgChannelPlugin', () => { // `plugin.getSessionProjectContextGraphId(undefined)` from inside the // callback (i.e. while the ALS scope is active) and asserts the // captured value matches the stamped `uiContextGraphId`. - const capture: { inScope?: string | undefined } = {}; - const routeInboundMessage = vi.fn().mockImplementation(async () => { + const capture: { + inScope?: string | undefined; + sessionScope?: string | undefined; + mismatchedSessionScope?: string | undefined; + messageSessionKey?: string | undefined; + messageOpenClawSessionKey?: string | undefined; + } = {}; + const routeInboundMessage = vi.fn().mockImplementation(async (message: any) => { + capture.messageSessionKey = message.sessionKey; + capture.messageOpenClawSessionKey = message.SessionKey; capture.inScope = plugin.getSessionProjectContextGraphId(undefined); + capture.sessionScope = plugin.getSessionProjectContextGraphId('agent:main:main'); + capture.mismatchedSessionScope = plugin.getSessionProjectContextGraphId('agent:other:owner'); return { correlationId: 'corr-b13', text: 'Reply from route' }; }); const api = makeApi({ routeInboundMessage }); @@ -1945,6 +2074,10 @@ describe('DkgChannelPlugin', () => { // While the fallback was running, the ALS scope was populated. expect(capture.inScope).toBe('research-b13'); + expect(capture.sessionScope).toBe('research-b13'); + expect(capture.mismatchedSessionScope).toBeUndefined(); + expect(capture.messageSessionKey).toBe('agent:main:main'); + expect(capture.messageOpenClawSessionKey).toBe('agent:main:main'); // After the dispatch resolves, the ALS is torn down. expect(plugin.getSessionProjectContextGraphId(undefined)).toBeUndefined(); }); From 18e0078d8a7ad3810095c22a14781947bc2aa253 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 23:40:21 +0200 Subject: [PATCH 10/14] fix(openclaw): harden channel marker shutdown --- .../adapter-openclaw/src/DkgChannelPlugin.ts | 120 ++++++++++---- .../adapter-openclaw/test/dkg-channel.test.ts | 151 ++++++++++++++++++ 2 files changed, 244 insertions(+), 27 deletions(-) diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index 088f42565..a8d7e7bc7 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -38,6 +38,7 @@ const DEFAULT_CHANNEL_ACCOUNT_ID = 'default'; const TURN_PERSIST_RETRY_DELAYS_MS = [250, 1_000] as const; const CHANNEL_RESPONSE_TIMEOUT_MS = 180_000; const STOP_DRAIN_TIMEOUT_MS = 1_500; +const FINAL_MARKER_FLUSH_TIMEOUT_MS = 250; const NO_TEXT_RESPONSE_ERROR = 'Agent returned no text response'; const CANCELLED_TURN_MESSAGE = '[OpenClaw reply cancelled before completion]'; const FAILED_TURN_MESSAGE_PREFIX = '[OpenClaw reply failed before completion'; @@ -224,6 +225,14 @@ interface PersistTurnOptions { turnId?: string; } +interface ExternalTurnMarkerPersistOptions { + sessionKey?: string; + turnId: string; + user: string; + assistant: string; + correlationId: string; +} + interface InboundChatOptions { attachmentRefs?: OpenClawAttachmentRef[]; contextEntries?: ChatContextEntry[]; @@ -370,6 +379,8 @@ export class DkgChannelPlugin { attempt: number; timer: ReturnType | null; allowDuringShutdown: boolean; + opts: ExternalTurnMarkerPersistOptions; + inFlight: Promise | null; }>(); /** * Per-dispatch AsyncLocalStorage holding the UI-selected project @@ -667,6 +678,7 @@ export class DkgChannelPlugin { this.api?.logger.warn?.( `[dkg-channel] Channel stop timed out after ${STOP_DRAIN_TIMEOUT_MS}ms waiting for turn persistence to drain; continuing shutdown`, ); + await this.flushPendingMarkerPersistenceBeforeDrop(); this.clearPendingTurnPersistence(); this.clearPendingMarkerPersistence(); } @@ -927,6 +939,56 @@ export class DkgChannelPlugin { this.notifyStopIdle(); } + private async flushPendingMarkerPersistenceBeforeDrop(): Promise { + if (!this.chatTurnWriter || this.pendingMarkerPersistence.size === 0) return; + const jobs = Array.from(this.pendingMarkerPersistence.entries()); + const deadlineAt = Date.now() + FINAL_MARKER_FLUSH_TIMEOUT_MS; + for (const [correlationId, job] of jobs) { + try { + if (job.timer) clearTimeout(job.timer); + const remainingMs = Math.max(0, deadlineAt - Date.now()); + if (remainingMs === 0) { + this.api?.logger.warn?.( + `[dkg-channel] Final ChatTurnWriter marker flush timed out during shutdown for ${correlationId}; dropping marker job.`, + ); + continue; + } + const markerWrite = job.inFlight ?? this.writeExternalTurnMarker(job.opts); + const flushed = await this.waitForExternalMarkerWrite(markerWrite, remainingMs); + if (!flushed) { + this.api?.logger.warn?.( + `[dkg-channel] Final ChatTurnWriter marker flush timed out during shutdown for ${correlationId}; dropping marker job.`, + ); + } + } catch (err: any) { + this.api?.logger.warn?.( + `[dkg-channel] Final ChatTurnWriter marker flush failed during shutdown for ${correlationId}: ${err?.message ?? err}`, + ); + } finally { + this.pendingMarkerPersistence.delete(correlationId); + } + } + this.notifyStopIdle(); + } + + private async waitForExternalMarkerWrite( + markerWrite: Promise, + timeoutMs: number, + ): Promise { + let timer: ReturnType | null = null; + void markerWrite.catch(() => {}); + try { + return await Promise.race([ + markerWrite.then(() => true), + new Promise((resolve) => { + timer = setTimeout(() => resolve(false), timeoutMs); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } + } + private notifyStopIdle(): void { if ( !this.stopping @@ -1195,13 +1257,17 @@ export class DkgChannelPlugin { }), ); const { sessionKey: replySessionKey, SessionKey: replyOpenClawSessionKey, ...replyForCaller } = reply; - const sessionKey = + const returnedSessionKey = (typeof replySessionKey === 'string' ? replySessionKey.trim() : '') - || (typeof replyOpenClawSessionKey === 'string' ? replyOpenClawSessionKey.trim() : '') - || fallbackSessionKey; + || (typeof replyOpenClawSessionKey === 'string' ? replyOpenClawSessionKey.trim() : ''); + if (!returnedSessionKey && this.chatTurnWriter) { + api.logger.warn?.( + `[dkg-channel] routeInboundMessage reply for ${correlationId} did not include sessionKey; skipping ChatTurnWriter marker for this direct-channel turn.`, + ); + } this.queueTurnPersistence(text, reply.text, correlationId, identity || 'owner', { attachmentRefs, - sessionKey, + sessionKey: returnedSessionKey || undefined, }, true); return replyForCaller; } @@ -1854,14 +1920,12 @@ export class DkgChannelPlugin { this.api?.logger.info?.(`[dkg-channel] Turn persisted to DKG graph: ${correlationId}`); } - private async markExternalTurnPersistedAfterStore(opts: { - sessionKey?: string; - turnId: string; - user: string; - assistant: string; - correlationId: string; - }, allowDuringShutdown: boolean): Promise { + private async markExternalTurnPersistedAfterStore( + opts: ExternalTurnMarkerPersistOptions, + allowDuringShutdown: boolean, + ): Promise { if (!this.chatTurnWriter) return; + if (!opts.sessionKey) return; try { await this.writeExternalTurnMarker(opts); this.deletePendingMarkerPersistence(opts.correlationId); @@ -1870,12 +1934,7 @@ export class DkgChannelPlugin { } } - private async writeExternalTurnMarker(opts: { - sessionKey?: string; - turnId: string; - user: string; - assistant: string; - }): Promise { + private async writeExternalTurnMarker(opts: ExternalTurnMarkerPersistOptions): Promise { await this.chatTurnWriter?.markExternalTurnPersistedDurable({ sessionKey: opts.sessionKey, turnId: opts.turnId, @@ -1885,13 +1944,7 @@ export class DkgChannelPlugin { } private scheduleExternalTurnMarkerRetry( - opts: { - sessionKey?: string; - turnId: string; - user: string; - assistant: string; - correlationId: string; - }, + opts: ExternalTurnMarkerPersistOptions, attempt: number, allowDuringShutdown: boolean, err: any, @@ -1918,8 +1971,15 @@ export class DkgChannelPlugin { this.deletePendingMarkerPersistence(opts.correlationId); return; } - this.pendingMarkerPersistence.set(opts.correlationId, { attempt: attempt + 1, timer: null, allowDuringShutdown }); - void this.writeExternalTurnMarker(opts) + const markerWrite = this.writeExternalTurnMarker(opts); + this.pendingMarkerPersistence.set(opts.correlationId, { + attempt: attempt + 1, + timer: null, + allowDuringShutdown, + opts, + inFlight: markerWrite, + }); + void markerWrite .then(() => { this.deletePendingMarkerPersistence(opts.correlationId); }) @@ -1927,7 +1987,13 @@ export class DkgChannelPlugin { this.scheduleExternalTurnMarkerRetry(opts, attempt + 1, allowDuringShutdown, nextErr); }); }, retryDelayMs); - this.pendingMarkerPersistence.set(opts.correlationId, { attempt, timer, allowDuringShutdown }); + this.pendingMarkerPersistence.set(opts.correlationId, { + attempt, + timer, + allowDuringShutdown, + opts, + inFlight: null, + }); this.notifyStopIdle(); } diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index 12512d640..897dda9f0 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1775,6 +1775,120 @@ describe('DkgChannelPlugin', () => { } }); + it('stop should force one final ChatTurnWriter marker flush before dropping timed-out marker jobs', async () => { + vi.useFakeTimers(); + try { + let resolveSecondMarker!: () => void; + const { runtime } = makeMockRuntime({ + dispatchImpl: async (params) => { + await params.dispatcherOptions.deliver({ text: 'Persisted reply' }); + }, + }); + const mockCfg = { session: { dmScope: 'main' }, agents: {} }; + + const api = makeApi({ + logger: { info: trackFn(), warn: trackFn(), debug: trackFn() }, + } as any) as any; + api.runtime = runtime; + api.cfg = mockCfg; + client.storeChatTurn = async () => undefined as any; + const markExternalTurnPersistedDurable = vi.fn() + .mockRejectedValueOnce(new Error('marker disk outage')) + .mockImplementationOnce(() => new Promise((resolve) => { resolveSecondMarker = resolve; })); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + plugin.register(api); + + await plugin.processInbound('Already stored', 'corr-marker-stop-timeout', 'owner'); + await vi.advanceTimersByTimeAsync(10); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(1); + + await vi.advanceTimersByTimeAsync(250); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + expect((plugin as any).pendingMarkerPersistence.size).toBe(1); + + const stopPromise = plugin.stop(); + let stopSettled = false; + void stopPromise.then(() => { stopSettled = true; }); + await Promise.resolve(); + expect(stopSettled).toBe(false); + + await vi.advanceTimersByTimeAsync(1_500); + await Promise.resolve(); + expect(stopSettled).toBe(false); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + + resolveSecondMarker(); + await stopPromise; + + expect(stopSettled).toBe(true); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + expect(markExternalTurnPersistedDurable).toHaveBeenLastCalledWith({ + sessionKey: 'session-1', + turnId: 'corr-marker-stop-timeout', + user: 'Already stored', + assistant: 'Persisted reply', + }); + expect((plugin as any).pendingMarkerPersistence.size).toBe(0); + } finally { + vi.useRealTimers(); + } + }); + + it('stop should keep the final ChatTurnWriter marker flush bounded when the final write hangs', async () => { + vi.useFakeTimers(); + try { + const { runtime } = makeMockRuntime({ + dispatchImpl: async (params) => { + await params.dispatcherOptions.deliver({ text: 'Persisted reply' }); + }, + }); + const mockCfg = { session: { dmScope: 'main' }, agents: {} }; + + const api = makeApi({ + logger: { info: trackFn(), warn: trackFn(), debug: trackFn() }, + } as any) as any; + api.runtime = runtime; + api.cfg = mockCfg; + client.storeChatTurn = async () => undefined as any; + const markExternalTurnPersistedDurable = vi.fn() + .mockRejectedValueOnce(new Error('marker disk outage')) + .mockImplementation(() => new Promise(() => {})); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + plugin.register(api); + + await plugin.processInbound('Already stored', 'corr-marker-stop-timeout-hang', 'owner'); + await vi.advanceTimersByTimeAsync(10); + await vi.advanceTimersByTimeAsync(250); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + + const stopPromise = plugin.stop(); + let stopSettled = false; + void stopPromise.then(() => { stopSettled = true; }); + await Promise.resolve(); + expect(stopSettled).toBe(false); + + await vi.advanceTimersByTimeAsync(1_500); + await Promise.resolve(); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(2); + expect(stopSettled).toBe(false); + + await vi.advanceTimersByTimeAsync(249); + await Promise.resolve(); + expect(stopSettled).toBe(false); + + await vi.advanceTimersByTimeAsync(1); + await stopPromise; + + expect(stopSettled).toBe(true); + expect((plugin as any).pendingMarkerPersistence.size).toBe(0); + expect(api.logger.warn.calls.some((call: unknown[]) => + String(call[0]).includes('Final ChatTurnWriter marker flush timed out'), + )).toBe(true); + } finally { + vi.useRealTimers(); + } + }); + it('persistTurn should use separate sessionId for non-owner identities', async () => { const { runtime } = makeMockRuntime({ resolveAgentRouteImpl: () => ({ agentId: 'agent-1', sessionKey: 'session-1' }), @@ -1923,6 +2037,7 @@ describe('DkgChannelPlugin', () => { const routeInboundMessage = trackAsyncFn(async () => ({ correlationId: 'corr-route-marker', text: 'Reply!', + sessionKey: 'agent:main:main', })); const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; @@ -1955,6 +2070,7 @@ describe('DkgChannelPlugin', () => { const routeInboundMessage = trackAsyncFn(async () => ({ correlationId: 'corr-route-ownerish', text: 'Reply!', + sessionKey: 'agent:main:owner', })); client.storeChatTurn = async () => undefined as any; const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); @@ -1982,6 +2098,7 @@ describe('DkgChannelPlugin', () => { const routeInboundMessage = trackAsyncFn(async () => ({ correlationId: 'corr-route-worker', text: 'Worker reply', + sessionKey: 'agent:main:background-worker', })); const storeCalls: unknown[][] = []; client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; @@ -2036,6 +2153,40 @@ describe('DkgChannelPlugin', () => { }); }); + it('processInbound routeInboundMessage fallback skips marker persistence when the route does not return its resolved session key', async () => { + const routeInboundMessage = trackAsyncFn(async () => ({ + correlationId: 'corr-route-no-session', + text: 'Reply!', + })); + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + const api = makeApi({ + routeInboundMessage, + logger: { info: trackFn(), warn: trackFn(), debug: trackFn() }, + }); + plugin.register(api); + + await plugin.processInbound('Hello', 'corr-route-no-session', 'owner'); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ + sessionKey: 'agent:main:main', + SessionKey: 'agent:main:main', + })); + expect(storeCalls[0]).toEqual([ + 'openclaw:dkg-ui', + 'Hello', + 'Reply!', + { turnId: 'corr-route-no-session' }, + ]); + expect(markExternalTurnPersistedDurable).not.toHaveBeenCalled(); + expect(api.logger.warn.calls.some((call: unknown[]) => + String(call[0]).includes('did not include sessionKey'), + )).toBe(true); + }); + it('processInbound wraps the routeInboundMessage fallback in an ALS dispatch scope so slot-backed recall sees the UI-selected CG (Codex B13)', async () => { // B13 regression guard. When the gateway has no `runtime.channel` and // the adapter falls back to `api.routeInboundMessage`, the fallback From 0e6e218f831f4d1720bb6fd94457da322d9ec4f6 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Thu, 30 Apr 2026 23:54:16 +0200 Subject: [PATCH 11/14] fix(openclaw): avoid guessed fallback session keys --- .../adapter-openclaw/src/DkgChannelPlugin.ts | 18 +++------ .../adapter-openclaw/test/dkg-channel.test.ts | 37 +++++++------------ 2 files changed, 19 insertions(+), 36 deletions(-) diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index a8d7e7bc7..3d6421524 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -48,11 +48,6 @@ function sanitizeIdentity(raw: string): string { return raw.replace(/[^a-zA-Z0-9_-]/g, '').slice(0, 64) || 'unknown'; } -function fallbackRouteInboundSessionKey(identity: string | undefined): string { - const rawIdentity = identity || 'owner'; - return rawIdentity === 'owner' ? 'agent:main:main' : `agent:main:${sanitizeIdentity(rawIdentity)}`; -} - function finalizeAgentReplyText(text: string): string { if (text.trim().length === 0) { throw new Error(NO_TEXT_RESPONSE_ERROR); @@ -1235,15 +1230,14 @@ export class DkgChannelPlugin { // UI-selected `uiContextGraphId`. The `routeInboundMessage` fallback // used when `runtime.channel` is unavailable must do the same, or // tool calls fired during this dispatch will read an empty ALS store - // and silently degrade recall to `agent-context` only. We don't have - // runtime.channel route metadata here, so stamp a deterministic - // transcript key that matches the DKG UI owner transcript marker - // bucket used by ChatTurnWriter replay dedupe. - const fallbackSessionKey = fallbackRouteInboundSessionKey(identity || 'owner'); + // and silently degrade recall to `agent-context` only. We deliberately + // do not guess a sessionKey here: legacy routes may resolve a different + // OpenClaw session, and sending a synthetic key can split transcript + // state from the real route. Marker persistence uses only the key + // returned by routeInboundMessage. const dispatchContext: DkgDispatchContext = { uiContextGraphId, correlationId, - sessionKey: fallbackSessionKey, }; const reply = await this.runWithDispatchContext(dispatchContext, () => api.routeInboundMessage!({ @@ -1252,8 +1246,6 @@ export class DkgChannelPlugin { senderIsOwner: true, text: buildAgentBody(text, { attachmentRefs: contextAttachmentRefs, contextEntries: sanitizedContextEntries }), correlationId, - sessionKey: fallbackSessionKey, - SessionKey: fallbackSessionKey, }), ); const { sessionKey: replySessionKey, SessionKey: replyOpenClawSessionKey, ...replyForCaller } = reply; diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index 897dda9f0..e31f61d11 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -2018,8 +2018,6 @@ describe('DkgChannelPlugin', () => { senderIsOwner: true, text: 'Hello', correlationId: 'corr-2', - sessionKey: 'agent:main:main', - SessionKey: 'agent:main:main', }); expect(reply.text).toBe('Reply!'); expect(reply.correlationId).toBe('corr-2'); @@ -2033,7 +2031,7 @@ describe('DkgChannelPlugin', () => { ]); }); - it('processInbound routeInboundMessage fallback marks direct-channel persists with a stable session key', async () => { + it('processInbound routeInboundMessage fallback marks direct-channel persists with the returned session key', async () => { const routeInboundMessage = trackAsyncFn(async () => ({ correlationId: 'corr-route-marker', text: 'Reply!', @@ -2049,9 +2047,8 @@ describe('DkgChannelPlugin', () => { await plugin.processInbound('Hello', 'corr-route-marker', 'owner'); await new Promise((resolve) => setTimeout(resolve, 10)); - expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ - sessionKey: 'agent:main:main', - })); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('sessionKey'); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('SessionKey'); expect(storeCalls[0]).toEqual([ 'openclaw:dkg-ui', 'Hello', @@ -2083,9 +2080,9 @@ describe('DkgChannelPlugin', () => { expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ senderId: 'owner!', - sessionKey: 'agent:main:owner', - SessionKey: 'agent:main:owner', })); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('sessionKey'); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('SessionKey'); expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ sessionKey: 'agent:main:owner', turnId: 'corr-route-ownerish', @@ -2112,9 +2109,9 @@ describe('DkgChannelPlugin', () => { expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ senderId: 'background-worker', - sessionKey: 'agent:main:background-worker', - SessionKey: 'agent:main:background-worker', })); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('sessionKey'); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('SessionKey'); expect(storeCalls[0]).toEqual([ 'openclaw:dkg-ui:background-worker', 'Work item', @@ -2171,10 +2168,8 @@ describe('DkgChannelPlugin', () => { await plugin.processInbound('Hello', 'corr-route-no-session', 'owner'); await new Promise((resolve) => setTimeout(resolve, 10)); - expect(routeInboundMessage.calls[0][0]).toEqual(expect.objectContaining({ - sessionKey: 'agent:main:main', - SessionKey: 'agent:main:main', - })); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('sessionKey'); + expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('SessionKey'); expect(storeCalls[0]).toEqual([ 'openclaw:dkg-ui', 'Hello', @@ -2201,16 +2196,14 @@ describe('DkgChannelPlugin', () => { const capture: { inScope?: string | undefined; sessionScope?: string | undefined; - mismatchedSessionScope?: string | undefined; - messageSessionKey?: string | undefined; - messageOpenClawSessionKey?: string | undefined; + alternateSessionScope?: string | undefined; } = {}; const routeInboundMessage = vi.fn().mockImplementation(async (message: any) => { - capture.messageSessionKey = message.sessionKey; - capture.messageOpenClawSessionKey = message.SessionKey; + expect(message).not.toHaveProperty('sessionKey'); + expect(message).not.toHaveProperty('SessionKey'); capture.inScope = plugin.getSessionProjectContextGraphId(undefined); capture.sessionScope = plugin.getSessionProjectContextGraphId('agent:main:main'); - capture.mismatchedSessionScope = plugin.getSessionProjectContextGraphId('agent:other:owner'); + capture.alternateSessionScope = plugin.getSessionProjectContextGraphId('agent:other:owner'); return { correlationId: 'corr-b13', text: 'Reply from route' }; }); const api = makeApi({ routeInboundMessage }); @@ -2226,9 +2219,7 @@ describe('DkgChannelPlugin', () => { // While the fallback was running, the ALS scope was populated. expect(capture.inScope).toBe('research-b13'); expect(capture.sessionScope).toBe('research-b13'); - expect(capture.mismatchedSessionScope).toBeUndefined(); - expect(capture.messageSessionKey).toBe('agent:main:main'); - expect(capture.messageOpenClawSessionKey).toBe('agent:main:main'); + expect(capture.alternateSessionScope).toBe('research-b13'); // After the dispatch resolves, the ALS is torn down. expect(plugin.getSessionProjectContextGraphId(undefined)).toBeUndefined(); }); From dd4402629fddfe712c78a05bcd3277b1e6decbd2 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 1 May 2026 00:20:24 +0200 Subject: [PATCH 12/14] fix(openclaw): bind external markers to turn content --- .../adapter-openclaw/src/ChatTurnWriter.ts | 23 +++++++--- .../adapter-openclaw/src/DkgChannelPlugin.ts | 35 ++++++++++++--- packages/adapter-openclaw/src/types.ts | 2 +- .../test/ChatTurnWriter.test.ts | 45 +++++++++++++++++-- .../adapter-openclaw/test/dkg-channel.test.ts | 37 +++++++++++++-- 5 files changed, 120 insertions(+), 22 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 38a12a3df..7bf165841 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -512,6 +512,8 @@ export class ChatTurnWriter { ? this.consumeExternalTurnMarkersForPair( externalCursorKey, externalTurnIds, + user, + assistant, ) : { skip: false, markers: [], rollbackMarkers: [] }; if (externalCursorKey && externalMarkerAction.markers.length > 0) { @@ -679,7 +681,7 @@ export class ChatTurnWriter { }): Promise { const externalCursorKey = this.externalCursorKeyFromSessionKey(opts.sessionKey); const markers = [ - this.externalTurnMarkerId(opts.turnId), + this.externalTurnMarkerId(opts.turnId, opts.user, opts.assistant), ].filter(Boolean); if (!externalCursorKey || markers.length === 0) return; for (const marker of markers) { @@ -1584,21 +1586,28 @@ export class ChatTurnWriter { return `w4b-content::${this.contentHash(user, assistant)}`; } - private externalTurnMarkerId(turnId?: unknown): string { + private externalTurnMarkerId(turnId?: unknown, user?: string, assistant?: string): string { if (typeof turnId !== "string" || turnId.trim().length === 0) return ""; - return `external-id::${createHash("sha256").update(turnId.trim()).digest("hex").slice(0, 16)}`; + const idHash = createHash("sha256").update(turnId.trim()).digest("hex").slice(0, 16); + if (typeof user !== "string" || typeof assistant !== "string") { + return `external-id::${idHash}`; + } + return `external-id::${idHash}::${this.contentHash(user, this.stripRecalledMemory(assistant))}`; } private consumeExternalTurnMarkersForPair( sessionKeyCursor: string, turnIds: string[], + user: string, + assistant: string, ): ExternalMarkerAction { for (const turnId of turnIds) { - const marker = this.externalTurnMarkerId(turnId); + const marker = this.externalTurnMarkerId(turnId, user, assistant); if (marker && this.hasExternalTurnMarker(sessionKeyCursor, marker)) { - // Exact external markers are durable daemon-success facts, not - // one-shot tickets. Keep them for later reset/compaction replays - // until a future transcript-retention cursor can prove safe GC. + // Content-bound exact markers are durable daemon-success facts, + // not one-shot tickets. Keep them for later reset/compaction + // replays until a future transcript-retention cursor can prove + // safe GC. return { skip: true, markers: [marker], rollbackMarkers: [] }; } } diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index 3d6421524..1f19e68c4 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -218,6 +218,7 @@ interface PersistTurnOptions { attachmentRefs?: OpenClawAttachmentRef[]; sessionKey?: string; turnId?: string; + markerUser?: string; } interface ExternalTurnMarkerPersistOptions { @@ -1200,6 +1201,10 @@ export class DkgChannelPlugin { if (opts?.contextEntries != null && contextEntries === undefined) { throw new Error('Invalid context entries'); } + const markerUserMessage = buildAgentBody(text, { + attachmentRefs: contextAttachmentRefs, + contextEntries: sanitizedContextEntries, + }); // Re-assert memory-slot capability before dispatch so our runtime // handles recall even if memory-core's dreaming sidecar overwrote it. @@ -1208,13 +1213,14 @@ export class DkgChannelPlugin { api.logger.info?.(`[dkg-channel] Dispatching for: ${correlationId}`); try { const reply = await this.dispatchViaPluginSdk(text, correlationId, identity, contextAttachmentRefs, sanitizedContextEntries, uiContextGraphId); - const { sessionKey, ...replyForCaller } = reply; + const sessionKey = typeof reply.sessionKey === 'string' ? reply.sessionKey : undefined; // Fire-and-forget: persist turn to DKG graph for Agent Hub visualization this.queueTurnPersistence(text, reply.text, correlationId, identity, { attachmentRefs, sessionKey, + markerUser: markerUserMessage, }, true); - return replyForCaller; + return reply; } catch (err: any) { api.logger.warn?.(`[dkg-channel] dispatchViaPluginSdk failed: ${err.message}`); throw err; @@ -1248,7 +1254,8 @@ export class DkgChannelPlugin { correlationId, }), ); - const { sessionKey: replySessionKey, SessionKey: replyOpenClawSessionKey, ...replyForCaller } = reply; + const replySessionKey = reply.sessionKey; + const replyOpenClawSessionKey = reply.SessionKey; const returnedSessionKey = (typeof replySessionKey === 'string' ? replySessionKey.trim() : '') || (typeof replyOpenClawSessionKey === 'string' ? replyOpenClawSessionKey.trim() : ''); @@ -1260,8 +1267,9 @@ export class DkgChannelPlugin { this.queueTurnPersistence(text, reply.text, correlationId, identity || 'owner', { attachmentRefs, sessionKey: returnedSessionKey || undefined, + markerUser: markerUserMessage, }, true); - return replyForCaller; + return reply; } throw new Error( @@ -1680,6 +1688,7 @@ export class DkgChannelPlugin { this.queueTurnPersistence(text, resolvedFinalText, correlationId, identity, { attachmentRefs, sessionKey: route.sessionKey, + markerUser: agentBody, }, true); } else if (resolvedTerminalState === 'failed') { this.queueTurnPersistence( @@ -1687,7 +1696,13 @@ export class DkgChannelPlugin { this.buildFailedAssistantReply(resolvedFailureReason), correlationId, identity, - { persistenceState: 'failed', failureReason: resolvedFailureReason, attachmentRefs, sessionKey: route.sessionKey }, + { + persistenceState: 'failed', + failureReason: resolvedFailureReason, + attachmentRefs, + sessionKey: route.sessionKey, + markerUser: agentBody, + }, true, ); } else { @@ -1696,7 +1711,13 @@ export class DkgChannelPlugin { CANCELLED_TURN_MESSAGE, correlationId, identity, - { persistenceState: 'failed', failureReason: 'cancelled', attachmentRefs, sessionKey: route.sessionKey }, + { + persistenceState: 'failed', + failureReason: 'cancelled', + attachmentRefs, + sessionKey: route.sessionKey, + markerUser: agentBody, + }, true, ); } @@ -1905,7 +1926,7 @@ export class DkgChannelPlugin { await this.markExternalTurnPersistedAfterStore({ sessionKey: opts?.sessionKey, turnId: opts?.turnId ?? correlationId, - user: userMessage, + user: opts?.markerUser ?? userMessage, assistant: assistantReply, correlationId, }, allowDuringShutdown); diff --git a/packages/adapter-openclaw/src/types.ts b/packages/adapter-openclaw/src/types.ts index 0474ace1e..9666c1c5e 100644 --- a/packages/adapter-openclaw/src/types.ts +++ b/packages/adapter-openclaw/src/types.ts @@ -126,7 +126,7 @@ export interface ChannelInboundMessage { text: string; /** Correlation ID for request-reply tracking. */ correlationId?: string; - /** Optional transcript session key for legacy route fallbacks that can honor it. */ + /** Optional transcript session key when the caller already knows the resolved route. */ sessionKey?: string; /** OpenClaw context-style alias for `sessionKey`. */ SessionKey?: string; diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index fb243909e..484644700 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -669,6 +669,31 @@ describe("ChatTurnWriter", () => { restarted.flushSync(); }); + it("T104 - reused direct-channel turnId with different content does not skip W4a", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-reused", + user: "first ui question", + assistant: "first ui answer", + }); + + const restarted = new ChatTurnWriter({ client: mockClient, logger: mockLogger, stateDir }); + mockClient.storeChatTurn.mockClear(); + restarted.onAgentEnd({ + sessionId: "test", + messages: [ + { role: "user", content: "second ui question", context: { Provider: "dkg-ui", DkgTurnId: "node-ui-corr-reused" } }, + { role: "assistant", content: "second ui answer" }, + ], + }, { channelId: "telegram", sessionKey: "agent:main:main" }); + await flushMicrotasks(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + expect(mockClient.storeChatTurn.mock.calls[0][1]).toBe("second ui question"); + expect(mockClient.storeChatTurn.mock.calls[0][2]).toBe("second ui answer"); + restarted.flushSync(); + }); + for (const hookName of ["onBeforeReset", "onBeforeCompaction"] as const) { it(`T99 - ${hookName} preserves durable external markers for replay dedupe`, async () => { const turnId = `node-ui-corr-${hookName}`; @@ -681,7 +706,7 @@ describe("ChatTurnWriter", () => { await writer[hookName]({ channelId: "telegram", sessionKey: "agent:main:main" }); const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); - const marker = (writer as any).externalTurnMarkerId(turnId); + const marker = (writer as any).externalTurnMarkerId(turnId, "reset ui question", "reset ui answer"); const persisted = JSON.parse(fs.readFileSync( path.join(stateDir, "dkg-adapter", "chat-turn-watermarks.json"), "utf-8", @@ -1071,7 +1096,11 @@ describe("ChatTurnWriter", () => { const destinationStateDir = fs.mkdtempSync(path.join(os.tmpdir(), "chatturnwriter-dest-counts-")); try { const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); - const marker = (writer as any).externalTurnMarkerId("node-ui-corr-counted"); + const marker = (writer as any).externalTurnMarkerId( + "node-ui-corr-counted", + "counted question", + "counted answer", + ); (writer as any).restoreExternalTurnMarker(externalCursorKey, marker); (writer as any).writeWatermarkFile(); @@ -1103,7 +1132,11 @@ describe("ChatTurnWriter", () => { const dkw = writer as any; const externalCursorKey = dkw.externalCursorKeyFromSessionKey("agent:main:main"); - const marker = dkw.externalTurnMarkerId("node-ui-corr-marker-race"); + const marker = dkw.externalTurnMarkerId( + "node-ui-corr-marker-race", + "migrated ui question", + "migrated ui answer", + ); const realWrite = dkw.writeWatermarkFile.bind(dkw); const writeSpy = vi.spyOn(dkw, "writeWatermarkFile").mockImplementationOnce((target: string, override: any) => { dkw.restoreExternalTurnMarker(externalCursorKey, marker); @@ -1144,7 +1177,11 @@ describe("ChatTurnWriter", () => { const originalStateDir = dkw.stateDir; const originalWatermarkFilePath = dkw.watermarkFilePath; const externalCursorKey = dkw.externalCursorKeyFromSessionKey("agent:main:main"); - const marker = dkw.externalTurnMarkerId("node-ui-corr-marker-final-fail"); + const marker = dkw.externalTurnMarkerId( + "node-ui-corr-marker-final-fail", + "final fail question", + "final fail answer", + ); const realWrite = dkw.writeWatermarkFile.bind(dkw); const writeSpy = vi.spyOn(dkw, "writeWatermarkFile") .mockImplementationOnce((target: string, override: any) => { diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index e31f61d11..c4e696674 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1389,6 +1389,7 @@ describe('DkgChannelPlugin', () => { expect(reply.text).toBe('Hello from agent'); expect(reply.correlationId).toBe('corr-1'); + expect(reply.sessionKey).toBe('session-1'); expect(dispatched).toMatchObject({ ctx: expect.objectContaining({ BodyForAgent: 'Hello', @@ -2044,9 +2045,10 @@ describe('DkgChannelPlugin', () => { const api = makeApi({ routeInboundMessage }); plugin.register(api); - await plugin.processInbound('Hello', 'corr-route-marker', 'owner'); + const reply = await plugin.processInbound('Hello', 'corr-route-marker', 'owner'); await new Promise((resolve) => setTimeout(resolve, 10)); + expect(reply.sessionKey).toBe('agent:main:main'); expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('sessionKey'); expect(routeInboundMessage.calls[0][0]).not.toHaveProperty('SessionKey'); expect(storeCalls[0]).toEqual([ @@ -2063,6 +2065,34 @@ describe('DkgChannelPlugin', () => { }); }); + it('processInbound routeInboundMessage fallback hashes the routed agent body for direct-channel markers', async () => { + const routeInboundMessage = trackAsyncFn(async () => ({ + correlationId: 'corr-route-context-marker', + text: 'Reply!', + sessionKey: 'agent:main:main', + })); + const storeCalls: unknown[][] = []; + client.storeChatTurn = async (...args: unknown[]) => { storeCalls.push(args); return undefined as any; }; + const markExternalTurnPersistedDurable = vi.fn().mockResolvedValue(undefined); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + const api = makeApi({ routeInboundMessage }); + plugin.register(api); + + await plugin.processInbound('Hello', 'corr-route-context-marker', 'owner', { + contextEntries: [{ key: 'target_context_graph', label: 'Target context graph', value: 'dkg-code-project' }], + }); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(routeInboundMessage.calls[0][0].text).toContain('Context for this chat turn:'); + expect(storeCalls[0][1]).toBe('Hello'); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ + sessionKey: 'agent:main:main', + turnId: 'corr-route-context-marker', + user: expect.stringContaining('Context for this chat turn:'), + assistant: 'Reply!', + }); + }); + it('processInbound routeInboundMessage fallback does not collapse owner-like identities into the owner marker bucket', async () => { const routeInboundMessage = trackAsyncFn(async () => ({ correlationId: 'corr-route-ownerish', @@ -2141,7 +2171,7 @@ describe('DkgChannelPlugin', () => { const reply = await plugin.processInbound('Hello', 'corr-route-uppercase-session', 'owner'); await new Promise((resolve) => setTimeout(resolve, 10)); - expect((reply as any).SessionKey).toBeUndefined(); + expect((reply as any).SessionKey).toBe('agent:legacy:actual'); expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ sessionKey: 'agent:legacy:actual', turnId: 'corr-route-uppercase-session', @@ -2349,7 +2379,7 @@ describe('DkgChannelPlugin', () => { expect(markExternalTurnPersistedDurable).toHaveBeenCalledWith({ sessionKey: 'session-1', turnId: 'corr-stream-runtime', - user: 'Hello', + user: expect.stringContaining('Attached Working Memory items:'), assistant: 'Streamed reply', }); }); @@ -2783,6 +2813,7 @@ describe('DkgChannelPlugin', () => { await expect(replyPromise).resolves.toEqual({ text: 'Reply before shutdown', correlationId: 'corr-stop-nonstream', + sessionKey: 'session-1', }); let stopSettled = false; From aaa5e91b74ed10fb532d8c3c999d3d78206c4ca1 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 1 May 2026 00:37:21 +0200 Subject: [PATCH 13/14] fix(openclaw): make marker migration retries idempotent --- .../adapter-openclaw/src/ChatTurnWriter.ts | 88 ++++++++++++++++--- .../test/ChatTurnWriter.test.ts | 45 +++++++++- 2 files changed, 117 insertions(+), 16 deletions(-) diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 7bf165841..3f421023d 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -257,12 +257,15 @@ export class ChatTurnWriter { // persist's increment that landed in live during the write // window is preserved. On write failure live is unchanged — // concurrent persists keep their advances; nothing got wiped. + const destinationFileExisted = fs.existsSync(newWatermarkFilePath); + const destinationWm = new Map(); + const destinationBc = new Map(); + const destinationMarkers = new Map>(); const mergedWm = new Map(this.cachedWatermarks); const mergedBc = new Map(this.w4bSessionCounts); - const baseMarkers = this.cloneExternalTurnMarkers(this.externalTurnMarkers); const mergedMarkers = this.cloneExternalTurnMarkers(this.externalTurnMarkers); try { - if (fs.existsSync(newWatermarkFilePath)) { + if (destinationFileExisted) { const raw = fs.readFileSync(newWatermarkFilePath, "utf-8"); const parsed = JSON.parse(raw); if (parsed && typeof parsed === "object") { @@ -275,6 +278,11 @@ export class ChatTurnWriter { if (typeof obj.w === "number") w = obj.w; if (typeof obj.b === "number") b = obj.b; if (obj.m && typeof obj.m === "object" && !Array.isArray(obj.m)) { + this.mergeExternalTurnMarkers( + destinationMarkers, + key, + obj.m as Record, + ); this.mergeExternalTurnMarkers( mergedMarkers, key, @@ -282,6 +290,8 @@ export class ChatTurnWriter { ); } } + destinationWm.set(key, w); + destinationBc.set(key, b); mergedWm.set(key, Math.max(mergedWm.get(key) ?? -1, w)); mergedBc.set(key, Math.max(mergedBc.get(key) ?? 0, b)); } @@ -323,9 +333,8 @@ export class ChatTurnWriter { // persist advanced live's watermark during the write window, // its increment is preserved (max takes the higher of merged- // from-destination and post-flush-live). External markers are - // multiplicities, so only add the destination delta beyond the - // baseline snapshot; adding the whole merged snapshot would - // double-count markers that were already live before migration. + // exact daemon-success facts, so identical marker keys merge + // idempotently instead of adding counts. for (const [key, val] of mergedWm) { this.cachedWatermarks.set(key, Math.max(this.cachedWatermarks.get(key) ?? -1, val)); } @@ -335,9 +344,7 @@ export class ChatTurnWriter { for (const [key, markers] of mergedMarkers) { const live = this.externalTurnMarkers.get(key) ?? new Map(); for (const [marker, count] of markers) { - const baseCount = baseMarkers.get(key)?.get(marker) ?? 0; - const delta = count - baseCount; - if (delta > 0) live.set(marker, (live.get(marker) ?? 0) + delta); + if (count > 0) live.set(marker, Math.max(live.get(marker) ?? 0, count)); } if (live.size > 0) this.externalTurnMarkers.set(key, live); } @@ -358,6 +365,13 @@ export class ChatTurnWriter { { oldWatermarkFilePath: this.watermarkFilePath, newWatermarkFilePath }, ); } + this.restoreFailedMigrationDestination( + newWatermarkFilePath, + destinationFileExisted, + destinationWm, + destinationBc, + destinationMarkers, + ); } } // T45 - If the initial new-path write failed, live state is still @@ -684,17 +698,51 @@ export class ChatTurnWriter { this.externalTurnMarkerId(opts.turnId, opts.user, opts.assistant), ].filter(Boolean); if (!externalCursorKey || markers.length === 0) return; + const previousMarkerCounts = markers.map((marker) => ({ + marker, + count: this.externalTurnMarkers.get(externalCursorKey)?.get(marker) ?? 0, + })); for (const marker of markers) { this.restoreExternalTurnMarker(externalCursorKey, marker); } if (!this.commitWatermarkStateSync(externalCursorKey)) { - for (const marker of markers) { - this.consumeExternalTurnMarker(externalCursorKey, marker); + for (const previous of previousMarkerCounts) { + this.restoreExternalTurnMarkerCount(externalCursorKey, previous.marker, previous.count); } throw new Error("Failed to write external chat-turn marker"); } } + private restoreFailedMigrationDestination( + newWatermarkFilePath: string, + destinationFileExisted: boolean, + destinationWm: Map, + destinationBc: Map, + destinationMarkers: Map>, + ): void { + try { + if (destinationFileExisted) { + if (!this.writeWatermarkFile(newWatermarkFilePath, { + wm: destinationWm, + bc: destinationBc, + markers: destinationMarkers, + })) { + this.logger.warn?.( + "[ChatTurnWriter.setStateDir] Failed to restore destination file after migration rewrite failure.", + { newWatermarkFilePath }, + ); + } + } else if (fs.existsSync(newWatermarkFilePath)) { + fs.unlinkSync(newWatermarkFilePath); + } + } catch (err) { + this.logger.warn?.( + "[ChatTurnWriter.setStateDir] Failed to clean up destination file after migration rewrite failure.", + { err, newWatermarkFilePath }, + ); + } + } + /** * Track the reset promise on `pendingResets` so `onAgentEnd` / * `onMessageSent` can `await` it before processing a turn that arrived @@ -1638,10 +1686,26 @@ export class ChatTurnWriter { private restoreExternalTurnMarker(sessionKeyCursor: string, marker: string): void { if (!marker) return; const bucket = this.externalTurnMarkers.get(sessionKeyCursor) ?? new Map(); - bucket.set(marker, (bucket.get(marker) ?? 0) + 1); + bucket.set(marker, Math.max(bucket.get(marker) ?? 0, 1)); this.externalTurnMarkers.set(sessionKeyCursor, bucket); } + private restoreExternalTurnMarkerCount(sessionKeyCursor: string, marker: string, count: number): void { + if (!marker) return; + const bucket = this.externalTurnMarkers.get(sessionKeyCursor); + if (count > 0) { + const target = bucket ?? new Map(); + target.set(marker, count); + this.externalTurnMarkers.set(sessionKeyCursor, target); + return; + } + if (!bucket) return; + bucket.delete(marker); + if (bucket.size === 0) { + this.externalTurnMarkers.delete(sessionKeyCursor); + } + } + private cloneExternalTurnMarkers( source: Map>, ): Map> { @@ -1660,7 +1724,7 @@ export class ChatTurnWriter { const bucket = target.get(key) ?? new Map(); for (const [marker, count] of Object.entries(markers)) { if (typeof count === "number" && count > 0) { - bucket.set(marker, (bucket.get(marker) ?? 0) + count); + bucket.set(marker, Math.max(bucket.get(marker) ?? 0, count)); } } if (bucket.size > 0) target.set(key, bucket); diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 484644700..fa5f6337f 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -766,6 +766,34 @@ describe("ChatTurnWriter", () => { writeSpy.mockRestore(); }); + it("T105 - external marker write failure preserves a pre-existing exact marker", async () => { + await writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-preexisting", + user: "preexisting question", + assistant: "preexisting answer", + }); + + const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); + const marker = (writer as any).externalTurnMarkerId( + "node-ui-corr-preexisting", + "preexisting question", + "preexisting answer", + ); + expect((writer as any).externalTurnMarkers.get(externalCursorKey)?.get(marker)).toBe(1); + + const writeSpy = vi.spyOn(writer as any, "writeWatermarkFile").mockReturnValueOnce(false); + await expect(writer.markExternalTurnPersistedDurable({ + sessionKey: "agent:main:main", + turnId: "node-ui-corr-preexisting", + user: "preexisting question", + assistant: "preexisting answer", + })).rejects.toThrow("Failed to write external chat-turn marker"); + + expect((writer as any).externalTurnMarkers.get(externalCursorKey)?.get(marker)).toBe(1); + writeSpy.mockRestore(); + }); + it("T94 — external marker write failure preserves unrelated debounce timers", async () => { writer.onAgentEnd({ sessionId: "test", @@ -1092,7 +1120,7 @@ describe("ChatTurnWriter", () => { } }); - it("T97 - setStateDir adds external marker multiplicities", async () => { + it("T97 - setStateDir deduplicates exact external markers", async () => { const destinationStateDir = fs.mkdtempSync(path.join(os.tmpdir(), "chatturnwriter-dest-counts-")); try { const externalCursorKey = (writer as any).externalCursorKeyFromSessionKey("agent:main:main"); @@ -1111,12 +1139,12 @@ describe("ChatTurnWriter", () => { await writer.setStateDir(destinationStateDir); const bucket: Map | undefined = (writer as any).externalTurnMarkers.get(externalCursorKey); - expect(bucket?.get(marker)).toBe(2); + expect(bucket?.get(marker)).toBe(1); const persisted = JSON.parse(fs.readFileSync( path.join(destinationStateDir, "dkg-adapter", "chat-turn-watermarks.json"), "utf-8", )); - expect(persisted[externalCursorKey].m[marker]).toBe(2); + expect(persisted[externalCursorKey].m[marker]).toBe(1); } finally { fs.rmSync(destinationStateDir, { recursive: true, force: true }); } @@ -1182,6 +1210,9 @@ describe("ChatTurnWriter", () => { "final fail question", "final fail answer", ); + fs.writeFileSync(path.join(newDir, "chat-turn-watermarks.json"), JSON.stringify({ + [externalCursorKey]: { m: { [marker]: 1 } }, + })); const realWrite = dkw.writeWatermarkFile.bind(dkw); const writeSpy = vi.spyOn(dkw, "writeWatermarkFile") .mockImplementationOnce((target: string, override: any) => { @@ -1195,10 +1226,16 @@ describe("ChatTurnWriter", () => { expect(dkw.stateDir).toBe(originalStateDir); expect(dkw.watermarkFilePath).toBe(originalWatermarkFilePath); expect(dkw.externalTurnMarkers.get(externalCursorKey)?.get(marker)).toBe(1); - expect(writeSpy).toHaveBeenCalledTimes(3); + expect(writeSpy).toHaveBeenCalledTimes(4); const persistedOldPath = JSON.parse(fs.readFileSync(originalWatermarkFilePath, "utf-8")); expect(persistedOldPath[externalCursorKey].m[marker]).toBe(1); writeSpy.mockRestore(); + + await writer.setStateDir(destinationStateDir); + expect(dkw.stateDir).toBe(destinationStateDir); + expect(dkw.watermarkFilePath).toBe(path.join(newDir, "chat-turn-watermarks.json")); + const persistedNewPath = JSON.parse(fs.readFileSync(dkw.watermarkFilePath, "utf-8")); + expect(persistedNewPath[externalCursorKey].m[marker]).toBe(1); } finally { fs.rmSync(destinationStateDir, { recursive: true, force: true }); } From e8b666b6ec4af785bac1c5a9a93cfaf99ae09451 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 1 May 2026 00:51:18 +0200 Subject: [PATCH 14/14] Track initial channel marker writes during shutdown --- .../adapter-openclaw/src/DkgChannelPlugin.ts | 10 ++- .../adapter-openclaw/test/dkg-channel.test.ts | 61 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/packages/adapter-openclaw/src/DkgChannelPlugin.ts b/packages/adapter-openclaw/src/DkgChannelPlugin.ts index 1f19e68c4..2a15e42d4 100644 --- a/packages/adapter-openclaw/src/DkgChannelPlugin.ts +++ b/packages/adapter-openclaw/src/DkgChannelPlugin.ts @@ -1939,8 +1939,16 @@ export class DkgChannelPlugin { ): Promise { if (!this.chatTurnWriter) return; if (!opts.sessionKey) return; + const markerWrite = this.writeExternalTurnMarker(opts); + this.pendingMarkerPersistence.set(opts.correlationId, { + attempt: 1, + timer: null, + allowDuringShutdown, + opts, + inFlight: markerWrite, + }); try { - await this.writeExternalTurnMarker(opts); + await markerWrite; this.deletePendingMarkerPersistence(opts.correlationId); } catch (err: any) { this.scheduleExternalTurnMarkerRetry(opts, 1, allowDuringShutdown, err); diff --git a/packages/adapter-openclaw/test/dkg-channel.test.ts b/packages/adapter-openclaw/test/dkg-channel.test.ts index c4e696674..91a33c655 100644 --- a/packages/adapter-openclaw/test/dkg-channel.test.ts +++ b/packages/adapter-openclaw/test/dkg-channel.test.ts @@ -1776,6 +1776,67 @@ describe('DkgChannelPlugin', () => { } }); + it('stop should drain an in-flight initial ChatTurnWriter marker write', async () => { + vi.useFakeTimers(); + try { + let resolveInitialMarker!: () => void; + const { runtime } = makeMockRuntime({ + dispatchImpl: async (params) => { + await params.dispatcherOptions.deliver({ text: 'Persisted reply' }); + }, + }); + const mockCfg = { session: { dmScope: 'main' }, agents: {} }; + + const api = makeApi({ + logger: { info: trackFn(), warn: trackFn(), debug: trackFn() }, + } as any) as any; + api.runtime = runtime; + api.cfg = mockCfg; + client.storeChatTurn = async () => undefined as any; + const markExternalTurnPersistedDurable = vi.fn() + .mockImplementation(() => new Promise((resolve) => { resolveInitialMarker = resolve; })); + plugin.setChatTurnWriter({ markExternalTurnPersistedDurable } as any); + plugin.register(api); + + await plugin.processInbound('Already stored', 'corr-marker-initial-hang', 'owner'); + await vi.advanceTimersByTimeAsync(10); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(1); + const markerJob = (plugin as any).pendingMarkerPersistence.get('corr-marker-initial-hang'); + expect(markerJob).toMatchObject({ + attempt: 1, + timer: null, + allowDuringShutdown: true, + }); + expect(typeof markerJob.inFlight.then).toBe('function'); + + const stopPromise = plugin.stop(); + let stopSettled = false; + void stopPromise.then(() => { stopSettled = true; }); + await Promise.resolve(); + expect(stopSettled).toBe(false); + + await vi.advanceTimersByTimeAsync(1_500); + await Promise.resolve(); + expect(stopSettled).toBe(false); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(1); + + resolveInitialMarker(); + await stopPromise; + + expect(stopSettled).toBe(true); + expect(markExternalTurnPersistedDurable).toHaveBeenCalledTimes(1); + expect(markExternalTurnPersistedDurable).toHaveBeenLastCalledWith({ + sessionKey: 'session-1', + turnId: 'corr-marker-initial-hang', + user: 'Already stored', + assistant: 'Persisted reply', + }); + expect((plugin as any).pendingMarkerPersistence.size).toBe(0); + } finally { + vi.useRealTimers(); + } + }); + it('stop should force one final ChatTurnWriter marker flush before dropping timed-out marker jobs', async () => { vi.useFakeTimers(); try {