From 91a901818ad8b943c4b27283748b4d80904c894a Mon Sep 17 00:00:00 2001 From: olivrg Date: Wed, 17 Jun 2026 19:14:10 +0100 Subject: [PATCH 1/2] fix(correlation): fail closed on runId-lane ambiguity; ASCII key separator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review finding (blocker): runId is per-turn (shared across a turn's tool calls), so two calls in one turn without a toolCallId collided on run: and the 2nd silently overwrote the binding. The registry now distinguishes lanes (toolCall/run/none) — only toolCallId is per-call-unique (full concurrency); the runId lane is fail-closed on ambiguity like the no-ID lane. Also (minor) replace the raw NUL key separator with JSON.stringify([session,toolName]). --- src/correlation/registry.ts | Bin 3580 -> 4088 bytes tests/correlation/registry.test.ts | 31 ++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/correlation/registry.ts b/src/correlation/registry.ts index 4fc3921f9f4d5650e1a84171f64a4743de6f2a60..664c8b300e14e950a7a170741238e40e6f9ab5f5 100644 GIT binary patch delta 833 zcmZWn!HN?>5G65@Yy|bTs4H7~aFURj96YRS$Vm|*xQgsJ4uQ-o)jL1mB%0evO=X_BDqXVnP0Nh)cm3^g}bb7Lk=WDP2M#1f35 zmjUi#l7WK&w-UGnB@x&VrP%=2UIiv&Fe3!1!uBl?jQ(?d`0?sy6MD*wXiyTe5In{@ zXf-Pmz~PSz0LQQ16Xk!zjANsb-mwR?k8>OLMQ-a9)u*3w_wt1ZY7Kjozih7;J{&X} z5G?cuPQ^*>tGb&IknX^<4kU>QmI=b~>vu3C@zVTpY9=FU)iin&n|NT|91r4-!-s2|5u8(&$w>u;p9(Ffb5& z7as!U$pvkM36dHnHSGCjsSLzivvH3PQ)Y10Ah(?lpyJnGX)U{C|CGL-^Yxaid!MG? I)~*%)0=TOiivR!s delta 387 zcmZvYy-EW?6ot(a6O&+J6A%keA(Ci*x(G?IQG|d-1?@B0y=&I&&V-p+L?qa1r_O8G zrM2=2M8wyywXkqDMzF9v=U(o2&i!2dTDf~!IvEhSsCIS{GENATc}y)|H^DR_p)`aw zW3DB0hvdO!VVK7TB6f+s-RAJa;B25VB8gr}ky3%+6=)&CE{mw#Yk|QP+>NCquA#^Z z&tf3yoLo&_#o{e1P}RPOxmQch8^>u(TF!w}L`lWcXRc-YLgI)?jh(W&Nk5zS`Nkp_ z9j}SLr4i$%m^m)|I6ey%nTyO``;W#r57g`Ep6=TmBPN-)Cx=8M^?&w2# z#&c^|Q~g_S+)L6Y5wwhHqO(t_?Ce=)J9n5)X68P3yAb#&O~pt4ZR$>53+w4``o)E9 diff --git a/tests/correlation/registry.test.ts b/tests/correlation/registry.test.ts index 87635b7..e384c16 100644 --- a/tests/correlation/registry.test.ts +++ b/tests/correlation/registry.test.ts @@ -86,13 +86,42 @@ describe('CorrelationRegistry', () => { expect(reg.reserve({ session: 'oc:s1', toolName: 'send' }).ok).toBe(true) }) - it('does not apply the ambiguity block to ID lanes (full concurrency)', () => { + it('does not apply the ambiguity block to the toolCallId lane (per-call ids → full concurrency)', () => { const reg = new CorrelationRegistry() expect(reg.reserve({ toolCallId: 'tc-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true) expect(reg.reserve({ toolCallId: 'tc-2', session: 'oc:s1', toolName: 'send' }).ok).toBe(true) }) }) + describe('runId lane fail-closed on ambiguity (runId is per-turn, not per-call)', () => { + it('blocks a second call with the same runId and no toolCallId', () => { + const reg = new CorrelationRegistry() + + const first = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }) + const second = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }) + + expect(first.ok).toBe(true) + expect(second).toEqual({ ok: false, reason: 'ambiguous' }) + expect(reg.stats.ambiguityBlocks).toBe(1) + }) + + it('does not block distinct runIds', () => { + const reg = new CorrelationRegistry() + expect(reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true) + expect(reg.reserve({ runId: 'run-2', session: 'oc:s1', toolName: 'send' }).ok).toBe(true) + }) + + it('frees the runId slot after claim, allowing the next sequential call in the turn', () => { + const reg = new CorrelationRegistry() + const r = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }) + if (!r.ok) throw new Error('reserve failed') + reg.bind(r.ticket, 'eval-1') + reg.claim({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }) + + expect(reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true) + }) + }) + describe('TTL eviction', () => { it('evicts an unclaimed entry after the TTL and frees its no-ID slot', () => { let clock = 0 From 1ebf4db6076eccbcb5bcc45894d8a5c71e7c5c9d Mon Sep 17 00:00:00 2001 From: olivrg Date: Wed, 17 Jun 2026 19:14:13 +0100 Subject: [PATCH 2/2] fix(hooks): surface failed approval recording; document the upstream enforcement gap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review findings: (1) onResolution ignored a { ok:false } from resolveApproval — it now throws so the failure is surfaced. The installed OpenClaw runtime calls onResolution fire-and-forget (notifyPluginApprovalResolution), so the throw is logged but does NOT gate execution; this enforcement gap is not adapter-fixable and is documented as a known cooperative-grade limitation / upstream blocker in AGENTS.md + README. (2) Reworded the ambiguity block reason to 'Helio cannot correlate ambiguous concurrent tool calls' (covers runId + no-ID collisions). --- AGENTS.md | 23 ++++++++++++++++++++--- README.md | 12 ++++++++++++ src/hooks/before-tool-call.ts | 13 +++++++++++-- tests/hooks/before-tool-call.test.ts | 18 +++++++++++++++++- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index de8ac63..6b49b35 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -84,15 +84,32 @@ peer-installed here): proceed on a failed decision (not even dev-only). This is a normative MUST in the contract. Pair with a short bounded `/evaluate` timeout. Do **not** rely on OpenClaw's `failurePolicyByHook`. 2. **Correlation is fail-closed on ambiguity.** Bind `after_tool_call` → `evaluation_id` by a key - chain: `toolCallId` → `runId` → a **no-ID lane** keyed by `(session, toolName)`. The no-ID lane - allows **at most one** in-flight call per `(session, toolName)`; a 2nd concurrent untracked call - is **blocked**, not ambiguously bound. TTL-evict unclaimed keys. Telemetry on no-ID-lane use. + chain: `toolCallId` → `runId` → a **no-ID lane** keyed by `(session, toolName)`. **Only + `toolCallId` is per-call-unique** (full concurrency). **`runId` is _per-turn_** (shared by every + tool call in one agent turn), and the no-ID lane is keyed by `(session, toolName)` — so **both + the `runId` lane and the no-ID lane are fail-closed on ambiguity**: at most one in-flight call per + key, and a 2nd concurrent call on a non-unique key is **blocked**, not ambiguously bound. + TTL-evict unclaimed keys. Telemetry on no-ID-lane use + ambiguity blocks. 3. **Evidence is success-only**, attached on `after_tool_call` (`status:"success"`), extracted from `event.result` per explicit config (not magic). Helio soft-drops bad entries and still finalizes. 4. **A late `/audit` returning `200 already_finalized` is SUCCESS**, not an error (the decision was terminal at `/evaluate`). 5. **Single adapter token** (`HELIO_ADAPTER_TOKEN`); never the SDK token; never `Origin`. +### Known limitations (cooperative `host-enforced` grade) + +- **Approval-resolution recording is not enforceable adapter-side.** When `require_approval` + resolves, the adapter records it via `POST /approval/:id/resolve` inside the hook's + `onResolution` callback. The installed OpenClaw runtime invokes that callback **fire-and-forget** + (`notifyPluginApprovalResolution`: `Promise.resolve(onResolution(...)).catch(log.warn)`), so a + throw/rejection is only **logged** — it does **not** gate execution. If `/approval/:id/resolve` + fails (sideband down), the tool can still run without Helio recording the resolution. The adapter + throws to surface it in the host log (best-effort), and it later shows up as a Helio audit anomaly + (`approval_unresolved` → `evaluation_expired`), but it cannot be **prevented** here. Fixing this + needs an upstream change (an awaited/​deny-capable approval callback or an alternate gating API). + **Tracked as an upstream blocker.** Note: this does not affect the `/evaluate` fail-closed + guarantee (§4.1), which the host _does_ enforce via `block`. + ## 5. Conventions & working norms - **Package manager: pnpm** (matches the Helio org). Strict TypeScript, ESM, `tsup` build → `dist/`. diff --git a/README.md b/README.md index 173e2d9..27f3f08 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,18 @@ adapter never sends an `Origin` header (Helio's browser-forgery guard rejects it See [`docs/adapter-api.md`](./docs/adapter-api.md) for the canonical sideband wire contract. +## Known limitations + +This is the cooperative `host-enforced` enforcement grade — it relies on OpenClaw's hook gate. +The `/evaluate` decision **is** enforced (a `block` result prevents the tool from running). One +known gap: when an approval resolves, the adapter records it via `POST /approval/:id/resolve` in +the hook's `onResolution` callback, but the OpenClaw runtime invokes that callback +**fire-and-forget** (failures are logged, not awaited). So if recording the resolution fails (e.g. +the Helio sideband is down) the tool can still run without Helio recording the approval. The +adapter surfaces the failure (host log; and Helio's audit later flags it as `approval_unresolved` → +`evaluation_expired`), but cannot **prevent** it adapter-side — closing this needs an upstream +OpenClaw change. Tracked upstream. + ## Development ```sh diff --git a/src/hooks/before-tool-call.ts b/src/hooks/before-tool-call.ts index b291692..73adfab 100644 --- a/src/hooks/before-tool-call.ts +++ b/src/hooks/before-tool-call.ts @@ -74,7 +74,7 @@ export function createBeforeToolCallHook(deps: BeforeToolCallDeps) { ...(event.runId !== undefined ? { runId: event.runId } : {}), }) if (!reservation.ok) { - return { block: true, blockReason: 'Helio cannot correlate concurrent untracked tool calls' } + return { block: true, blockReason: 'Helio cannot correlate ambiguous concurrent tool calls' } } const outcome = await client.evaluate(buildRequest(event, ctx, session)) @@ -118,10 +118,19 @@ export function createBeforeToolCallHook(deps: BeforeToolCallDeps) { timeoutBehavior: 'deny', ...(approval.timeout_ms !== undefined ? { timeoutMs: approval.timeout_ms } : {}), onResolution: async (decision) => { - await client.resolveApproval(approval.id, { + const outcome = await client.resolveApproval(approval.id, { ...mapResolution(decision), resolved_by: resolvedBy, }) + if (!outcome.ok) { + // Best-effort signal only. The installed OpenClaw runtime invokes onResolution + // fire-and-forget (`Promise.resolve(onResolution(...)).catch(log.warn)`), so this + // throw is logged by the host but does NOT gate execution — a failed resolution + // recording cannot be enforced adapter-side today. See AGENTS.md "Known limitations"; + // tracked as an upstream blocker. The gap still surfaces (host warn log, and later a + // Helio audit anomaly: approval_unresolved → evaluation_expired). + throw new Error(`Helio could not record the approval resolution: ${outcome.reason}`) + } }, }, } diff --git a/tests/hooks/before-tool-call.test.ts b/tests/hooks/before-tool-call.test.ts index 35f9e1e..9050305 100644 --- a/tests/hooks/before-tool-call.test.ts +++ b/tests/hooks/before-tool-call.test.ts @@ -154,6 +154,22 @@ describe('before_tool_call', () => { }) }) + // NOTE: the throw is a best-effort signal — the host only logs it (see hook comment), it does + // not gate execution. The test asserts the adapter surfaces the failure rather than swallowing it. + it('onResolution surfaces a failed approval-resolution recording (does not swallow it)', async () => { + const { hook, resolveApproval } = setup({ + ok: true, + response: { evaluation_id: 'e', decision: 'require_approval', approval: { id: 'appr-1' } }, + }) + resolveApproval.mockResolvedValue({ ok: false, reason: 'sideband down' }) + + const result = await hook(event(), ctx()) + + await expect(result.requireApproval?.onResolution?.('allow-once')).rejects.toThrow( + /could not record/i, + ) + }) + it('fails closed and releases the reservation when evaluate fails', async () => { const { hook, registry } = setup({ ok: false, reason: 'down' }) @@ -173,7 +189,7 @@ describe('before_tool_call', () => { expect(second).toEqual({ block: true, - blockReason: 'Helio cannot correlate concurrent untracked tool calls', + blockReason: 'Helio cannot correlate ambiguous concurrent tool calls', }) await first })