From 91a901818ad8b943c4b27283748b4d80904c894a Mon Sep 17 00:00:00 2001
From: olivrg <olivrg@gmail.com>
Date: Wed, 17 Jun 2026 19:14:10 +0100
Subject: [PATCH 1/2] fix(correlation): fail closed on runId-lane ambiguity;
 ASCII key separator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review finding (blocker): runId is per-turn (shared across a turn's tool calls), so two calls in one turn without a toolCallId collided on run:<runId> and the 2nd silently overwrote the binding. The registry now distinguishes lanes (toolCall/run/none) — only toolCallId is per-call-unique (full concurrency); the runId lane is fail-closed on ambiguity like the no-ID lane. Also (minor) replace the raw NUL key separator with JSON.stringify([session,toolName]).
---
 src/correlation/registry.ts        | Bin 3580 -> 4088 bytes
 tests/correlation/registry.test.ts |  31 ++++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/src/correlation/registry.ts b/src/correlation/registry.ts
index 4fc3921f9f4d5650e1a84171f64a4743de6f2a60..664c8b300e14e950a7a170741238e40e6f9ab5f5 100644
GIT binary patch
delta 833
zcmZWn!HN?>5G65@Yy|bTs4H7~aFURj96YRS$Vm|*x<Ld*L`Wx7Ng6xdak^*M0hje8
zcyn!kz`NifS3hDA@ha{=2p&YN?j#yEx9OUydhgYHzc2mTe0y}zks^bxQA*roLbT&9
za19J5n1p>QgsJ4uQ-o)jL1mB%0evO=X_BDqXVnP0Nh)cm3^g}bb7Lk=WDP2M#1f35
zmjUi#l7WK&w-UGnB@x&VrP%=2UIiv&Fe3!1!uBl?jQ(?d`0?sy6MD*wXiyTe5In{@
zXf-Pmz~PSz0LQQ16Xk!zjANsb-mwR?k8>OLMQ-a9)u*3w_wt1ZY7Kjozih7;J{&X}
z5G?cuPQ^*>tGb&IknX^<4kU>QmI=b~>vu3<oa73nF^<Y6n;_hEqFT#XFWs6mw1+<Y
z7W<`eqwK_Q`)<2lynlZ7{Lj$p;_ceJ=t=CC0Z7B02c4aeTv0LlOHLS1=u-B3aXrtq
z{aM^<xt2(qgv%ixWR)jlG%V7nHCM6@{^bs_!^Zfuy5FsPAGYV*rjJg4&YdNi^GJjg
z345{BDL5{6TzcaU>C@zVTpY9=FU)iin&n|NT|91r4-!-s2|5u8(&$w>u;p9(Ffb5&
z7as!U$pvkM36dHnHSGCjsSLzivvH3PQ)Y10Ah(?lpyJnGX)U{C|CGL-^Yxaid!MG?
I)~*%)0=TOiivR!s

delta 387
zcmZvYy-EW?6ot(a6O&+J6A%keA(Ci*x(G?IQG|d-1?@B0y=&I&&V-p+L?qa1r_O8G
zrM2=2M8wyywXkqDMzF9v=U(o2&i!2dTDf~!IvEhSsCIS{GENATc}y)|H^DR_p)`aw
zW3DB0hvdO!VVK7TB6f+s-RAJa;B25VB8gr}ky3%+6=)&CE{mw#Yk|QP+>NCquA#^Z
z&tf3yoLo&_#o{e1P}RPOxmQch8^>u(TF!w}L`lWcXRc-YLgI)?jh(W&Nk5zS`Nkp_
z9j}SLr4i$%m^m)|I6ey%nTyO``;W#r57g`Ep6=TmBPN-)Cx=8M^?&w<?AB&Pn$>2#
z#&c^|Q~g_S+)L6Y5wwhHqO(t_?Ce=)J9n5)X68P3yAb#&O~pt4ZR$>53+w4``o)E9

diff --git a/tests/correlation/registry.test.ts b/tests/correlation/registry.test.ts
index 87635b7..e384c16 100644
--- a/tests/correlation/registry.test.ts
+++ b/tests/correlation/registry.test.ts
@@ -86,13 +86,42 @@ describe('CorrelationRegistry', () => {
       expect(reg.reserve({ session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
     })
 
-    it('does not apply the ambiguity block to ID lanes (full concurrency)', () => {
+    it('does not apply the ambiguity block to the toolCallId lane (per-call ids → full concurrency)', () => {
       const reg = new CorrelationRegistry()
       expect(reg.reserve({ toolCallId: 'tc-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
       expect(reg.reserve({ toolCallId: 'tc-2', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
     })
   })
 
+  describe('runId lane fail-closed on ambiguity (runId is per-turn, not per-call)', () => {
+    it('blocks a second call with the same runId and no toolCallId', () => {
+      const reg = new CorrelationRegistry()
+
+      const first = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+      const second = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+
+      expect(first.ok).toBe(true)
+      expect(second).toEqual({ ok: false, reason: 'ambiguous' })
+      expect(reg.stats.ambiguityBlocks).toBe(1)
+    })
+
+    it('does not block distinct runIds', () => {
+      const reg = new CorrelationRegistry()
+      expect(reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
+      expect(reg.reserve({ runId: 'run-2', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
+    })
+
+    it('frees the runId slot after claim, allowing the next sequential call in the turn', () => {
+      const reg = new CorrelationRegistry()
+      const r = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+      if (!r.ok) throw new Error('reserve failed')
+      reg.bind(r.ticket, 'eval-1')
+      reg.claim({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+
+      expect(reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
+    })
+  })
+
   describe('TTL eviction', () => {
     it('evicts an unclaimed entry after the TTL and frees its no-ID slot', () => {
       let clock = 0

From 1ebf4db6076eccbcb5bcc45894d8a5c71e7c5c9d Mon Sep 17 00:00:00 2001
From: olivrg <olivrg@gmail.com>
Date: Wed, 17 Jun 2026 19:14:13 +0100
Subject: [PATCH 2/2] fix(hooks): surface failed approval recording; document
 the upstream enforcement gap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review findings: (1) onResolution ignored a { ok:false } from resolveApproval — it now throws so the failure is surfaced. The installed OpenClaw runtime calls onResolution fire-and-forget (notifyPluginApprovalResolution), so the throw is logged but does NOT gate execution; this enforcement gap is not adapter-fixable and is documented as a known cooperative-grade limitation / upstream blocker in AGENTS.md + README. (2) Reworded the ambiguity block reason to 'Helio cannot correlate ambiguous concurrent tool calls' (covers runId + no-ID collisions).
---
 AGENTS.md                            | 23 ++++++++++++++++++++---
 README.md                            | 12 ++++++++++++
 src/hooks/before-tool-call.ts        | 13 +++++++++++--
 tests/hooks/before-tool-call.test.ts | 18 +++++++++++++++++-
 4 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index de8ac63..6b49b35 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -84,15 +84,32 @@ peer-installed here):
    proceed on a failed decision (not even dev-only). This is a normative MUST in the contract. Pair
    with a short bounded `/evaluate` timeout. Do **not** rely on OpenClaw's `failurePolicyByHook`.
 2. **Correlation is fail-closed on ambiguity.** Bind `after_tool_call` → `evaluation_id` by a key
-   chain: `toolCallId` → `runId` → a **no-ID lane** keyed by `(session, toolName)`. The no-ID lane
-   allows **at most one** in-flight call per `(session, toolName)`; a 2nd concurrent untracked call
-   is **blocked**, not ambiguously bound. TTL-evict unclaimed keys. Telemetry on no-ID-lane use.
+   chain: `toolCallId` → `runId` → a **no-ID lane** keyed by `(session, toolName)`. **Only
+   `toolCallId` is per-call-unique** (full concurrency). **`runId` is _per-turn_** (shared by every
+   tool call in one agent turn), and the no-ID lane is keyed by `(session, toolName)` — so **both
+   the `runId` lane and the no-ID lane are fail-closed on ambiguity**: at most one in-flight call per
+   key, and a 2nd concurrent call on a non-unique key is **blocked**, not ambiguously bound.
+   TTL-evict unclaimed keys. Telemetry on no-ID-lane use + ambiguity blocks.
 3. **Evidence is success-only**, attached on `after_tool_call` (`status:"success"`), extracted from
    `event.result` per explicit config (not magic). Helio soft-drops bad entries and still finalizes.
 4. **A late `/audit` returning `200 already_finalized` is SUCCESS**, not an error (the decision was
    terminal at `/evaluate`).
 5. **Single adapter token** (`HELIO_ADAPTER_TOKEN`); never the SDK token; never `Origin`.
 
+### Known limitations (cooperative `host-enforced` grade)
+
+- **Approval-resolution recording is not enforceable adapter-side.** When `require_approval`
+  resolves, the adapter records it via `POST /approval/:id/resolve` inside the hook's
+  `onResolution` callback. The installed OpenClaw runtime invokes that callback **fire-and-forget**
+  (`notifyPluginApprovalResolution`: `Promise.resolve(onResolution(...)).catch(log.warn)`), so a
+  throw/rejection is only **logged** — it does **not** gate execution. If `/approval/:id/resolve`
+  fails (sideband down), the tool can still run without Helio recording the resolution. The adapter
+  throws to surface it in the host log (best-effort), and it later shows up as a Helio audit anomaly
+  (`approval_unresolved` → `evaluation_expired`), but it cannot be **prevented** here. Fixing this
+  needs an upstream change (an awaited/​deny-capable approval callback or an alternate gating API).
+  **Tracked as an upstream blocker.** Note: this does not affect the `/evaluate` fail-closed
+  guarantee (§4.1), which the host _does_ enforce via `block`.
+
 ## 5. Conventions & working norms
 
 - **Package manager: pnpm** (matches the Helio org). Strict TypeScript, ESM, `tsup` build → `dist/`.
diff --git a/README.md b/README.md
index 173e2d9..27f3f08 100644
--- a/README.md
+++ b/README.md
@@ -92,6 +92,18 @@ adapter never sends an `Origin` header (Helio's browser-forgery guard rejects it
 
 See [`docs/adapter-api.md`](./docs/adapter-api.md) for the canonical sideband wire contract.
 
+## Known limitations
+
+This is the cooperative `host-enforced` enforcement grade — it relies on OpenClaw's hook gate.
+The `/evaluate` decision **is** enforced (a `block` result prevents the tool from running). One
+known gap: when an approval resolves, the adapter records it via `POST /approval/:id/resolve` in
+the hook's `onResolution` callback, but the OpenClaw runtime invokes that callback
+**fire-and-forget** (failures are logged, not awaited). So if recording the resolution fails (e.g.
+the Helio sideband is down) the tool can still run without Helio recording the approval. The
+adapter surfaces the failure (host log; and Helio's audit later flags it as `approval_unresolved` →
+`evaluation_expired`), but cannot **prevent** it adapter-side — closing this needs an upstream
+OpenClaw change. Tracked upstream.
+
 ## Development
 
 ```sh
diff --git a/src/hooks/before-tool-call.ts b/src/hooks/before-tool-call.ts
index b291692..73adfab 100644
--- a/src/hooks/before-tool-call.ts
+++ b/src/hooks/before-tool-call.ts
@@ -74,7 +74,7 @@ export function createBeforeToolCallHook(deps: BeforeToolCallDeps) {
       ...(event.runId !== undefined ? { runId: event.runId } : {}),
     })
     if (!reservation.ok) {
-      return { block: true, blockReason: 'Helio cannot correlate concurrent untracked tool calls' }
+      return { block: true, blockReason: 'Helio cannot correlate ambiguous concurrent tool calls' }
     }
 
     const outcome = await client.evaluate(buildRequest(event, ctx, session))
@@ -118,10 +118,19 @@ export function createBeforeToolCallHook(deps: BeforeToolCallDeps) {
             timeoutBehavior: 'deny',
             ...(approval.timeout_ms !== undefined ? { timeoutMs: approval.timeout_ms } : {}),
             onResolution: async (decision) => {
-              await client.resolveApproval(approval.id, {
+              const outcome = await client.resolveApproval(approval.id, {
                 ...mapResolution(decision),
                 resolved_by: resolvedBy,
               })
+              if (!outcome.ok) {
+                // Best-effort signal only. The installed OpenClaw runtime invokes onResolution
+                // fire-and-forget (`Promise.resolve(onResolution(...)).catch(log.warn)`), so this
+                // throw is logged by the host but does NOT gate execution — a failed resolution
+                // recording cannot be enforced adapter-side today. See AGENTS.md "Known limitations";
+                // tracked as an upstream blocker. The gap still surfaces (host warn log, and later a
+                // Helio audit anomaly: approval_unresolved → evaluation_expired).
+                throw new Error(`Helio could not record the approval resolution: ${outcome.reason}`)
+              }
             },
           },
         }
diff --git a/tests/hooks/before-tool-call.test.ts b/tests/hooks/before-tool-call.test.ts
index 35f9e1e..9050305 100644
--- a/tests/hooks/before-tool-call.test.ts
+++ b/tests/hooks/before-tool-call.test.ts
@@ -154,6 +154,22 @@ describe('before_tool_call', () => {
     })
   })
 
+  // NOTE: the throw is a best-effort signal — the host only logs it (see hook comment), it does
+  // not gate execution. The test asserts the adapter surfaces the failure rather than swallowing it.
+  it('onResolution surfaces a failed approval-resolution recording (does not swallow it)', async () => {
+    const { hook, resolveApproval } = setup({
+      ok: true,
+      response: { evaluation_id: 'e', decision: 'require_approval', approval: { id: 'appr-1' } },
+    })
+    resolveApproval.mockResolvedValue({ ok: false, reason: 'sideband down' })
+
+    const result = await hook(event(), ctx())
+
+    await expect(result.requireApproval?.onResolution?.('allow-once')).rejects.toThrow(
+      /could not record/i,
+    )
+  })
+
   it('fails closed and releases the reservation when evaluate fails', async () => {
     const { hook, registry } = setup({ ok: false, reason: 'down' })
 
@@ -173,7 +189,7 @@ describe('before_tool_call', () => {
 
     expect(second).toEqual({
       block: true,
-      blockReason: 'Helio cannot correlate concurrent untracked tool calls',
+      blockReason: 'Helio cannot correlate ambiguous concurrent tool calls',
     })
     await first
   })