gethelio · olivrg · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -84,15 +84,32 @@ peer-installed here):
    proceed on a failed decision (not even dev-only). This is a normative MUST in the contract. Pair
    with a short bounded `/evaluate` timeout. Do **not** rely on OpenClaw's `failurePolicyByHook`.
 2. **Correlation is fail-closed on ambiguity.** Bind `after_tool_call` → `evaluation_id` by a key
-   chain: `toolCallId` → `runId` → a **no-ID lane** keyed by `(session, toolName)`. The no-ID lane
-   allows **at most one** in-flight call per `(session, toolName)`; a 2nd concurrent untracked call
-   is **blocked**, not ambiguously bound. TTL-evict unclaimed keys. Telemetry on no-ID-lane use.
+   chain: `toolCallId` → `runId` → a **no-ID lane** keyed by `(session, toolName)`. **Only
+   `toolCallId` is per-call-unique** (full concurrency). **`runId` is _per-turn_** (shared by every
+   tool call in one agent turn), and the no-ID lane is keyed by `(session, toolName)` — so **both
+   the `runId` lane and the no-ID lane are fail-closed on ambiguity**: at most one in-flight call per
+   key, and a 2nd concurrent call on a non-unique key is **blocked**, not ambiguously bound.
+   TTL-evict unclaimed keys. Telemetry on no-ID-lane use + ambiguity blocks.
 3. **Evidence is success-only**, attached on `after_tool_call` (`status:"success"`), extracted from
    `event.result` per explicit config (not magic). Helio soft-drops bad entries and still finalizes.
 4. **A late `/audit` returning `200 already_finalized` is SUCCESS**, not an error (the decision was
    terminal at `/evaluate`).
 5. **Single adapter token** (`HELIO_ADAPTER_TOKEN`); never the SDK token; never `Origin`.
 
+### Known limitations (cooperative `host-enforced` grade)
+
+- **Approval-resolution recording is not enforceable adapter-side.** When `require_approval`
+  resolves, the adapter records it via `POST /approval/:id/resolve` inside the hook's
+  `onResolution` callback. The installed OpenClaw runtime invokes that callback **fire-and-forget**
+  (`notifyPluginApprovalResolution`: `Promise.resolve(onResolution(...)).catch(log.warn)`), so a
+  throw/rejection is only **logged** — it does **not** gate execution. If `/approval/:id/resolve`
+  fails (sideband down), the tool can still run without Helio recording the resolution. The adapter
+  throws to surface it in the host log (best-effort), and it later shows up as a Helio audit anomaly
+  (`approval_unresolved` → `evaluation_expired`), but it cannot be **prevented** here. Fixing this
+  needs an upstream change (an awaited/deny-capable approval callback or an alternate gating API).
+  **Tracked as an upstream blocker.** Note: this does not affect the `/evaluate` fail-closed
+  guarantee (§4.1), which the host _does_ enforce via `block`.
+
 ## 5. Conventions & working norms
 
 - **Package manager: pnpm** (matches the Helio org). Strict TypeScript, ESM, `tsup` build → `dist/`.

diff --git a/README.md b/README.md
@@ -92,6 +92,18 @@ adapter never sends an `Origin` header (Helio's browser-forgery guard rejects it
 
 See [`docs/adapter-api.md`](./docs/adapter-api.md) for the canonical sideband wire contract.
 
+## Known limitations
+
+This is the cooperative `host-enforced` enforcement grade — it relies on OpenClaw's hook gate.
+The `/evaluate` decision **is** enforced (a `block` result prevents the tool from running). One
+known gap: when an approval resolves, the adapter records it via `POST /approval/:id/resolve` in
+the hook's `onResolution` callback, but the OpenClaw runtime invokes that callback
+**fire-and-forget** (failures are logged, not awaited). So if recording the resolution fails (e.g.
+the Helio sideband is down) the tool can still run without Helio recording the approval. The
+adapter surfaces the failure (host log; and Helio's audit later flags it as `approval_unresolved` →
+`evaluation_expired`), but cannot **prevent** it adapter-side — closing this needs an upstream
+OpenClaw change. Tracked upstream.
+
 ## Development
 
 ```sh

diff --git a/src/correlation/registry.ts b/src/correlation/registry.ts
diff --git a/src/hooks/before-tool-call.ts b/src/hooks/before-tool-call.ts
@@ -74,7 +74,7 @@ export function createBeforeToolCallHook(deps: BeforeToolCallDeps) {
       ...(event.runId !== undefined ? { runId: event.runId } : {}),
     })
     if (!reservation.ok) {
-      return { block: true, blockReason: 'Helio cannot correlate concurrent untracked tool calls' }
+      return { block: true, blockReason: 'Helio cannot correlate ambiguous concurrent tool calls' }
     }
 
     const outcome = await client.evaluate(buildRequest(event, ctx, session))
@@ -118,10 +118,19 @@ export function createBeforeToolCallHook(deps: BeforeToolCallDeps) {
             timeoutBehavior: 'deny',
             ...(approval.timeout_ms !== undefined ? { timeoutMs: approval.timeout_ms } : {}),
             onResolution: async (decision) => {
-              await client.resolveApproval(approval.id, {
+              const outcome = await client.resolveApproval(approval.id, {
                 ...mapResolution(decision),
                 resolved_by: resolvedBy,
               })
+              if (!outcome.ok) {
+                // Best-effort signal only. The installed OpenClaw runtime invokes onResolution
+                // fire-and-forget (`Promise.resolve(onResolution(...)).catch(log.warn)`), so this
+                // throw is logged by the host but does NOT gate execution — a failed resolution
+                // recording cannot be enforced adapter-side today. See AGENTS.md "Known limitations";
+                // tracked as an upstream blocker. The gap still surfaces (host warn log, and later a
+                // Helio audit anomaly: approval_unresolved → evaluation_expired).
+                throw new Error(`Helio could not record the approval resolution: ${outcome.reason}`)
+              }
             },
           },
         }

diff --git a/tests/correlation/registry.test.ts b/tests/correlation/registry.test.ts
@@ -86,13 +86,42 @@ describe('CorrelationRegistry', () => {
       expect(reg.reserve({ session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
     })
 
-    it('does not apply the ambiguity block to ID lanes (full concurrency)', () => {
+    it('does not apply the ambiguity block to the toolCallId lane (per-call ids → full concurrency)', () => {
       const reg = new CorrelationRegistry()
       expect(reg.reserve({ toolCallId: 'tc-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
       expect(reg.reserve({ toolCallId: 'tc-2', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
     })
   })
 
+  describe('runId lane fail-closed on ambiguity (runId is per-turn, not per-call)', () => {
+    it('blocks a second call with the same runId and no toolCallId', () => {
+      const reg = new CorrelationRegistry()
+
+      const first = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+      const second = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+
+      expect(first.ok).toBe(true)
+      expect(second).toEqual({ ok: false, reason: 'ambiguous' })
+      expect(reg.stats.ambiguityBlocks).toBe(1)
+    })
+
+    it('does not block distinct runIds', () => {
+      const reg = new CorrelationRegistry()
+      expect(reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
+      expect(reg.reserve({ runId: 'run-2', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
+    })
+
+    it('frees the runId slot after claim, allowing the next sequential call in the turn', () => {
+      const reg = new CorrelationRegistry()
+      const r = reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+      if (!r.ok) throw new Error('reserve failed')
+      reg.bind(r.ticket, 'eval-1')
+      reg.claim({ runId: 'run-1', session: 'oc:s1', toolName: 'send' })
+
+      expect(reg.reserve({ runId: 'run-1', session: 'oc:s1', toolName: 'send' }).ok).toBe(true)
+    })
+  })
+
   describe('TTL eviction', () => {
     it('evicts an unclaimed entry after the TTL and frees its no-ID slot', () => {
       let clock = 0

diff --git a/tests/hooks/before-tool-call.test.ts b/tests/hooks/before-tool-call.test.ts
@@ -154,6 +154,22 @@ describe('before_tool_call', () => {
     })
   })
 
+  // NOTE: the throw is a best-effort signal — the host only logs it (see hook comment), it does
+  // not gate execution. The test asserts the adapter surfaces the failure rather than swallowing it.
+  it('onResolution surfaces a failed approval-resolution recording (does not swallow it)', async () => {
+    const { hook, resolveApproval } = setup({
+      ok: true,
+      response: { evaluation_id: 'e', decision: 'require_approval', approval: { id: 'appr-1' } },
+    })
+    resolveApproval.mockResolvedValue({ ok: false, reason: 'sideband down' })
+
+    const result = await hook(event(), ctx())
+
+    await expect(result.requireApproval?.onResolution?.('allow-once')).rejects.toThrow(
+      /could not record/i,
+    )
+  })
+
   it('fails closed and releases the reservation when evaluate fails', async () => {
     const { hook, registry } = setup({ ok: false, reason: 'down' })
 
@@ -173,7 +189,7 @@ describe('before_tool_call', () => {
 
     expect(second).toEqual({
       block: true,
-      blockReason: 'Helio cannot correlate concurrent untracked tool calls',
+      blockReason: 'Helio cannot correlate ambiguous concurrent tool calls',
     })
     await first
   })