deepsense-ai · mkoruszowic · Apr 14, 2026 · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026
diff --git a/examples/chat/file_explorer_agent.py b/examples/chat/file_explorer_agent.py
@@ -564,12 +564,11 @@ async def chat(  # noqa: PLR0912
         """
         Chat implementation with non-blocking confirmation support.
 
-        The agent will check context.tool_confirmations for any confirmations.
-        If a hook needs confirmation but hasn't been confirmed yet, it will
-        yield a ConfirmationRequest and exit. The frontend will then send a
-        new request with the confirmation in context.tool_confirmations.
+        Confirmed tools from the previous turn are executed directly by the chat layer
+        (via ``resolve_pending_confirmations``), then their results are injected into
+        history before the agent continues. The LLM never regenerates the tool call,
+        so the confirmation_id hash stays stable and there is no approval loop.
         """
-        # Create agent with history passed explicitly
         agent: Agent = Agent(
             llm=self.llm,
             prompt=f"""
@@ -592,10 +591,15 @@ async def chat(  # noqa: PLR0912
             history=history,
         )
 
-        # Create agent context with tool_confirmations from the request context
-        agent_context: AgentRunContext = AgentRunContext()
+        # Execute tools the user just approved/declined: mutates agent.history in place
+        # with synthetic (tool_use, tool_result) pairs so the LLM continues from the
+        # results instead of re-deciding the confirmed call.
+        for response in await self.resolve_pending_confirmations(agent, context):
+            yield response
 
-        # Pass tool_confirmations from ChatContext to AgentRunContext
+        # Forward tool_confirmations to the agent context — supports any legacy
+        # hash-matched confirmations for tools not routed through direct execution.
+        agent_context: AgentRunContext = AgentRunContext()
         if context.tool_confirmations:
             agent_context.tool_confirmations = context.tool_confirmations
 
@@ -616,7 +620,10 @@ async def chat(  # noqa: PLR0912
                     yield self.create_live_update(response.id, LiveUpdateType.START, f"🔧 {response.name}")
 
                 case ConfirmationRequest():
-                    # Confirmation needed - send to frontend and wait for user response
+                    # Persist the pending confirmation so the next turn can resolve it
+                    # directly (via resolve_pending_confirmations) instead of asking
+                    # the LLM to regenerate the tool call.
+                    yield self.create_state_update(self.create_pending_confirmation_state(response))
                     yield ConfirmationRequestResponse(content=ConfirmationRequestContent(confirmation_request=response))
 
                 case ToolCallResult():

diff --git a/packages/ragbits-agents/CHANGELOG.md b/packages/ragbits-agents/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+- Fix tool-confirmation loop across conversation turns caused by LLM argument drift (#969). Adds `Agent.execute_tool_directly` and an `inject_tool_call` helper so chat layers can resume confirmed tools without re-prompting the LLM. `HookManager` also falls back to matching by `tool_name` when the exact `confirmation_id` hash misses. `ConfirmationRequest` now requires a `tool_call_id` field.
+
 ## 1.6.2 (2026-03-26)
 
 - ragbits-core updated to version v1.6.2

diff --git a/packages/ragbits-agents/src/ragbits/agents/_main.py b/packages/ragbits-agents/src/ragbits/agents/_main.py
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import types
 import uuid
 import warnings
@@ -1061,6 +1062,57 @@ async def _process_tool_output(
         else:
             yield ToolReturn(value=tool_output, metadata=None)
 
+    async def execute_tool_directly(
+        self,
+        tool_call_id: str,
+        tool_name: str,
+        arguments: dict[str, Any],
+        context: AgentRunContext,
+    ) -> ToolCallResult:
+        """
+        Execute a tool with caller-supplied arguments, returning its final result.
+
+        Intended for chat layers resuming after a user confirmation: rather than asking
+        the LLM to regenerate the tool call (which risks argument drift and a broken
+        confirmation_id match), the chat layer stores the pre-confirmation arguments
+        and replays them directly through this method.
+
+        PRE_TOOL hooks still run. If a hook requests confirmation, the caller must have
+        populated ``context.tool_confirmations`` with the matching approval so the
+        confirmation lookup resolves to ``pass``. A ``deny`` decision from any PRE_TOOL
+        hook is respected and short-circuits execution. POST_TOOL hooks run on success.
+
+        Args:
+            tool_call_id: Identifier to attach to the resulting ``ToolCallResult``.
+            tool_name: Name of the tool to invoke.
+            arguments: Original pre-confirmation arguments (keeps the confirmation_id
+                stable). Must be JSON-serializable: they are passed via ``json.dumps``.
+            context: Agent run context, including any prior ``tool_confirmations``.
+
+        Returns:
+            The last ``ToolCallResult`` yielded by the tool execution path.
+
+        Raises:
+            AgentToolNotAvailableError: If the tool is not registered on this agent.
+            RuntimeError: If the execution path yields no ``ToolCallResult`` at all.
+        """
+        tools_mapping = await self._get_all_tools()
+        # ToolCall declares arguments as dict but has a "before" validator that
+        # json.loads strings, so we pass the serialized form to satisfy the validator.
+        tool_call = ToolCall(
+            id=tool_call_id,
+            type="function",
+            name=tool_name,
+            arguments=json.dumps(arguments),  # type: ignore[arg-type]
+        )
+        result: ToolCallResult | None = None
+        async for item in self._execute_tool(tool_call=tool_call, tools_mapping=tools_mapping, context=context):
+            if isinstance(item, ToolCallResult):
+                result = item
+        if result is None:
+            raise RuntimeError(f"Tool {tool_name!r} produced no ToolCallResult")
+        return result
+
     async def _execute_tool(
         self,
         tool_call: ToolCall,

diff --git a/packages/ragbits-agents/src/ragbits/agents/confirmation.py b/packages/ragbits-agents/src/ragbits/agents/confirmation.py
@@ -14,6 +14,9 @@ class ConfirmationRequest(BaseModel):
 
     confirmation_id: str
     """Unique identifier for this confirmation request."""
+    tool_call_id: str
+    """Identifier of the originating ToolCall — threads the tool_use and tool_result messages
+    when the chat layer resumes execution via Agent.execute_tool_directly."""
     tool_name: str
     """Name of the tool requiring confirmation."""
     tool_description: str

diff --git a/packages/ragbits-agents/src/ragbits/agents/history.py b/packages/ragbits-agents/src/ragbits/agents/history.py
@@ -0,0 +1,51 @@
+"""Helpers for manipulating agent conversation history (ChatFormat)."""
+
+import json
+from typing import Any
+
+from ragbits.core.prompt.base import ChatFormat
+
+
+def inject_tool_call(
+    history: ChatFormat,
+    tool_call_id: str,
+    tool_name: str,
+    arguments: dict[str, Any],
+    result: Any,  # noqa: ANN401
+) -> ChatFormat:
+    """
+    Append a synthetic (tool_use, tool_result) pair to a conversation history.
+
+    Used by the chat layer when it has executed a tool on the user's behalf
+    (e.g., after a confirmation was approved) and needs the LLM to see the
+    outcome without re-deciding the call itself.
+
+    The returned list is a shallow copy with two messages appended in OpenAI's
+    tool-use format — an ``assistant`` turn (``content`` is ``None``) carrying the
+    ``tool_calls`` block and a ``tool`` turn carrying the result keyed by ``tool_call_id``.
+
+    Args:
+        history: Current conversation history. Not mutated; its message dicts are reused.
+        tool_call_id: Identifier to thread the tool_use and tool messages.
+        tool_name: Name of the tool that was invoked.
+        arguments: Arguments the tool was invoked with. Serialized with ``json.dumps``.
+        result: Tool output. Coerced with ``str()``, not JSON-encoded.
+
+    Returns:
+        A new ChatFormat with the two messages appended.
+    """
+    return [
+        *history,
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [
+                {
+                    "id": tool_call_id,
+                    "type": "function",
+                    "function": {"name": tool_name, "arguments": json.dumps(arguments)},
+                }
+            ],
+        },
+        {"role": "tool", "tool_call_id": tool_call_id, "content": str(result)},
+    ]
diff --git a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py
@@ -9,7 +9,7 @@
 import json
 from collections import defaultdict
 from collections.abc import AsyncGenerator
-from typing import TYPE_CHECKING, Generic, Literal, overload
+from typing import TYPE_CHECKING, Any, Generic, Literal, overload
 
 from ragbits.agents.confirmation import ConfirmationRequest
 from ragbits.agents.hooks.base import Hook
@@ -111,6 +111,48 @@ def get_hooks(self, event_type: EventType, tool_name: str | None = None) -> list
 
         return [hook for hook in hooks if hook.matches_tool(tool_name)]
 
+    @staticmethod
+    def _compute_confirmation_id(hook_name: str, tool_name: str, arguments: dict[str, Any]) -> str:
+        """
+        Compute the confirmation_id for a given (hook, tool, arguments) triple.
+
+        Hashes "hook_name:tool_name:<key-sorted JSON arguments>" with SHA-256, truncated to
+        CONFIRMATION_ID_LENGTH hex chars. Static so callers can reproduce the id on resume.
+        """
+        payload = f"{hook_name}:{tool_name}:{json.dumps(arguments, sort_keys=True)}"
+        return hashlib.sha256(payload.encode()).hexdigest()[:CONFIRMATION_ID_LENGTH]
+
+    @staticmethod
+    def _find_confirmation(
+        tool_confirmations: list[dict[str, Any]],
+        confirmation_id: str,
+        tool_name: str,
+    ) -> dict[str, Any] | None:
+        """
+        Find a matching confirmation entry.
+
+        Tries exact confirmation_id match first, then falls back to tool_name match (covers
+        cross-turn cases where the LLM regenerates arguments cosmetically, changing the hash).
+        NOTE(review): the fallback ignores arguments — one decision covers every call of that tool.
+
+        Args:
+            tool_confirmations: List of confirmation entries from context
+            confirmation_id: The computed confirmation ID for this tool call
+            tool_name: The name of the tool being called
+
+        Returns:
+            The first matching confirmation entry in list order, or None if not found
+        """
+        for conf in tool_confirmations:
+            if conf.get("confirmation_id") == confirmation_id:
+                return conf
+
+        for conf in tool_confirmations:
+            if conf.get("tool_name") == tool_name:
+                return conf
+
+        return None
+
     async def execute_pre_tool(
         self,
         tool_call: ToolCall,
@@ -132,12 +174,11 @@ async def execute_pre_tool(
         current_tool_call = tool_call.model_copy()
 
         for hook in self.get_hooks(EventType.PRE_TOOL, tool_call.name):
-            # Generate confirmation_id: hash(hook_function_name + tool_name + arguments)
-            hook_name = hook.callback.__name__
-            confirmation_id_str = (
-                f"{hook_name}:{tool_call.name}:{json.dumps(current_tool_call.arguments, sort_keys=True)}"
+            confirmation_id = self._compute_confirmation_id(
+                hook_name=hook.callback.__name__,
+                tool_name=tool_call.name,
+                arguments=current_tool_call.arguments,
             )
-            confirmation_id = hashlib.sha256(confirmation_id_str.encode()).hexdigest()[:CONFIRMATION_ID_LENGTH]
 
             result: ToolCall = await hook.callback(current_tool_call)
 
@@ -148,28 +189,25 @@ async def execute_pre_tool(
                 return result, None
 
             elif result.decision == "ask":
-                # Check if already confirmed/declined in context
-                for conf in context.tool_confirmations:
-                    if conf.get("confirmation_id") == confirmation_id:
-                        if conf.get("confirmed"):
-                            # Approved → convert to "pass" and continue to next hook
-                            result = result.model_copy(update={"decision": "pass"})
-                            break
-                        else:
-                            # Declined → convert to "deny" and stop immediately
-                            return (
-                                result.model_copy(
-                                    update={
-                                        "decision": "deny",
-                                        "reason": "❌ Action declined by user",
-                                    }
-                                ),
-                                None,
-                            )
+                matched = self._find_confirmation(context.tool_confirmations, confirmation_id, tool_call.name)
+
+                if matched is not None:
+                    if matched.get("confirmed"):
+                        result = result.model_copy(update={"decision": "pass"})
+                    else:
+                        return (
+                            result.model_copy(
+                                update={
+                                    "decision": "deny",
+                                    "reason": "❌ Action declined by user",
+                                }
+                            ),
+                            None,
+                        )
                 else:
-                    # Not in context → return "ask" with ConfirmationRequest
                     confirmation_request = ConfirmationRequest(
                         confirmation_id=confirmation_id,
+                        tool_call_id=tool_call.id,
                         tool_name=tool_call.name,
                         tool_description=result.reason,  # type: ignore[arg-type]  # guaranteed non-None by ValueError check above
                         arguments=current_tool_call.arguments,

diff --git a/packages/ragbits-agents/tests/unit/hooks/test_manager.py b/packages/ragbits-agents/tests/unit/hooks/test_manager.py
@@ -140,6 +140,73 @@ async def test_ask_with_prior_confirmation(self, tool_call: ToolCall, ask_hook:
         result, _ = await manager.execute_pre_tool(tool_call, ctx_declined)
         assert result.decision == "deny"
 
+    @pytest.mark.asyncio
+    async def test_ask_with_tool_name_fallback_approved(self, tool_call: ToolCall, ask_hook: PreToolCallback):
+        """A stale confirmation_id with a matching tool_name and confirmed=True resolves the ask to pass."""
+        manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)])
+
+        # Simulate cross-turn: the entry carries the right tool_name but a stale confirmation_id
+        ctx: AgentRunContext = AgentRunContext(
+            tool_confirmations=[
+                {"confirmation_id": "stale_id_from_previous_turn", "tool_name": "test_tool", "confirmed": True}
+            ]
+        )
+        result, confirmation = await manager.execute_pre_tool(tool_call, ctx)
+
+        assert result.decision == "pass"
+        assert confirmation is None
+
+    @pytest.mark.asyncio
+    async def test_ask_with_tool_name_fallback_declined(self, tool_call: ToolCall, ask_hook: PreToolCallback):
+        """A stale confirmation_id with a matching tool_name and confirmed=False turns the ask into deny."""
+        manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)])
+
+        ctx: AgentRunContext = AgentRunContext(
+            tool_confirmations=[
+                {"confirmation_id": "stale_id_from_previous_turn", "tool_name": "test_tool", "confirmed": False}
+            ]
+        )
+        result, confirmation = await manager.execute_pre_tool(tool_call, ctx)
+
+        assert result.decision == "deny"
+        assert confirmation is None
+
+    @pytest.mark.asyncio
+    async def test_exact_confirmation_id_takes_priority_over_tool_name(
+        self, tool_call: ToolCall, ask_hook: PreToolCallback
+    ):
+        """An exact confirmation_id match wins even when a tool_name-only match appears earlier in the list."""
+        manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)])
+        exact_id = make_confirmation_id("ask_hook", "test_tool", {"arg1": "value1"})
+
+        ctx: AgentRunContext = AgentRunContext(
+            tool_confirmations=[
+                # listed first: matches by tool_name only and says declined
+                {"confirmation_id": "wrong_id", "tool_name": "test_tool", "confirmed": False},
+                # listed second: exact confirmation_id match says approved — should win
+                {"confirmation_id": exact_id, "confirmed": True},
+            ]
+        )
+        result, _ = await manager.execute_pre_tool(tool_call, ctx)
+
+        assert result.decision == "pass"
+
+    @pytest.mark.asyncio
+    async def test_tool_name_fallback_does_not_match_different_tool(
+        self, tool_call: ToolCall, ask_hook: PreToolCallback
+    ):
+        """A confirmation for a different tool_name is ignored, so the hook still asks for confirmation."""
+        manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)])
+
+        ctx: AgentRunContext = AgentRunContext(
+            tool_confirmations=[{"confirmation_id": "some_id", "tool_name": "other_tool", "confirmed": True}]
+        )
+        result, confirmation = await manager.execute_pre_tool(tool_call, ctx)
+
+        assert result.decision == "ask"
+        assert confirmation is not None
+        assert confirmation.tool_name == "test_tool"
+
     @pytest.mark.asyncio
     async def test_chaining(
         self, tool_call: ToolCall, context: AgentRunContext, pre_tool_add_field: Callable[..., PreToolCallback]