From ea2972e4de41d112ff6ca39ea6b0a048ad4d4670 Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Tue, 14 Apr 2026 20:02:26 +0200 Subject: [PATCH 1/8] fix confirmation hook matching across conversation turns with tool_name fallback --- .../src/ragbits/agents/hooks/manager.py | 67 +++++++++++++------ .../tests/unit/hooks/test_manager.py | 67 +++++++++++++++++++ 2 files changed, 114 insertions(+), 20 deletions(-) diff --git a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py index 58550ce0f8..9accc90f1f 100644 --- a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py +++ b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py @@ -9,7 +9,7 @@ import json from collections import defaultdict from collections.abc import AsyncGenerator -from typing import TYPE_CHECKING, Generic, Literal, overload +from typing import TYPE_CHECKING, Any, Generic, Literal, overload from ragbits.agents.confirmation import ConfirmationRequest from ragbits.agents.hooks.base import Hook @@ -111,6 +111,37 @@ def get_hooks(self, event_type: EventType, tool_name: str | None = None) -> list return [hook for hook in hooks if hook.matches_tool(tool_name)] + @staticmethod + def _find_confirmation( + tool_confirmations: list[dict[str, Any]], + confirmation_id: str, + tool_name: str, + ) -> dict[str, Any] | None: + """ + Find a matching confirmation entry. + + Tries exact confirmation_id match first, then falls back to tool_name match. + The tool_name fallback handles cross-turn scenarios where the LLM regenerates + arguments with cosmetic differences, changing the hash. + + Args: + tool_confirmations: List of confirmation entries from context + confirmation_id: The computed confirmation ID for this tool call + tool_name: The name of the tool being called + + Returns: + The matching confirmation entry, or None if not found + """ + for conf in tool_confirmations: + if conf.get("confirmation_id") == confirmation_id: + return conf + + for conf in tool_confirmations: + if conf.get("tool_name") == tool_name: + return conf + + return None + async def execute_pre_tool( self, tool_call: ToolCall, @@ -148,26 +179,22 @@ async def execute_pre_tool( return result, None elif result.decision == "ask": - # Check if already confirmed/declined in context - for conf in context.tool_confirmations: - if conf.get("confirmation_id") == confirmation_id: - if conf.get("confirmed"): - # Approved → convert to "pass" and continue to next hook - result = result.model_copy(update={"decision": "pass"}) - break - else: - # Declined → convert to "deny" and stop immediately - return ( - result.model_copy( - update={ - "decision": "deny", - "reason": "❌ Action declined by user", - } - ), - None, - ) + matched = self._find_confirmation(context.tool_confirmations, confirmation_id, tool_call.name) + + if matched is not None: + if matched.get("confirmed"): + result = result.model_copy(update={"decision": "pass"}) + else: + return ( + result.model_copy( + update={ + "decision": "deny", + "reason": "❌ Action declined by user", + } + ), + None, + ) else: - # Not in context → return "ask" with ConfirmationRequest confirmation_request = ConfirmationRequest( confirmation_id=confirmation_id, tool_name=tool_call.name, diff --git a/packages/ragbits-agents/tests/unit/hooks/test_manager.py b/packages/ragbits-agents/tests/unit/hooks/test_manager.py index 7212b9a4c6..193988ae44 100644 --- a/packages/ragbits-agents/tests/unit/hooks/test_manager.py +++ b/packages/ragbits-agents/tests/unit/hooks/test_manager.py @@ -140,6 +140,73 @@ async def test_ask_with_prior_confirmation(self, tool_call: ToolCall, ask_hook: result, _ = await manager.execute_pre_tool(tool_call, ctx_declined) assert result.decision == "deny" + @pytest.mark.asyncio + async def test_ask_with_tool_name_fallback_approved(self, tool_call: ToolCall, ask_hook: PreToolCallback): + """When confirmation_id doesn't match (cross-turn hash drift), fall back to tool_name match.""" + manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) + + # Simulate cross-turn: frontend sends back tool_name but with a stale confirmation_id + ctx = AgentRunContext( + tool_confirmations=[ + {"confirmation_id": "stale_id_from_previous_turn", "tool_name": "test_tool", "confirmed": True} + ] + ) + result, confirmation = await manager.execute_pre_tool(tool_call, ctx) + + assert result.decision == "pass" + assert confirmation is None + + @pytest.mark.asyncio + async def test_ask_with_tool_name_fallback_declined(self, tool_call: ToolCall, ask_hook: PreToolCallback): + """When confirmation_id doesn't match but tool_name does and user declined.""" + manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) + + ctx = AgentRunContext( + tool_confirmations=[ + {"confirmation_id": "stale_id_from_previous_turn", "tool_name": "test_tool", "confirmed": False} + ] + ) + result, confirmation = await manager.execute_pre_tool(tool_call, ctx) + + assert result.decision == "deny" + assert confirmation is None + + @pytest.mark.asyncio + async def test_exact_confirmation_id_takes_priority_over_tool_name( + self, tool_call: ToolCall, ask_hook: PreToolCallback + ): + """Exact confirmation_id match should be used even if a tool_name entry also exists.""" + manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) + exact_id = make_confirmation_id("ask_hook", "test_tool", {"arg1": "value1"}) + + ctx = AgentRunContext( + tool_confirmations=[ + # tool_name match says declined + {"confirmation_id": "wrong_id", "tool_name": "test_tool", "confirmed": False}, + # exact confirmation_id match says approved — should win + {"confirmation_id": exact_id, "confirmed": True}, + ] + ) + result, _ = await manager.execute_pre_tool(tool_call, ctx) + + assert result.decision == "pass" + + @pytest.mark.asyncio + async def test_tool_name_fallback_does_not_match_different_tool( + self, tool_call: ToolCall, ask_hook: PreToolCallback + ): + """tool_name fallback should not match confirmations for a different tool.""" + manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) + + ctx = AgentRunContext( + tool_confirmations=[{"confirmation_id": "some_id", "tool_name": "other_tool", "confirmed": True}] + ) + result, confirmation = await manager.execute_pre_tool(tool_call, ctx) + + assert result.decision == "ask" + assert confirmation is not None + assert confirmation.tool_name == "test_tool" + @pytest.mark.asyncio async def test_chaining( self, tool_call: ToolCall, context: AgentRunContext, pre_tool_add_field: Callable[..., PreToolCallback] From 5d2fa2f2795767e5ec6f5f20c85e7c716da54278 Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Sun, 19 Apr 2026 14:06:21 +0200 Subject: [PATCH 2/8] feat(agents): add Agent.execute_tool_directly for chat-layer confirmation resume --- .../src/ragbits/agents/_main.py | 50 +++++++++++ .../src/ragbits/agents/hooks/manager.py | 20 +++-- .../ragbits-agents/tests/unit/test_agent.py | 88 +++++++++++++++++++ 3 files changed, 153 insertions(+), 5 deletions(-) diff --git a/packages/ragbits-agents/src/ragbits/agents/_main.py b/packages/ragbits-agents/src/ragbits/agents/_main.py index 59a473083d..38136ed4a4 100644 --- a/packages/ragbits-agents/src/ragbits/agents/_main.py +++ b/packages/ragbits-agents/src/ragbits/agents/_main.py @@ -1,4 +1,5 @@ import asyncio +import json import types import uuid import warnings @@ -1061,6 +1062,55 @@ async def _process_tool_output( else: yield ToolReturn(value=tool_output, metadata=None) + async def execute_tool_directly( + self, + tool_call_id: str, + tool_name: str, + arguments: dict[str, Any], + context: AgentRunContext, + ) -> ToolCallResult: + """ + Execute a tool with caller-supplied arguments, returning its final result. + + Intended for chat layers resuming after a user confirmation: rather than asking + the LLM to regenerate the tool call (which risks argument drift and a broken + confirmation_id match), the chat layer stores the pre-confirmation arguments + and replays them directly through this method. + + PRE_TOOL hooks still run. If a hook requests confirmation, the caller is + responsible for having populated ``context.tool_confirmations`` with the + matching approval so the existing hash-match path resolves to ``pass``. + A ``deny`` decision from any PRE_TOOL hook is respected and short-circuits + execution. POST_TOOL hooks run on success. + + Args: + tool_call_id: Identifier to attach to the resulting ``ToolCallResult``. + tool_name: Name of the tool to invoke. + arguments: Arguments to pass to the tool (should be the original + pre-confirmation arguments to keep the confirmation_id stable). + context: Agent run context, including any prior ``tool_confirmations``. + + Returns: + The ``ToolCallResult`` yielded by the tool execution path. + + Raises: + AgentToolNotAvailableError: If the tool is not registered on this agent. + """ + tools_mapping = await self._get_all_tools() + tool_call = ToolCall( + id=tool_call_id, + type="function", + name=tool_name, + arguments=json.dumps(arguments), + ) + result: ToolCallResult | None = None + async for item in self._execute_tool(tool_call=tool_call, tools_mapping=tools_mapping, context=context): + if isinstance(item, ToolCallResult): + result = item + if result is None: + raise RuntimeError(f"Tool {tool_name!r} produced no ToolCallResult") + return result + async def _execute_tool( self, tool_call: ToolCall, diff --git a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py index 9accc90f1f..5334b71a73 100644 --- a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py +++ b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py @@ -111,6 +111,17 @@ def get_hooks(self, event_type: EventType, tool_name: str | None = None) -> list return [hook for hook in hooks if hook.matches_tool(tool_name)] + @staticmethod + def _compute_confirmation_id(hook_name: str, tool_name: str, arguments: dict[str, Any]) -> str: + """ + Compute the confirmation_id for a given (hook, tool, arguments) triple. + + Exposed so the chat layer (or tests) can reproduce the same id when resuming + a paused confirmation flow. + """ + payload = f"{hook_name}:{tool_name}:{json.dumps(arguments, sort_keys=True)}" + return hashlib.sha256(payload.encode()).hexdigest()[:CONFIRMATION_ID_LENGTH] + @staticmethod def _find_confirmation( tool_confirmations: list[dict[str, Any]], @@ -163,12 +174,11 @@ async def execute_pre_tool( current_tool_call = tool_call.model_copy() for hook in self.get_hooks(EventType.PRE_TOOL, tool_call.name): - # Generate confirmation_id: hash(hook_function_name + tool_name + arguments) - hook_name = hook.callback.__name__ - confirmation_id_str = ( - f"{hook_name}:{tool_call.name}:{json.dumps(current_tool_call.arguments, sort_keys=True)}" + confirmation_id = self._compute_confirmation_id( + hook_name=hook.callback.__name__, + tool_name=tool_call.name, + arguments=current_tool_call.arguments, ) - confirmation_id = hashlib.sha256(confirmation_id_str.encode()).hexdigest()[:CONFIRMATION_ID_LENGTH] result: ToolCall = await hook.callback(current_tool_call) diff --git a/packages/ragbits-agents/tests/unit/test_agent.py b/packages/ragbits-agents/tests/unit/test_agent.py index ad01bad27f..8648ecbb20 100644 --- a/packages/ragbits-agents/tests/unit/test_agent.py +++ b/packages/ragbits-agents/tests/unit/test_agent.py @@ -913,6 +913,94 @@ async def test_pre_tool_hook_ask_with_confirmation_approved(llm_with_tool_call: assert "72" in result.tool_calls[0].result +async def test_execute_tool_directly_runs_tool_with_given_arguments(llm_without_tool_call: MockLLM): + """execute_tool_directly runs the named tool with explicit args, bypassing LLM tool selection.""" + agent = Agent(llm=llm_without_tool_call, prompt=CustomPrompt, tools=[get_weather]) + + context = AgentRunContext() + result = await agent.execute_tool_directly( + tool_call_id="call_1", + tool_name="get_weather", + arguments={"location": "San Francisco"}, + context=context, + ) + + assert result.id == "call_1" + assert result.name == "get_weather" + assert "72" in result.result + + +async def test_execute_tool_directly_respects_prior_confirmation( + llm_without_tool_call: MockLLM, ask_hook: PreToolCallback +): + """A PRE_TOOL ask hook still runs, but a matching confirmation in context makes it pass.""" + from ragbits.agents.hooks.manager import HookManager + + hook = Hook(event_type=EventType.PRE_TOOL, callback=ask_hook) + agent = Agent(llm=llm_without_tool_call, prompt=CustomPrompt, tools=[get_weather], hooks=[hook]) + + arguments = {"location": "San Francisco"} + confirmation_id = HookManager._compute_confirmation_id( + hook_name="ask_hook", tool_name="get_weather", arguments=arguments + ) + context: AgentRunContext = AgentRunContext( + tool_confirmations=[{"confirmation_id": confirmation_id, "confirmed": True}] + ) + + result = await agent.execute_tool_directly( + tool_call_id="call_1", tool_name="get_weather", arguments=arguments, context=context + ) + + assert "72" in result.result + + +async def test_execute_tool_directly_deny_hook_blocks_execution( + llm_without_tool_call: MockLLM, deny_hook: PreToolCallback +): + """A non-confirmation PRE_TOOL hook returning deny still blocks execution.""" + hook = Hook(event_type=EventType.PRE_TOOL, callback=deny_hook) + agent = Agent(llm=llm_without_tool_call, prompt=CustomPrompt, tools=[get_weather], hooks=[hook]) + + result = await agent.execute_tool_directly( + tool_call_id="call_1", + tool_name="get_weather", + arguments={"location": "San Francisco"}, + context=AgentRunContext(), + ) + + assert result.result == "Blocked by hook" + + +async def test_execute_tool_directly_unknown_tool_raises(llm_without_tool_call: MockLLM): + """Unknown tool name raises AgentToolNotAvailableError.""" + agent = Agent(llm=llm_without_tool_call, prompt=CustomPrompt, tools=[get_weather]) + + with pytest.raises(AgentToolNotAvailableError): + await agent.execute_tool_directly( + tool_call_id="call_1", + tool_name="not_a_tool", + arguments={}, + context=AgentRunContext(), + ) + + +async def test_execute_tool_directly_runs_post_tool_hooks( + llm_without_tool_call: MockLLM, post_tool_append: Callable[..., PostToolCallback] +): + """POST_TOOL hooks must run on the direct-execution result.""" + hook = Hook(event_type=EventType.POST_TOOL, callback=post_tool_append("[PT]", prepend=True)) + agent = Agent(llm=llm_without_tool_call, prompt=CustomPrompt, tools=[get_weather], hooks=[hook]) + + result = await agent.execute_tool_directly( + tool_call_id="call_1", + tool_name="get_weather", + arguments={"location": "San Francisco"}, + context=AgentRunContext(), + ) + + assert result.result.startswith("[PT]") + + async def test_hook_priority_order(llm_with_tool_call: MockLLM): """Test that hooks execute in priority order (lower first).""" execution_order: list[int] = [] From 1849030d00cc5d940f1cf6e90a04751ff7064c17 Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Sun, 19 Apr 2026 14:08:15 +0200 Subject: [PATCH 3/8] feat(agents): add inject_tool_call helper for synthetic tool-use history injection --- .../src/ragbits/agents/history.py | 51 +++++++++++++++++ .../ragbits-agents/tests/unit/test_history.py | 55 +++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 packages/ragbits-agents/src/ragbits/agents/history.py create mode 100644 packages/ragbits-agents/tests/unit/test_history.py diff --git a/packages/ragbits-agents/src/ragbits/agents/history.py b/packages/ragbits-agents/src/ragbits/agents/history.py new file mode 100644 index 0000000000..da2cce24ce --- /dev/null +++ b/packages/ragbits-agents/src/ragbits/agents/history.py @@ -0,0 +1,51 @@ +"""Helpers for manipulating agent conversation history (ChatFormat).""" + +import json +from typing import Any + +from ragbits.core.prompt.base import ChatFormat + + +def inject_tool_call( + history: ChatFormat, + tool_call_id: str, + tool_name: str, + arguments: dict[str, Any], + result: Any, # noqa: ANN401 +) -> ChatFormat: + """ + Append a synthetic (tool_use, tool_result) pair to a conversation history. + + Used by the chat layer when it has executed a tool on the user's behalf + (e.g., after a confirmation was approved) and needs the LLM to see the + outcome without re-deciding the call itself. + + The returned list is a shallow copy with two messages appended in OpenAI's + tool-use format — an ``assistant`` turn carrying the ``tool_calls`` block + and a ``tool`` turn carrying the result keyed by ``tool_call_id``. + + Args: + history: Current conversation history. Not mutated. + tool_call_id: Identifier to thread the tool_use and tool messages. + tool_name: Name of the tool that was invoked. + arguments: Arguments the tool was invoked with. + result: Tool output. Coerced to ``str``. + + Returns: + A new ChatFormat with the two messages appended. + """ + return [ + *history, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": tool_call_id, + "type": "function", + "function": {"name": tool_name, "arguments": json.dumps(arguments)}, + } + ], + }, + {"role": "tool", "tool_call_id": tool_call_id, "content": str(result)}, + ] diff --git a/packages/ragbits-agents/tests/unit/test_history.py b/packages/ragbits-agents/tests/unit/test_history.py new file mode 100644 index 0000000000..6a1c853fd2 --- /dev/null +++ b/packages/ragbits-agents/tests/unit/test_history.py @@ -0,0 +1,55 @@ +"""Tests for history manipulation helpers.""" + +import json + +from ragbits.agents.history import inject_tool_call + + +def test_inject_tool_call_appends_assistant_and_tool_messages(): + """Injection produces an assistant tool_use message followed by a tool result message.""" + history = [{"role": "user", "content": "delete foo.txt"}] + + result = inject_tool_call( + history, + tool_call_id="call_42", + tool_name="delete_file", + arguments={"path": "foo.txt"}, + result="Deleted foo.txt", + ) + + assert len(result) == 3 + assert result[0] == {"role": "user", "content": "delete foo.txt"} + + assistant_msg = result[1] + assert assistant_msg["role"] == "assistant" + assert assistant_msg["content"] is None + assert assistant_msg["tool_calls"] == [ + { + "id": "call_42", + "type": "function", + "function": {"name": "delete_file", "arguments": json.dumps({"path": "foo.txt"})}, + } + ] + + tool_msg = result[2] + assert tool_msg == {"role": "tool", "tool_call_id": "call_42", "content": "Deleted foo.txt"} + + +def test_inject_tool_call_does_not_mutate_input(): + """Helper returns a new list; caller's history is untouched.""" + history = [{"role": "user", "content": "hi"}] + + inject_tool_call( + history, tool_call_id="c", tool_name="t", arguments={}, result="ok" + ) + + assert history == [{"role": "user", "content": "hi"}] + + +def test_inject_tool_call_stringifies_non_string_result(): + """Non-string results are coerced to str (matching add_tool_use_message).""" + result = inject_tool_call( + [], tool_call_id="c", tool_name="t", arguments={}, result={"key": "value"} + ) + + assert result[1]["content"] == str({"key": "value"}) From 2a41f7728d296491d124e34c829826e378877bbc Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Sun, 19 Apr 2026 14:22:29 +0200 Subject: [PATCH 4/8] feat(chat): add ChatInterface helpers to resolve pending confirmations via direct tool execution --- .../src/ragbits/agents/confirmation.py | 3 + .../src/ragbits/agents/hooks/manager.py | 1 + .../src/ragbits/chat/interface/_interface.py | 105 +++++++++++++ .../unit/test_confirmation_resolution.py | 145 ++++++++++++++++++ 4 files changed, 254 insertions(+) create mode 100644 packages/ragbits-chat/tests/unit/test_confirmation_resolution.py diff --git a/packages/ragbits-agents/src/ragbits/agents/confirmation.py b/packages/ragbits-agents/src/ragbits/agents/confirmation.py index 6cd0483b4c..15e1a4f36b 100644 --- a/packages/ragbits-agents/src/ragbits/agents/confirmation.py +++ b/packages/ragbits-agents/src/ragbits/agents/confirmation.py @@ -14,6 +14,9 @@ class ConfirmationRequest(BaseModel): confirmation_id: str """Unique identifier for this confirmation request.""" + tool_call_id: str + """Identifier of the originating ToolCall — threads the tool_use and tool_result messages + when the chat layer resumes execution via Agent.execute_tool_directly.""" tool_name: str """Name of the tool requiring confirmation.""" tool_description: str diff --git a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py index 5334b71a73..00c5746688 100644 --- a/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py +++ b/packages/ragbits-agents/src/ragbits/agents/hooks/manager.py @@ -207,6 +207,7 @@ async def execute_pre_tool( else: confirmation_request = ConfirmationRequest( confirmation_id=confirmation_id, + tool_call_id=tool_call.id, tool_name=tool_call.name, tool_description=result.reason, # type: ignore[arg-type] # guaranteed non-None by ValueError check above arguments=current_tool_call.arguments, diff --git a/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py b/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py index 40c3a83c71..5dcfc28f01 100644 --- a/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py +++ b/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py @@ -11,6 +11,9 @@ from fastapi import UploadFile +from ragbits.agents._main import Agent, AgentRunContext +from ragbits.agents.confirmation import ConfirmationRequest +from ragbits.agents.history import inject_tool_call from ragbits.agents.tools.planning import Task from ragbits.chat.interface.summary import HeuristicSummaryGenerator, SummaryGenerator from ragbits.chat.interface.ui_customization import UICustomization @@ -289,6 +292,108 @@ def create_usage_response(usage: Usage) -> UsageResponse: def create_plan_item_response(task: Task) -> PlanItemResponse: return PlanItemResponse(content=PlanItemContent(task=task)) + @staticmethod + def create_pending_confirmation_state(request: ConfirmationRequest) -> dict[str, Any]: + """ + Build the state-dict fragment that records a pending confirmation. + + Merge this into ``ChatContext.state`` (via ``create_state_update``) whenever a + ``ConfirmationRequest`` is streamed to the frontend. On the continuation turn, + ``resolve_pending_confirmations`` uses this fragment to replay the approved + tool with its original arguments — avoiding LLM regeneration and the fragile + confirmation_id hash match. + """ + return { + "pending_confirmations": { + request.confirmation_id: { + "tool_call_id": request.tool_call_id, + "tool_name": request.tool_name, + "arguments": request.arguments, + } + } + } + + async def resolve_pending_confirmations( + self, + agent: Agent, + context: ChatContext, + history: ChatFormat, + ) -> tuple[ChatFormat, list[ChatResponseUnion]]: + """ + Execute tools whose confirmations the user just returned, producing updated history. + + For each entry in ``context.tool_confirmations`` matched against + ``context.state["pending_confirmations"]``: + + - **confirmed**: invoke the tool via ``agent.execute_tool_directly`` with the + stored arguments, then append the (tool_use, tool_result) pair to history. + - **declined**: append a synthetic tool_result saying the user declined. + + The agent's next ``run_streaming`` call sees these pairs already in history + and continues naturally — the LLM is never asked to regenerate the confirmed + tool call, so there is no argument drift or hash-mismatch loop. + + Args: + agent: The agent whose tools should be executed for confirmed entries. + context: The chat context from this turn. + history: The conversation history passed to this chat turn. + + Returns: + Tuple of ``(new_history, responses_to_yield)``. Caller should yield the + responses (UI live updates) before continuing the agent run with + ``new_history``. + """ + pending_map: dict[str, dict[str, Any]] = context.state.get("pending_confirmations", {}) or {} + confirmations = context.tool_confirmations or [] + if not pending_map or not confirmations: + return history, [] + + agent_context: AgentRunContext = AgentRunContext(tool_confirmations=list(confirmations)) + responses: list[ChatResponseUnion] = [] + new_history = history + + for entry in confirmations: + confirmation_id = entry.get("confirmation_id") + pending = pending_map.get(confirmation_id) if confirmation_id else None + if pending is None: + continue + + tool_call_id = pending["tool_call_id"] + tool_name = pending["tool_name"] + arguments = pending["arguments"] + + if entry.get("confirmed"): + result = await agent.execute_tool_directly( + tool_call_id=tool_call_id, + tool_name=tool_name, + arguments=arguments, + context=agent_context, + ) + responses.append( + self.create_live_update( + tool_call_id, LiveUpdateType.FINISH, f"✅ {tool_name}", str(result.result)[:100] + ) + ) + new_history = inject_tool_call( + new_history, + tool_call_id=tool_call_id, + tool_name=tool_name, + arguments=arguments, + result=result.result, + ) + else: + decline_msg = "❌ User declined this action" + responses.append(self.create_live_update(tool_call_id, LiveUpdateType.FINISH, f"❌ {tool_name}")) + new_history = inject_tool_call( + new_history, + tool_call_id=tool_call_id, + tool_name=tool_name, + arguments=arguments, + result=decline_msg, + ) + + return new_history, responses + @staticmethod def _sign_state(state: dict[str, Any]) -> str: """ diff --git a/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py b/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py new file mode 100644 index 0000000000..dc08a689e1 --- /dev/null +++ b/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py @@ -0,0 +1,145 @@ +"""Tests for ChatInterface confirmation-resolution helpers.""" + +from collections.abc import AsyncGenerator +from typing import Any +from unittest.mock import AsyncMock + +import pytest + +from ragbits.agents._main import ToolCallResult +from ragbits.agents.confirmation import ConfirmationRequest +from ragbits.chat.interface import ChatInterface +from ragbits.chat.interface.types import ChatContext, ChatResponseUnion, TextContent, TextResponse + + +class _Dummy(ChatInterface): + async def chat( # type: ignore[override] + self, message: str, history: Any, context: ChatContext + ) -> AsyncGenerator[ChatResponseUnion, None]: + yield TextResponse(content=TextContent(text="ok")) + + +def test_create_pending_confirmation_state_shape(): + """Helper returns a state dict keyed by confirmation_id with tool_call_id, name, args.""" + request = ConfirmationRequest( + confirmation_id="conf_1", + tool_call_id="call_1", + tool_name="send_slack", + tool_description="Send a slack message", + arguments={"channel": "#general", "text": "hi"}, + ) + + state = ChatInterface.create_pending_confirmation_state(request) + + assert state == { + "pending_confirmations": { + "conf_1": { + "tool_call_id": "call_1", + "tool_name": "send_slack", + "arguments": {"channel": "#general", "text": "hi"}, + } + } + } + + +async def test_resolve_pending_confirmations_confirmed_executes_and_appends_history(): + """A confirmed pending causes execute_tool_directly to run and history to grow.""" + iface = _Dummy() + + agent = AsyncMock() + agent.execute_tool_directly.return_value = ToolCallResult( + id="call_1", name="send_slack", arguments={"channel": "#general", "text": "hi"}, result="sent" + ) + + context = ChatContext( + state={ + "pending_confirmations": { + "conf_1": { + "tool_call_id": "call_1", + "tool_name": "send_slack", + "arguments": {"channel": "#general", "text": "hi"}, + } + } + }, + tool_confirmations=[{"confirmation_id": "conf_1", "confirmed": True}], + ) + history = [{"role": "user", "content": "send hi to #general"}] + + new_history, responses = await iface.resolve_pending_confirmations(agent, context, history) + + agent.execute_tool_directly.assert_awaited_once() + call_kwargs = agent.execute_tool_directly.call_args.kwargs + assert call_kwargs["tool_call_id"] == "call_1" + assert call_kwargs["tool_name"] == "send_slack" + assert call_kwargs["arguments"] == {"channel": "#general", "text": "hi"} + + assert len(new_history) == 3 + assert new_history[1]["role"] == "assistant" + assert new_history[1]["tool_calls"][0]["id"] == "call_1" + assert new_history[2] == {"role": "tool", "tool_call_id": "call_1", "content": "sent"} + assert responses # at least one UI response emitted + + +async def test_resolve_pending_confirmations_declined_skips_execution(): + """A declined pending injects a decline result; no tool execution happens.""" + iface = _Dummy() + + agent = AsyncMock() + + context = ChatContext( + state={ + "pending_confirmations": { + "conf_1": { + "tool_call_id": "call_1", + "tool_name": "delete_file", + "arguments": {"path": "x"}, + } + } + }, + tool_confirmations=[{"confirmation_id": "conf_1", "confirmed": False}], + ) + + new_history, _ = await iface.resolve_pending_confirmations(agent, context, []) + + agent.execute_tool_directly.assert_not_called() + assert new_history[-1]["role"] == "tool" + assert "declined" in new_history[-1]["content"].lower() + + +async def test_resolve_pending_confirmations_no_pending_returns_input_unchanged(): + """No pending_confirmations in state → history untouched, no responses, no execution.""" + iface = _Dummy() + + agent = AsyncMock() + + context = ChatContext(state={}, tool_confirmations=[]) + history = [{"role": "user", "content": "hi"}] + + new_history, responses = await iface.resolve_pending_confirmations(agent, context, history) + + agent.execute_tool_directly.assert_not_called() + assert new_history == history + assert responses == [] + + +async def test_resolve_pending_confirmations_unknown_confirmation_id_is_ignored(): + """A tool_confirmations entry with no matching pending entry is a no-op (legacy flow still works).""" + iface = _Dummy() + + agent = AsyncMock() + + context = ChatContext( + state={"pending_confirmations": {}}, + tool_confirmations=[{"confirmation_id": "unknown", "confirmed": True}], + ) + + new_history, responses = await iface.resolve_pending_confirmations(agent, context, []) + + agent.execute_tool_directly.assert_not_called() + assert new_history == [] + assert responses == [] + + +@pytest.fixture(autouse=True) +def _secret_key(monkeypatch): + monkeypatch.setenv("RAGBITS_SECRET_KEY", "test-secret") From 41cceed6ce79294772efe2ea59c63ddf8e8cefee Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Sun, 19 Apr 2026 14:27:27 +0200 Subject: [PATCH 5/8] docs(examples): update file_explorer_agent to resolve confirmations via direct execution --- examples/chat/file_explorer_agent.py | 45 ++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/examples/chat/file_explorer_agent.py b/examples/chat/file_explorer_agent.py index 786d245053..47d339eaa8 100644 --- a/examples/chat/file_explorer_agent.py +++ b/examples/chat/file_explorer_agent.py @@ -564,15 +564,12 @@ async def chat( # noqa: PLR0912 """ Chat implementation with non-blocking confirmation support. - The agent will check context.tool_confirmations for any confirmations. - If a hook needs confirmation but hasn't been confirmed yet, it will - yield a ConfirmationRequest and exit. The frontend will then send a - new request with the confirmation in context.tool_confirmations. + Confirmed tools from the previous turn are executed directly by the chat layer + (via ``resolve_pending_confirmations``), then their results are injected into + history before the agent continues. The LLM never regenerates the tool call, + so the confirmation_id hash stays stable and there is no approval loop. """ - # Create agent with history passed explicitly - agent: Agent = Agent( - llm=self.llm, - prompt=f""" + agent_prompt = f""" You are a file explorer agent. You have tools available. CRITICAL: When a user asks you to perform an action, you MUST IMMEDIATELY CALL THE APPROPRIATE TOOL. @@ -586,16 +583,35 @@ async def chat( # noqa: PLR0912 Available tools: {', '.join([t.__name__ for t in self.tools])} Restricted to: {TEMP_DIR} - """, + """ + + # Resolve any pending confirmations the user just responded to: execute the + # approved tools directly, append (tool_use, tool_result) pairs to history. + resolver_agent: Agent = Agent( + llm=self.llm, + prompt=agent_prompt, tools=self.tools, # type: ignore[arg-type] hooks=self.hooks, history=history, ) + history, resolution_responses = await self.resolve_pending_confirmations(resolver_agent, context, history) + for response in resolution_responses: + yield response - # Create agent context with tool_confirmations from the request context - agent_context: AgentRunContext = AgentRunContext() + # Build the run-time agent with the (possibly updated) history so the LLM + # continues from the injected tool results rather than re-deciding calls. + agent: Agent = Agent( + llm=self.llm, + prompt=agent_prompt, + tools=self.tools, # type: ignore[arg-type] + hooks=self.hooks, + history=history, + ) - # Pass tool_confirmations from ChatContext to AgentRunContext + # Forward any tool_confirmations to the agent context — supports legacy + # hash-matched confirmations for tools that don't flow through the direct + # execution path. + agent_context: AgentRunContext = AgentRunContext() if context.tool_confirmations: agent_context.tool_confirmations = context.tool_confirmations @@ -616,7 +632,10 @@ async def chat( # noqa: PLR0912 yield self.create_live_update(response.id, LiveUpdateType.START, f"🔧 {response.name}") case ConfirmationRequest(): - # Confirmation needed - send to frontend and wait for user response + # Persist the pending confirmation so the next turn can resolve it + # directly (via resolve_pending_confirmations) instead of asking + # the LLM to regenerate the tool call. + yield self.create_state_update(self.create_pending_confirmation_state(response)) yield ConfirmationRequestResponse(content=ConfirmationRequestContent(confirmation_request=response)) case ToolCallResult(): From c6526c2f7268e8e69a63d02280b0b7ece90bbcf4 Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Sun, 19 Apr 2026 17:45:11 +0200 Subject: [PATCH 6/8] style: apply ruff formatting and lint fixes to new test files --- packages/ragbits-agents/src/ragbits/agents/_main.py | 4 +++- packages/ragbits-agents/tests/unit/hooks/test_manager.py | 8 ++++---- packages/ragbits-agents/tests/unit/test_agent.py | 2 +- packages/ragbits-agents/tests/unit/test_history.py | 8 ++------ .../tests/unit/test_confirmation_resolution.py | 8 ++++---- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/packages/ragbits-agents/src/ragbits/agents/_main.py b/packages/ragbits-agents/src/ragbits/agents/_main.py index 38136ed4a4..02987c0c4d 100644 --- a/packages/ragbits-agents/src/ragbits/agents/_main.py +++ b/packages/ragbits-agents/src/ragbits/agents/_main.py @@ -1097,11 +1097,13 @@ async def execute_tool_directly( AgentToolNotAvailableError: If the tool is not registered on this agent. """ tools_mapping = await self._get_all_tools() + # ToolCall declares arguments as dict but has a "before" validator that + # json.loads strings, so we pass the serialized form to satisfy the validator. tool_call = ToolCall( id=tool_call_id, type="function", name=tool_name, - arguments=json.dumps(arguments), + arguments=json.dumps(arguments), # type: ignore[arg-type] ) result: ToolCallResult | None = None async for item in self._execute_tool(tool_call=tool_call, tools_mapping=tools_mapping, context=context): diff --git a/packages/ragbits-agents/tests/unit/hooks/test_manager.py b/packages/ragbits-agents/tests/unit/hooks/test_manager.py index 193988ae44..e923e08001 100644 --- a/packages/ragbits-agents/tests/unit/hooks/test_manager.py +++ b/packages/ragbits-agents/tests/unit/hooks/test_manager.py @@ -146,7 +146,7 @@ async def test_ask_with_tool_name_fallback_approved(self, tool_call: ToolCall, a manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) # Simulate cross-turn: frontend sends back tool_name but with a stale confirmation_id - ctx = AgentRunContext( + ctx: AgentRunContext = AgentRunContext( tool_confirmations=[ {"confirmation_id": "stale_id_from_previous_turn", "tool_name": "test_tool", "confirmed": True} ] @@ -161,7 +161,7 @@ async def test_ask_with_tool_name_fallback_declined(self, tool_call: ToolCall, a """When confirmation_id doesn't match but tool_name does and user declined.""" manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) - ctx = AgentRunContext( + ctx: AgentRunContext = AgentRunContext( tool_confirmations=[ {"confirmation_id": "stale_id_from_previous_turn", "tool_name": "test_tool", "confirmed": False} ] @@ -179,7 +179,7 @@ async def test_exact_confirmation_id_takes_priority_over_tool_name( manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) exact_id = make_confirmation_id("ask_hook", "test_tool", {"arg1": "value1"}) - ctx = AgentRunContext( + ctx: AgentRunContext = AgentRunContext( tool_confirmations=[ # tool_name match says declined {"confirmation_id": "wrong_id", "tool_name": "test_tool", "confirmed": False}, @@ -198,7 +198,7 @@ async def test_tool_name_fallback_does_not_match_different_tool( """tool_name fallback should not match confirmations for a different tool.""" manager: HookManager = HookManager(hooks=[Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)]) - ctx = AgentRunContext( + ctx: AgentRunContext = AgentRunContext( tool_confirmations=[{"confirmation_id": "some_id", "tool_name": "other_tool", "confirmed": True}] ) result, confirmation = await manager.execute_pre_tool(tool_call, ctx) diff --git a/packages/ragbits-agents/tests/unit/test_agent.py b/packages/ragbits-agents/tests/unit/test_agent.py index 8648ecbb20..05728c2188 100644 --- a/packages/ragbits-agents/tests/unit/test_agent.py +++ b/packages/ragbits-agents/tests/unit/test_agent.py @@ -917,7 +917,7 @@ async def test_execute_tool_directly_runs_tool_with_given_arguments(llm_without_ """execute_tool_directly runs the named tool with explicit args, bypassing LLM tool selection.""" agent = Agent(llm=llm_without_tool_call, prompt=CustomPrompt, tools=[get_weather]) - context = AgentRunContext() + context: AgentRunContext = AgentRunContext() result = await agent.execute_tool_directly( tool_call_id="call_1", tool_name="get_weather", diff --git a/packages/ragbits-agents/tests/unit/test_history.py b/packages/ragbits-agents/tests/unit/test_history.py index 6a1c853fd2..dde49d49cd 100644 --- a/packages/ragbits-agents/tests/unit/test_history.py +++ b/packages/ragbits-agents/tests/unit/test_history.py @@ -39,17 +39,13 @@ def test_inject_tool_call_does_not_mutate_input(): """Helper returns a new list; caller's history is untouched.""" history = [{"role": "user", "content": "hi"}] - inject_tool_call( - history, tool_call_id="c", tool_name="t", arguments={}, result="ok" - ) + inject_tool_call(history, tool_call_id="c", tool_name="t", arguments={}, result="ok") assert history == [{"role": "user", "content": "hi"}] def test_inject_tool_call_stringifies_non_string_result(): """Non-string results are coerced to str (matching add_tool_use_message).""" - result = inject_tool_call( - [], tool_call_id="c", tool_name="t", arguments={}, result={"key": "value"} - ) + result = inject_tool_call([], tool_call_id="c", tool_name="t", arguments={}, result={"key": "value"}) assert result[1]["content"] == str({"key": "value"}) diff --git a/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py b/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py index dc08a689e1..dbfa7bed23 100644 --- a/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py +++ b/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py @@ -1,7 +1,6 @@ """Tests for ChatInterface confirmation-resolution helpers.""" from collections.abc import AsyncGenerator -from typing import Any from unittest.mock import AsyncMock import pytest @@ -10,11 +9,12 @@ from ragbits.agents.confirmation import ConfirmationRequest from ragbits.chat.interface import ChatInterface from ragbits.chat.interface.types import ChatContext, ChatResponseUnion, TextContent, TextResponse +from ragbits.core.prompt.base import ChatFormat class _Dummy(ChatInterface): - async def chat( # type: ignore[override] - self, message: str, history: Any, context: ChatContext + async def chat( # type: ignore[override] # noqa: PLR6301 + self, message: str, history: ChatFormat, context: ChatContext ) -> AsyncGenerator[ChatResponseUnion, None]: yield TextResponse(content=TextContent(text="ok")) @@ -141,5 +141,5 @@ async def test_resolve_pending_confirmations_unknown_confirmation_id_is_ignored( @pytest.fixture(autouse=True) -def _secret_key(monkeypatch): +def _secret_key(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("RAGBITS_SECRET_KEY", "test-secret") From 33ff7c22f3785e268e5d0111e0c5987f0fefcbee Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Sun, 19 Apr 2026 19:04:48 +0200 Subject: [PATCH 7/8] refactor(chat): mutate agent.history in place in resolve_pending_confirmations --- examples/chat/file_explorer_agent.py | 34 ++++++------------- .../src/ragbits/chat/interface/_interface.py | 34 +++++++++---------- .../unit/test_confirmation_resolution.py | 34 ++++++++++--------- 3 files changed, 45 insertions(+), 57 deletions(-) diff --git a/examples/chat/file_explorer_agent.py b/examples/chat/file_explorer_agent.py index 47d339eaa8..b8c591df6b 100644 --- a/examples/chat/file_explorer_agent.py +++ b/examples/chat/file_explorer_agent.py @@ -569,7 +569,9 @@ async def chat( # noqa: PLR0912 history before the agent continues. The LLM never regenerates the tool call, so the confirmation_id hash stays stable and there is no approval loop. """ - agent_prompt = f""" + agent: Agent = Agent( + llm=self.llm, + prompt=f""" You are a file explorer agent. You have tools available. CRITICAL: When a user asks you to perform an action, you MUST IMMEDIATELY CALL THE APPROPRIATE TOOL. @@ -583,34 +585,20 @@ async def chat( # noqa: PLR0912 Available tools: {', '.join([t.__name__ for t in self.tools])} Restricted to: {TEMP_DIR} - """ - - # Resolve any pending confirmations the user just responded to: execute the - # approved tools directly, append (tool_use, tool_result) pairs to history. - resolver_agent: Agent = Agent( - llm=self.llm, - prompt=agent_prompt, + """, tools=self.tools, # type: ignore[arg-type] hooks=self.hooks, history=history, ) - history, resolution_responses = await self.resolve_pending_confirmations(resolver_agent, context, history) - for response in resolution_responses: - yield response - # Build the run-time agent with the (possibly updated) history so the LLM - # continues from the injected tool results rather than re-deciding calls. - agent: Agent = Agent( - llm=self.llm, - prompt=agent_prompt, - tools=self.tools, # type: ignore[arg-type] - hooks=self.hooks, - history=history, - ) + # Execute tools the user just approved/declined: mutates agent.history in place + # with synthetic (tool_use, tool_result) pairs so the LLM continues from the + # results instead of re-deciding the confirmed call. + for response in await self.resolve_pending_confirmations(agent, context): + yield response - # Forward any tool_confirmations to the agent context — supports legacy - # hash-matched confirmations for tools that don't flow through the direct - # execution path. + # Forward tool_confirmations to the agent context — supports any legacy + # hash-matched confirmations for tools not routed through direct execution. agent_context: AgentRunContext = AgentRunContext() if context.tool_confirmations: agent_context.tool_confirmations = context.tool_confirmations diff --git a/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py b/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py index 5dcfc28f01..680a779476 100644 --- a/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py +++ b/packages/ragbits-chat/src/ragbits/chat/interface/_interface.py @@ -317,10 +317,9 @@ async def resolve_pending_confirmations( self, agent: Agent, context: ChatContext, - history: ChatFormat, - ) -> tuple[ChatFormat, list[ChatResponseUnion]]: + ) -> list[ChatResponseUnion]: """ - Execute tools whose confirmations the user just returned, producing updated history. + Execute tools whose confirmations the user just returned, updating ``agent.history`` in place. For each entry in ``context.tool_confirmations`` matched against ``context.state["pending_confirmations"]``: @@ -329,28 +328,27 @@ async def resolve_pending_confirmations( stored arguments, then append the (tool_use, tool_result) pair to history. - **declined**: append a synthetic tool_result saying the user declined. - The agent's next ``run_streaming`` call sees these pairs already in history - and continues naturally — the LLM is never asked to regenerate the confirmed - tool call, so there is no argument drift or hash-mismatch loop. + When the caller next runs ``agent.run_streaming(...)``, the LLM sees the + injected tool results already in history and continues naturally — it is + never asked to regenerate the confirmed tool call, so there is no argument + drift or hash-mismatch loop. Args: - agent: The agent whose tools should be executed for confirmed entries. + agent: The agent whose tools should be executed. ``agent.history`` is + mutated in place to include the injected (tool_use, tool_result) pairs. context: The chat context from this turn. - history: The conversation history passed to this chat turn. Returns: - Tuple of ``(new_history, responses_to_yield)``. Caller should yield the - responses (UI live updates) before continuing the agent run with - ``new_history``. + UI responses (e.g. LiveUpdates) the caller should yield before continuing + the agent run. Empty list if there are no pending confirmations to resolve. """ pending_map: dict[str, dict[str, Any]] = context.state.get("pending_confirmations", {}) or {} confirmations = context.tool_confirmations or [] if not pending_map or not confirmations: - return history, [] + return [] agent_context: AgentRunContext = AgentRunContext(tool_confirmations=list(confirmations)) responses: list[ChatResponseUnion] = [] - new_history = history for entry in confirmations: confirmation_id = entry.get("confirmation_id") @@ -374,8 +372,8 @@ async def resolve_pending_confirmations( tool_call_id, LiveUpdateType.FINISH, f"✅ {tool_name}", str(result.result)[:100] ) ) - new_history = inject_tool_call( - new_history, + agent.history = inject_tool_call( + agent.history, tool_call_id=tool_call_id, tool_name=tool_name, arguments=arguments, @@ -384,15 +382,15 @@ async def resolve_pending_confirmations( else: decline_msg = "❌ User declined this action" responses.append(self.create_live_update(tool_call_id, LiveUpdateType.FINISH, f"❌ {tool_name}")) - new_history = inject_tool_call( - new_history, + agent.history = inject_tool_call( + agent.history, tool_call_id=tool_call_id, tool_name=tool_name, arguments=arguments, result=decline_msg, ) - return new_history, responses + return responses @staticmethod def _sign_state(state: dict[str, Any]) -> str: diff --git a/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py b/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py index dbfa7bed23..97eb4abf24 100644 --- a/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py +++ b/packages/ragbits-chat/tests/unit/test_confirmation_resolution.py @@ -42,11 +42,12 @@ def test_create_pending_confirmation_state_shape(): } -async def test_resolve_pending_confirmations_confirmed_executes_and_appends_history(): - """A confirmed pending causes execute_tool_directly to run and history to grow.""" +async def test_resolve_pending_confirmations_confirmed_executes_and_mutates_history(): + """A confirmed pending causes execute_tool_directly to run and agent.history to grow.""" iface = _Dummy() agent = AsyncMock() + agent.history = [{"role": "user", "content": "send hi to #general"}] agent.execute_tool_directly.return_value = ToolCallResult( id="call_1", name="send_slack", arguments={"channel": "#general", "text": "hi"}, result="sent" ) @@ -63,9 +64,8 @@ async def test_resolve_pending_confirmations_confirmed_executes_and_appends_hist }, tool_confirmations=[{"confirmation_id": "conf_1", "confirmed": True}], ) - history = [{"role": "user", "content": "send hi to #general"}] - new_history, responses = await iface.resolve_pending_confirmations(agent, context, history) + responses = await iface.resolve_pending_confirmations(agent, context) agent.execute_tool_directly.assert_awaited_once() call_kwargs = agent.execute_tool_directly.call_args.kwargs @@ -73,10 +73,10 @@ async def test_resolve_pending_confirmations_confirmed_executes_and_appends_hist assert call_kwargs["tool_name"] == "send_slack" assert call_kwargs["arguments"] == {"channel": "#general", "text": "hi"} - assert len(new_history) == 3 - assert new_history[1]["role"] == "assistant" - assert new_history[1]["tool_calls"][0]["id"] == "call_1" - assert new_history[2] == {"role": "tool", "tool_call_id": "call_1", "content": "sent"} + assert len(agent.history) == 3 + assert agent.history[1]["role"] == "assistant" + assert agent.history[1]["tool_calls"][0]["id"] == "call_1" + assert agent.history[2] == {"role": "tool", "tool_call_id": "call_1", "content": "sent"} assert responses # at least one UI response emitted @@ -85,6 +85,7 @@ async def test_resolve_pending_confirmations_declined_skips_execution(): iface = _Dummy() agent = AsyncMock() + agent.history = [] context = ChatContext( state={ @@ -99,11 +100,11 @@ async def test_resolve_pending_confirmations_declined_skips_execution(): tool_confirmations=[{"confirmation_id": "conf_1", "confirmed": False}], ) - new_history, _ = await iface.resolve_pending_confirmations(agent, context, []) + await iface.resolve_pending_confirmations(agent, context) agent.execute_tool_directly.assert_not_called() - assert new_history[-1]["role"] == "tool" - assert "declined" in new_history[-1]["content"].lower() + assert agent.history[-1]["role"] == "tool" + assert "declined" in agent.history[-1]["content"].lower() async def test_resolve_pending_confirmations_no_pending_returns_input_unchanged(): @@ -111,14 +112,14 @@ async def test_resolve_pending_confirmations_no_pending_returns_input_unchanged( iface = _Dummy() agent = AsyncMock() + agent.history = [{"role": "user", "content": "hi"}] context = ChatContext(state={}, tool_confirmations=[]) - history = [{"role": "user", "content": "hi"}] - new_history, responses = await iface.resolve_pending_confirmations(agent, context, history) + responses = await iface.resolve_pending_confirmations(agent, context) agent.execute_tool_directly.assert_not_called() - assert new_history == history + assert agent.history == [{"role": "user", "content": "hi"}] assert responses == [] @@ -127,16 +128,17 @@ async def test_resolve_pending_confirmations_unknown_confirmation_id_is_ignored( iface = _Dummy() agent = AsyncMock() + agent.history = [] context = ChatContext( state={"pending_confirmations": {}}, tool_confirmations=[{"confirmation_id": "unknown", "confirmed": True}], ) - new_history, responses = await iface.resolve_pending_confirmations(agent, context, []) + responses = await iface.resolve_pending_confirmations(agent, context) agent.execute_tool_directly.assert_not_called() - assert new_history == [] + assert agent.history == [] assert responses == [] From 1b3ae55f92a37f9f0353b234b70ce37256b2fea6 Mon Sep 17 00:00:00 2001 From: mkoruszowic Date: Mon, 20 Apr 2026 11:14:50 +0200 Subject: [PATCH 8/8] docs: add changelog entries for #969 confirmation flow fix --- packages/ragbits-agents/CHANGELOG.md | 2 ++ packages/ragbits-chat/CHANGELOG.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/packages/ragbits-agents/CHANGELOG.md b/packages/ragbits-agents/CHANGELOG.md index 8d897dc4be..b75d3a8cc9 100644 --- a/packages/ragbits-agents/CHANGELOG.md +++ b/packages/ragbits-agents/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Fix tool-confirmation loop across conversation turns caused by LLM argument drift (#969). Adds `Agent.execute_tool_directly` and an `inject_tool_call` helper so chat layers can resume confirmed tools without re-prompting the LLM. `HookManager` also falls back to matching by `tool_name` when the exact `confirmation_id` hash misses. `ConfirmationRequest` now carries `tool_call_id`. + ## 1.6.2 (2026-03-26) - ragbits-core updated to version v1.6.2 diff --git a/packages/ragbits-chat/CHANGELOG.md b/packages/ragbits-chat/CHANGELOG.md index 6ff581d382..357730f09d 100644 --- a/packages/ragbits-chat/CHANGELOG.md +++ b/packages/ragbits-chat/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Add `ChatInterface.resolve_pending_confirmations` and `create_pending_confirmation_state` helpers so chat implementations can execute user-approved tools directly on the continuation turn, preventing the confirmation loop caused by LLM argument drift (#969). + ## 1.6.2 (2026-03-26) - ragbits-agents updated to version v1.6.2