Skip to content
Draft
25 changes: 16 additions & 9 deletions examples/chat/file_explorer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,12 +564,11 @@ async def chat( # noqa: PLR0912
"""
Chat implementation with non-blocking confirmation support.

The agent will check context.tool_confirmations for any confirmations.
If a hook needs confirmation but hasn't been confirmed yet, it will
yield a ConfirmationRequest and exit. The frontend will then send a
new request with the confirmation in context.tool_confirmations.
Confirmed tools from the previous turn are executed directly by the chat layer
(via ``resolve_pending_confirmations``), then their results are injected into
history before the agent continues. The LLM never regenerates the tool call,
so the confirmation_id hash stays stable and there is no approval loop.
"""
# Create agent with history passed explicitly
agent: Agent = Agent(
llm=self.llm,
prompt=f"""
Expand All @@ -592,10 +591,15 @@ async def chat( # noqa: PLR0912
history=history,
)

# Create agent context with tool_confirmations from the request context
agent_context: AgentRunContext = AgentRunContext()
# Execute tools the user just approved/declined: mutates agent.history in place
# with synthetic (tool_use, tool_result) pairs so the LLM continues from the
# results instead of re-deciding the confirmed call.
for response in await self.resolve_pending_confirmations(agent, context):
yield response

# Pass tool_confirmations from ChatContext to AgentRunContext
# Forward tool_confirmations to the agent context — supports any legacy
# hash-matched confirmations for tools not routed through direct execution.
agent_context: AgentRunContext = AgentRunContext()
if context.tool_confirmations:
agent_context.tool_confirmations = context.tool_confirmations

Expand All @@ -616,7 +620,10 @@ async def chat( # noqa: PLR0912
yield self.create_live_update(response.id, LiveUpdateType.START, f"🔧 {response.name}")

case ConfirmationRequest():
# Confirmation needed - send to frontend and wait for user response
# Persist the pending confirmation so the next turn can resolve it
# directly (via resolve_pending_confirmations) instead of asking
# the LLM to regenerate the tool call.
yield self.create_state_update(self.create_pending_confirmation_state(response))
yield ConfirmationRequestResponse(content=ConfirmationRequestContent(confirmation_request=response))

case ToolCallResult():
Expand Down
2 changes: 2 additions & 0 deletions packages/ragbits-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Unreleased

- Fix tool-confirmation loop across conversation turns caused by LLM argument drift (#969). Adds `Agent.execute_tool_directly` and an `inject_tool_call` helper so chat layers can resume confirmed tools without re-prompting the LLM. `HookManager` also falls back to matching by `tool_name` when the exact `confirmation_id` hash misses. `ConfirmationRequest` now carries `tool_call_id`.

## 1.6.2 (2026-03-26)

- ragbits-core updated to version v1.6.2
Expand Down
52 changes: 52 additions & 0 deletions packages/ragbits-agents/src/ragbits/agents/_main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import json
import types
import uuid
import warnings
Expand Down Expand Up @@ -1061,6 +1062,57 @@ async def _process_tool_output(
else:
yield ToolReturn(value=tool_output, metadata=None)

async def execute_tool_directly(
    self,
    tool_call_id: str,
    tool_name: str,
    arguments: dict[str, Any],
    context: AgentRunContext,
) -> ToolCallResult:
    """
    Run a single tool with arguments supplied by the caller and return its result.

    This is the resume path for chat layers after a user confirmation. The chat
    layer keeps the arguments from before the confirmation and replays them here,
    so the LLM is never asked to regenerate the call (regeneration risks argument
    drift and, with it, a confirmation_id that no longer matches).

    The call goes through the regular tool execution path, so PRE_TOOL hooks still
    run. When a hook asks for confirmation, the caller must already have put the
    matching approval into ``context.tool_confirmations`` so the decision resolves
    to ``pass``. A ``deny`` from any PRE_TOOL hook is honoured and stops execution.
    POST_TOOL hooks run on success.

    Args:
        tool_call_id: Identifier attached to the resulting ``ToolCallResult``.
        tool_name: Name of the tool to invoke.
        arguments: Arguments for the tool; pass the original pre-confirmation
            arguments so the confirmation_id stays stable.
        context: Agent run context, including any prior ``tool_confirmations``.

    Returns:
        The last ``ToolCallResult`` emitted by the tool execution path.

    Raises:
        AgentToolNotAvailableError: If the tool is not registered on this agent.
        RuntimeError: If the execution path finishes without emitting any
            ``ToolCallResult``.
    """
    available_tools = await self._get_all_tools()

    # ToolCall types ``arguments`` as a dict, but its "before" validator runs
    # json.loads on strings, so the serialized form is what gets handed over.
    replayed_call = ToolCall(
        id=tool_call_id,
        type="function",
        name=tool_name,
        arguments=json.dumps(arguments),  # type: ignore[arg-type]
    )

    # Drain the whole stream; only ToolCallResult items matter to the caller.
    tool_results = [
        event
        async for event in self._execute_tool(
            tool_call=replayed_call,
            tools_mapping=available_tools,
            context=context,
        )
        if isinstance(event, ToolCallResult)
    ]
    if not tool_results:
        raise RuntimeError(f"Tool {tool_name!r} produced no ToolCallResult")
    return tool_results[-1]

async def _execute_tool(
self,
tool_call: ToolCall,
Expand Down
3 changes: 3 additions & 0 deletions packages/ragbits-agents/src/ragbits/agents/confirmation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ class ConfirmationRequest(BaseModel):

confirmation_id: str
"""Unique identifier for this confirmation request."""
tool_call_id: str
"""Identifier of the originating ToolCall — threads the tool_use and tool_result messages
when the chat layer resumes execution via Agent.execute_tool_directly."""
tool_name: str
"""Name of the tool requiring confirmation."""
tool_description: str
Expand Down
51 changes: 51 additions & 0 deletions packages/ragbits-agents/src/ragbits/agents/history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Helpers for manipulating agent conversation history (ChatFormat)."""

import json
from typing import Any

from ragbits.core.prompt.base import ChatFormat


def inject_tool_call(
    history: ChatFormat,
    tool_call_id: str,
    tool_name: str,
    arguments: dict[str, Any],
    result: Any,  # noqa: ANN401
) -> ChatFormat:
    """
    Return a copy of ``history`` extended with a synthetic tool-call exchange.

    The chat layer uses this after running a tool itself (for example once the
    user approved a confirmation), so the LLM sees the outcome without having
    to decide on the call again.

    Two messages in OpenAI's tool-use format are appended: an ``assistant``
    message holding the ``tool_calls`` block, followed by a ``tool`` message
    holding the result, both linked through ``tool_call_id``.

    Args:
        history: Current conversation history. Left untouched; only a shallow
            copy is extended.
        tool_call_id: Identifier shared by the assistant and tool messages.
        tool_name: Name of the tool that was invoked.
        arguments: Arguments the tool was invoked with (stored JSON-encoded).
        result: Tool output, converted with ``str``.

    Returns:
        A new ChatFormat ending with the two appended messages.
    """
    extended = list(history)

    assistant_turn: dict[str, Any] = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": tool_call_id,
                "type": "function",
                "function": {"name": tool_name, "arguments": json.dumps(arguments)},
            }
        ],
    }
    tool_turn: dict[str, Any] = {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "content": str(result),
    }

    extended.append(assistant_turn)
    extended.append(tool_turn)
    return extended
88 changes: 63 additions & 25 deletions packages/ragbits-agents/src/ragbits/agents/hooks/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import json
from collections import defaultdict
from collections.abc import AsyncGenerator
from typing import TYPE_CHECKING, Generic, Literal, overload
from typing import TYPE_CHECKING, Any, Generic, Literal, overload

from ragbits.agents.confirmation import ConfirmationRequest
from ragbits.agents.hooks.base import Hook
Expand Down Expand Up @@ -111,6 +111,48 @@ def get_hooks(self, event_type: EventType, tool_name: str | None = None) -> list

return [hook for hook in hooks if hook.matches_tool(tool_name)]

@staticmethod
def _compute_confirmation_id(hook_name: str, tool_name: str, arguments: dict[str, Any]) -> str:
    """
    Derive the confirmation_id for a (hook, tool, arguments) triple.

    The id is the SHA-256 hex digest of ``"<hook>:<tool>:<arguments as JSON with
    sorted keys>"``, cut to ``CONFIRMATION_ID_LENGTH`` characters. Kept as a
    standalone helper so the chat layer (or tests) can recompute the same id
    when resuming a paused confirmation flow.
    """
    # sort_keys makes the id independent of the argument dict's key order.
    canonical_arguments = json.dumps(arguments, sort_keys=True)
    fingerprint_source = "{}:{}:{}".format(hook_name, tool_name, canonical_arguments)
    full_digest = hashlib.sha256(fingerprint_source.encode()).hexdigest()
    return full_digest[:CONFIRMATION_ID_LENGTH]

@staticmethod
def _find_confirmation(
tool_confirmations: list[dict[str, Any]],
confirmation_id: str,
tool_name: str,
) -> dict[str, Any] | None:
"""
Find a matching confirmation entry.

Tries exact confirmation_id match first, then falls back to tool_name match.
The tool_name fallback handles cross-turn scenarios where the LLM regenerates
arguments with cosmetic differences, changing the hash.

Args:
tool_confirmations: List of confirmation entries from context
confirmation_id: The computed confirmation ID for this tool call
tool_name: The name of the tool being called

Returns:
The matching confirmation entry, or None if not found
"""
for conf in tool_confirmations:
if conf.get("confirmation_id") == confirmation_id:
return conf

for conf in tool_confirmations:
if conf.get("tool_name") == tool_name:
return conf

return None

async def execute_pre_tool(
self,
tool_call: ToolCall,
Expand All @@ -132,12 +174,11 @@ async def execute_pre_tool(
current_tool_call = tool_call.model_copy()

for hook in self.get_hooks(EventType.PRE_TOOL, tool_call.name):
# Generate confirmation_id: hash(hook_function_name + tool_name + arguments)
hook_name = hook.callback.__name__
confirmation_id_str = (
f"{hook_name}:{tool_call.name}:{json.dumps(current_tool_call.arguments, sort_keys=True)}"
confirmation_id = self._compute_confirmation_id(
hook_name=hook.callback.__name__,
tool_name=tool_call.name,
arguments=current_tool_call.arguments,
)
confirmation_id = hashlib.sha256(confirmation_id_str.encode()).hexdigest()[:CONFIRMATION_ID_LENGTH]

result: ToolCall = await hook.callback(current_tool_call)

Expand All @@ -148,28 +189,25 @@ async def execute_pre_tool(
return result, None

elif result.decision == "ask":
# Check if already confirmed/declined in context
for conf in context.tool_confirmations:
if conf.get("confirmation_id") == confirmation_id:
if conf.get("confirmed"):
# Approved → convert to "pass" and continue to next hook
result = result.model_copy(update={"decision": "pass"})
break
else:
# Declined → convert to "deny" and stop immediately
return (
result.model_copy(
update={
"decision": "deny",
"reason": "❌ Action declined by user",
}
),
None,
)
matched = self._find_confirmation(context.tool_confirmations, confirmation_id, tool_call.name)

if matched is not None:
if matched.get("confirmed"):
result = result.model_copy(update={"decision": "pass"})
else:
return (
result.model_copy(
update={
"decision": "deny",
"reason": "❌ Action declined by user",
}
),
None,
)
else:
# Not in context → return "ask" with ConfirmationRequest
confirmation_request = ConfirmationRequest(
confirmation_id=confirmation_id,
tool_call_id=tool_call.id,
tool_name=tool_call.name,
tool_description=result.reason, # type: ignore[arg-type] # guaranteed non-None by ValueError check above
arguments=current_tool_call.arguments,
Expand Down
67 changes: 67 additions & 0 deletions packages/ragbits-agents/tests/unit/hooks/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,73 @@ async def test_ask_with_prior_confirmation(self, tool_call: ToolCall, ask_hook:
result, _ = await manager.execute_pre_tool(tool_call, ctx_declined)
assert result.decision == "deny"

@pytest.mark.asyncio
async def test_ask_with_tool_name_fallback_approved(self, tool_call: ToolCall, ask_hook: PreToolCallback):
    """An approval with a stale confirmation_id still applies when its tool_name matches."""
    pre_tool_hook = Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)
    manager: HookManager = HookManager(hooks=[pre_tool_hook])

    # Cross-turn situation: the frontend echoes the tool name, but the id it
    # carries was computed in an earlier turn and no longer matches the hash.
    stale_approval = {
        "confirmation_id": "stale_id_from_previous_turn",
        "tool_name": "test_tool",
        "confirmed": True,
    }
    ctx: AgentRunContext = AgentRunContext(tool_confirmations=[stale_approval])

    result, confirmation = await manager.execute_pre_tool(tool_call, ctx)

    assert confirmation is None
    assert result.decision == "pass"

@pytest.mark.asyncio
async def test_ask_with_tool_name_fallback_declined(self, tool_call: ToolCall, ask_hook: PreToolCallback):
    """A decline with a stale confirmation_id still denies the call when its tool_name matches."""
    pre_tool_hook = Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)
    manager: HookManager = HookManager(hooks=[pre_tool_hook])

    # Same tool, mismatching id, and the user said no.
    stale_decline = {
        "confirmation_id": "stale_id_from_previous_turn",
        "tool_name": "test_tool",
        "confirmed": False,
    }
    ctx: AgentRunContext = AgentRunContext(tool_confirmations=[stale_decline])

    result, confirmation = await manager.execute_pre_tool(tool_call, ctx)

    assert confirmation is None
    assert result.decision == "deny"

@pytest.mark.asyncio
async def test_exact_confirmation_id_takes_priority_over_tool_name(
    self, tool_call: ToolCall, ask_hook: PreToolCallback
):
    """An entry with the exact confirmation_id wins over an earlier tool_name-only match."""
    pre_tool_hook = Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)
    manager: HookManager = HookManager(hooks=[pre_tool_hook])
    exact_id = make_confirmation_id("ask_hook", "test_tool", {"arg1": "value1"})

    # Listed first and matching only by tool name: this one says "declined".
    declined_by_name = {"confirmation_id": "wrong_id", "tool_name": "test_tool", "confirmed": False}
    # Listed second but matching the exact id: this one says "approved" and must win.
    approved_by_id = {"confirmation_id": exact_id, "confirmed": True}
    ctx: AgentRunContext = AgentRunContext(tool_confirmations=[declined_by_name, approved_by_id])

    result, _ = await manager.execute_pre_tool(tool_call, ctx)

    assert result.decision == "pass"

@pytest.mark.asyncio
async def test_tool_name_fallback_does_not_match_different_tool(
    self, tool_call: ToolCall, ask_hook: PreToolCallback
):
    """A confirmation recorded for another tool must not satisfy the tool_name fallback."""
    pre_tool_hook = Hook(event_type=EventType.PRE_TOOL, callback=ask_hook)
    manager: HookManager = HookManager(hooks=[pre_tool_hook])

    # Approved, but for "other_tool" and with an id that does not match either.
    unrelated_approval = {"confirmation_id": "some_id", "tool_name": "other_tool", "confirmed": True}
    ctx: AgentRunContext = AgentRunContext(tool_confirmations=[unrelated_approval])

    result, confirmation = await manager.execute_pre_tool(tool_call, ctx)

    # Nothing matched, so the hook's "ask" stands and a fresh request is issued.
    assert result.decision == "ask"
    assert confirmation is not None
    assert confirmation.tool_name == "test_tool"

@pytest.mark.asyncio
async def test_chaining(
self, tool_call: ToolCall, context: AgentRunContext, pre_tool_add_field: Callable[..., PreToolCallback]
Expand Down
Loading
Loading