Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions openhands-sdk/openhands/sdk/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def events_to_messages(events: list["LLMConvertibleEvent"]) -> list[Message]:
messages.append(msg)
i += 1

# Vertex Gemini thought signatures (``call_..__thought__<blob>``) are only
# consumed on the immediately following tool-result turn. Strip them from
# archival history so they aren't re-shipped in every subsequent prompt.
_strip_archival_thought_signatures(messages)

return messages


Expand Down Expand Up @@ -174,3 +179,57 @@ def _combine_action_events(events: list["ActionEvent"]) -> Message:
reasoning_content=events[0].reasoning_content, # Shared reasoning content
thinking_blocks=events[0].thinking_blocks, # Shared thinking blocks
)


def _strip_archival_thought_signatures(messages: list[Message]) -> None:
    """Remove stale Vertex Gemini thought signatures from ``messages`` in place.

    LiteLLM carries Gemini's ``thoughtSignature`` inside the OpenAI-shaped
    tool-call id as ``call_<hex>__thought__<base64-blob>``. This pass treats
    the ids of the last assistant message that has tool calls as "live" and
    leaves them untouched wherever they appear. Every other signed id, on
    assistant tool calls and on tool-result messages alike, is reduced to its
    canonical form. Both sides of a pair go through the same rule, so a tool
    result keeps matching the call it answers.

    Ids without the ``__thought__`` marker are never rewritten.

    Args:
        messages: Conversation history to rewrite. The list itself is not
            replaced; a message only gets a new ``tool_calls`` list or a new
            ``tool_call_id`` when something actually had to be stripped.

    NOTE(review): Google's Gemini thought-signature documentation appears to
    validate signatures for every function call in the *current turn*
    (everything after the last user message), not only the latest step.
    Confirm that keeping just the newest assistant turn is sufficient for
    multi-step tool loops before relying on this window.
    """
    from openhands.sdk.llm import MessageToolCall
    from openhands.sdk.llm.utils.thought_signature import (
        has_thought_signature,
        strip_thought_signature,
    )

    # Ids issued by the newest assistant turn that called tools stay intact.
    live_ids: set[str] = next(
        (
            {call.id for call in msg.tool_calls}
            for msg in reversed(messages)
            if msg.role == "assistant" and msg.tool_calls
        ),
        set(),
    )

    def is_stale(call_id: str) -> bool:
        # Signed, and not part of the live window.
        return call_id not in live_ids and has_thought_signature(call_id)

    for msg in messages:
        if msg.role == "assistant" and msg.tool_calls:
            if not any(is_stale(call.id) for call in msg.tool_calls):
                # Nothing to strip: keep the original list object untouched.
                continue
            # Copy only the calls that change so the objects shared with the
            # source events are never mutated.
            refreshed: list[MessageToolCall] = [
                call.model_copy(update={"id": strip_thought_signature(call.id)})
                if is_stale(call.id)
                else call
                for call in msg.tool_calls
            ]
            msg.tool_calls = refreshed
            continue

        if msg.role != "tool":
            continue

        result_id = msg.tool_call_id
        if result_id and is_stale(result_id):
            msg.tool_call_id = strip_thought_signature(result_id)
44 changes: 44 additions & 0 deletions openhands-sdk/openhands/sdk/llm/utils/thought_signature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Helpers for handling Vertex Gemini thought signatures.

When Vertex AI Gemini is used with ``reasoning_effort`` enabled, the provider
returns a ``thoughtSignature`` field on each function-calling turn. LiteLLM
encodes that signature into the OpenAI-shaped ``tool_call.id`` by appending
``__thought__<base64-blob>`` to the canonical call id::

call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi7IVOiw5FIMB22r9...

The signature is required on the *immediately following* tool-result turn so
the model can resume from its previous reasoning state. It is **not** consumed
on any later turn, but the SDK currently re-ships every signature in every
subsequent prompt because they live on the event log. On long agent runs this
can be the dominant cost driver: a single 278 KB signature replayed across 30
turns equals millions of prompt tokens.

The utilities in this module identify and strip the ``__thought__`` suffix so
the SDK can keep signatures on the most recent turn(s) and drop them from
archival history without changing the canonical call id.
"""

from __future__ import annotations


# Literal separator between the canonical call id and the signature blob in
# ids shaped like ``call_<hex>__thought__<base64-blob>``.
THOUGHT_SIGNATURE_MARKER = "__thought__"


def has_thought_signature(tool_call_id: str | None) -> bool:
    """Report whether ``tool_call_id`` contains the thought-signature marker.

    ``None`` and the empty string never carry a signature.
    """
    if not tool_call_id:
        return False
    return THOUGHT_SIGNATURE_MARKER in tool_call_id


def strip_thought_signature(tool_call_id: str) -> str:
    """Return ``tool_call_id`` truncated just before the signature marker.

    Everything from the first occurrence of the marker onward is dropped.
    Ids that do not contain the marker, and falsy ids, are returned as-is.
    """
    if not tool_call_id:
        return tool_call_id
    # ``partition`` splits at the first occurrence and yields the whole
    # string as the head when the marker is absent.
    canonical_id, _, _ = tool_call_id.partition(THOUGHT_SIGNATURE_MARKER)
    return canonical_id
156 changes: 156 additions & 0 deletions tests/sdk/event/test_events_to_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,159 @@ def test_action_event_with_none_action_round_trip_and_observation_match(self):
assert msgs[0].role == "assistant"
assert msgs[1].role == "tool"
assert msgs[1].tool_call_id == "call_ne"


class TestThoughtSignatureStripping:
    """Pin the thought-signature strip pass run by ``events_to_messages``.

    LiteLLM encodes Gemini's thought signature in the OpenAI-shaped tool-call
    id as ``call_<hex>__thought__<base64-blob>``. The conversion is expected
    to keep that suffix on the newest assistant turn with tool calls (and on
    its tool results) and to drop it from every earlier turn.
    """

    @staticmethod
    def _make_pair(turn: int, has_sig: bool = True) -> list[LLMConvertibleEvent]:
        """Build the action event and matching observation for ``turn``."""
        call_id = f"call_{turn:032x}"
        if has_sig:
            call_id = f"{call_id}__thought__{'A' * 1000}"

        action = create_action_event(
            thought_text=f"thinking about turn {turn}",
            tool_name="terminal",
            tool_call_id=call_id,
            llm_response_id=f"resp_{turn}",
            action_args={"command": f"echo {turn}"},
        )
        observation = ObservationEvent(
            source="environment",
            tool_name="terminal",
            tool_call_id=call_id,
            observation=EventsToMessagesMockObservation(result=f"out {turn}"),
            action_id=action.id,
        )
        return cast(list[LLMConvertibleEvent], [action, observation])

    def test_strips_signature_from_older_turns(self):
        """Only the newest turn still carries its signature after conversion."""
        events: list[LLMConvertibleEvent] = [
            event for turn in range(3) for event in self._make_pair(turn)
        ]

        messages = LLMConvertibleEvent.events_to_messages(events)

        # Three assistant/tool pairs, in order.
        assert [m.role for m in messages] == ["assistant", "tool"] * 3

        # Assistant side: the two archived turns are stripped, the last is not.
        for index in (0, 2):
            archived_calls = messages[index].tool_calls
            assert archived_calls is not None
            assert "__thought__" not in archived_calls[0].id
        newest_calls = messages[4].tool_calls
        assert newest_calls is not None
        assert "__thought__" in newest_calls[0].id

        # Tool-result side mirrors the assistant side.
        for index in (1, 3):
            assert "__thought__" not in (messages[index].tool_call_id or "")
        assert "__thought__" in (messages[5].tool_call_id or "")

    def test_stripped_pairs_stay_consistent(self):
        """Each tool-result id equals the id of the call it answers."""
        events: list[LLMConvertibleEvent] = [
            event for turn in range(4) for event in self._make_pair(turn)
        ]

        messages = LLMConvertibleEvent.events_to_messages(events)

        assistants, results = messages[0::2], messages[1::2]
        for assistant_msg, tool_msg in zip(assistants, results, strict=True):
            assert assistant_msg.tool_calls is not None
            assert assistant_msg.tool_calls[0].id == tool_msg.tool_call_id

    def test_does_not_mutate_source_events(self):
        """Conversion leaves the source ``ActionEvent.tool_call.id`` intact."""
        events = [*self._make_pair(0), *self._make_pair(1)]
        first_action = events[0]
        assert isinstance(first_action, ActionEvent)
        id_before = first_action.tool_call.id

        LLMConvertibleEvent.events_to_messages(events)

        # The archived turn was stripped in the messages, not in the event.
        assert first_action.tool_call.id == id_before
        assert "__thought__" in id_before

    def test_no_op_for_non_gemini_ids(self):
        """Ids that never had ``__thought__`` come through unchanged."""
        events: list[LLMConvertibleEvent] = [
            event
            for turn in range(3)
            for event in self._make_pair(turn, has_sig=False)
        ]

        messages = LLMConvertibleEvent.events_to_messages(events)

        assistants, results = messages[0::2], messages[1::2]
        for assistant_msg, tool_msg in zip(assistants, results, strict=True):
            assert assistant_msg.tool_calls is not None
            first_id = assistant_msg.tool_calls[0].id
            assert first_id.startswith("call_")
            assert first_id == tool_msg.tool_call_id
            assert "__thought__" not in first_id

    def test_parallel_tool_calls_share_signature_keep_window(self):
        """Parallel calls from one LLM response are all kept signed."""
        signature = "__thought__" + "B" * 500
        id_a = f"call_aaaa{signature}"
        id_b = f"call_bbbb{signature}"

        # Sharing one llm_response_id is what makes the two calls parallel.
        action_a = create_action_event(
            thought_text="parallel",
            tool_name="terminal",
            tool_call_id=id_a,
            llm_response_id="resp_parallel",
            action_args={"command": "a"},
        )
        # A batched sibling must not carry a thought of its own, and
        # ``create_action_event`` always emits one, so clear it afterwards.
        action_b = create_action_event(
            thought_text="",
            tool_name="terminal",
            tool_call_id=id_b,
            llm_response_id="resp_parallel",
            action_args={"command": "b"},
        ).model_copy(update={"thought": []})

        obs_a = ObservationEvent(
            source="environment",
            tool_name="terminal",
            tool_call_id=id_a,
            observation=EventsToMessagesMockObservation(result="a-out"),
            action_id=action_a.id,
        )
        obs_b = ObservationEvent(
            source="environment",
            tool_name="terminal",
            tool_call_id=id_b,
            observation=EventsToMessagesMockObservation(result="b-out"),
            action_id=action_b.id,
        )

        events = cast(
            list[LLMConvertibleEvent],
            [action_a, action_b, obs_a, obs_b],
        )
        messages = LLMConvertibleEvent.events_to_messages(events)

        # One combined assistant message followed by two tool results.
        combined = messages[0]
        assert combined.role == "assistant"
        assert combined.tool_calls is not None
        # Both calls belong to the newest assistant turn, so both stay signed.
        assert all("__thought__" in call.id for call in combined.tool_calls)
        # The tool results reference those signed ids verbatim.
        assert messages[1].tool_call_id == combined.tool_calls[0].id
        assert messages[2].tool_call_id == combined.tool_calls[1].id
assert messages[1].tool_call_id == messages[0].tool_calls[0].id
assert messages[2].tool_call_id == messages[0].tool_calls[1].id
66 changes: 66 additions & 0 deletions tests/sdk/llm/test_thought_signature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Tests for the thought_signature utility module."""

from openhands.sdk.llm.utils.thought_signature import (
THOUGHT_SIGNATURE_MARKER,
has_thought_signature,
strip_thought_signature,
)


def test_marker_constant_value():
    """Pin the marker to the exact ``__thought__`` literal."""
    expected_marker = "__thought__"
    assert THOUGHT_SIGNATURE_MARKER == expected_marker


class TestHasThoughtSignature:
    """``has_thought_signature`` is True only for ids containing the marker."""

    def test_returns_true_for_gemini_id(self):
        signed_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
        detected = has_thought_signature(signed_id)
        assert detected is True

    def test_returns_false_for_openai_id(self):
        detected = has_thought_signature("call_abc123def456")
        assert detected is False

    def test_returns_false_for_anthropic_id(self):
        detected = has_thought_signature("toolu_01ABCdef")
        assert detected is False

    def test_returns_false_for_empty(self):
        detected = has_thought_signature("")
        assert detected is False

    def test_returns_false_for_none(self):
        detected = has_thought_signature(None)
        assert detected is False


class TestStripThoughtSignature:
    """``strip_thought_signature`` truncates at the first marker occurrence."""

    def test_strips_gemini_signature(self):
        canonical = "call_f0be918123f4462bb482dd9df123"
        signed_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
        assert strip_thought_signature(signed_id) == canonical

    def test_returns_openai_id_unchanged(self):
        plain_id = "call_abc123def456"
        assert strip_thought_signature(plain_id) == "call_abc123def456"

    def test_returns_anthropic_id_unchanged(self):
        plain_id = "toolu_01ABCdef"
        assert strip_thought_signature(plain_id) == "toolu_01ABCdef"

    def test_empty_string_returns_empty(self):
        assert strip_thought_signature("") == ""

    def test_strips_huge_signature(self):
        # Worst case seen in practice: a 278 KB blob on a 32-char id.
        blob = "A" * 278_000
        signed_id = f"call_f0be918123f4462bb482dd9df123__thought__{blob}"
        stripped = strip_thought_signature(signed_id)
        assert stripped == "call_f0be918123f4462bb482dd9df123"
        # Once stripped, the id must no longer be detected as signed.
        assert has_thought_signature(stripped) is False

    def test_stripping_is_idempotent(self):
        signed_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
        first_pass = strip_thought_signature(signed_id)
        second_pass = strip_thought_signature(first_pass)
        assert second_pass == first_pass

    def test_strips_only_first_marker_occurrence(self):
        # A blob that itself contains the marker is still cut at the first
        # occurrence, so nothing after it survives.
        doubled = "call_x__thought__blob__thought__more"
        assert strip_thought_signature(doubled) == "call_x"
Loading