def _strip_archival_thought_signatures(messages: list[Message]) -> None:
    """Remove thought-signature suffixes from all but the latest tool-call turn.

    Tool-call ids may carry a ``__thought__`` suffix of the form
    ``call_<id>__thought__<signature>`` (per the original authors, this is how
    LiteLLM forwards a Vertex Gemini ``thoughtSignature``). This pass edits
    ``messages`` in place:

    * The ids on the most recent assistant message that has tool calls form
      the "keep" set; those ids are left untouched wherever they appear.
    * Every other assistant tool call whose id carries the marker is replaced
      by a copy with the suffix removed.
    * Every other ``tool`` message whose ``tool_call_id`` carries the marker
      has the suffix removed too, so call/result ids keep matching.

    Ids without the marker are never modified.

    NOTE(review): this assumes the provider only needs the signature for the
    latest tool-call turn. Confirm against the Gemini thought-signature
    documentation, which describes validation over the function calls of the
    current turn.

    Args:
        messages: Conversation messages to rewrite in place.
    """
    # Imported lazily inside the function, as in the original implementation.
    from openhands.sdk.llm import MessageToolCall
    from openhands.sdk.llm.utils.thought_signature import (
        has_thought_signature,
        strip_thought_signature,
    )

    # Tool calls of the newest assistant message that issued any (or None).
    latest_calls = next(
        (
            msg.tool_calls
            for msg in reversed(messages)
            if msg.role == "assistant" and msg.tool_calls
        ),
        None,
    )
    protected_ids: set[str] = {call.id for call in latest_calls or ()}

    def _is_archival(call_id: str) -> bool:
        # Outside the keep window AND actually carrying a signature.
        return call_id not in protected_ids and has_thought_signature(call_id)

    for msg in messages:
        if msg.role == "assistant" and msg.tool_calls:
            # Leave the message alone unless at least one id needs rewriting.
            if not any(_is_archival(call.id) for call in msg.tool_calls):
                continue
            rewritten: list[MessageToolCall] = [
                call.model_copy(update={"id": strip_thought_signature(call.id)})
                if _is_archival(call.id)
                else call
                for call in msg.tool_calls
            ]
            msg.tool_calls = rewritten
            continue

        if (
            msg.role == "tool"
            and msg.tool_call_id
            and _is_archival(msg.tool_call_id)
        ):
            msg.tool_call_id = strip_thought_signature(msg.tool_call_id)
from __future__ import annotations


# Literal substring that separates the canonical call id from the signature.
THOUGHT_SIGNATURE_MARKER = "__thought__"


def has_thought_signature(tool_call_id: str | None) -> bool:
    """Tell whether ``tool_call_id`` contains the thought-signature marker.

    ``None`` and the empty string are reported as carrying no signature.
    """
    if not tool_call_id:
        return False
    return THOUGHT_SIGNATURE_MARKER in tool_call_id


def strip_thought_signature(tool_call_id: str) -> str:
    """Return ``tool_call_id`` cut off just before the first marker.

    For an id shaped like ``call_<id>__thought__<signature>`` this yields
    ``call_<id>``. Ids that do not contain the marker, and falsy ids, are
    returned unchanged.
    """
    if not tool_call_id:
        return tool_call_id
    # partition() splits at the first occurrence; with no marker present the
    # head is the whole string, so the id comes back as-is.
    canonical_id, _marker, _signature = tool_call_id.partition(
        THOUGHT_SIGNATURE_MARKER
    )
    return canonical_id
class TestThoughtSignatureStripping:
    """Pin the signature-stripping pass applied by ``events_to_messages``.

    Tool-call ids in these tests optionally carry a suffix of the form
    ``call_<id>__thought__<signature>``. The tests check that the suffix
    survives only on the most recent assistant tool-call turn (and its tool
    results), that call/result ids stay equal, that source events are not
    modified, and that ids without the marker pass through untouched.
    """

    @staticmethod
    def _make_pair(turn: int, has_sig: bool = True) -> list[LLMConvertibleEvent]:
        """Build the action event and matching observation for ``turn``."""
        base_id = f"call_{turn:032x}"
        if has_sig:
            call_id = f"{base_id}__thought__{'A' * 1000}"
        else:
            call_id = base_id

        action_event = create_action_event(
            thought_text=f"thinking about turn {turn}",
            tool_name="terminal",
            tool_call_id=call_id,
            llm_response_id=f"resp_{turn}",
            action_args={"command": f"echo {turn}"},
        )
        observation_event = ObservationEvent(
            source="environment",
            tool_name="terminal",
            tool_call_id=call_id,
            observation=EventsToMessagesMockObservation(result=f"out {turn}"),
            action_id=action_event.id,
        )
        return cast(list[LLMConvertibleEvent], [action_event, observation_event])

    def test_strips_signature_from_older_turns(self):
        """Only the newest turn keeps its signature; older ones lose it."""
        history: list[LLMConvertibleEvent] = [
            event for turn in range(3) for event in self._make_pair(turn)
        ]

        converted = LLMConvertibleEvent.events_to_messages(history)

        # Three assistant/tool pairs, in order.
        assert [m.role for m in converted] == ["assistant", "tool"] * 3

        # Assistant side: turns 0 and 1 are stripped, turn 2 is preserved.
        for older in (converted[0], converted[2]):
            assert older.tool_calls is not None
            assert "__thought__" not in older.tool_calls[0].id
        newest = converted[4]
        assert newest.tool_calls is not None
        assert "__thought__" in newest.tool_calls[0].id

        # Tool-result side mirrors the assistant side.
        for older_result in (converted[1], converted[3]):
            assert "__thought__" not in (older_result.tool_call_id or "")
        assert "__thought__" in (converted[5].tool_call_id or "")

    def test_stripped_pairs_stay_consistent(self):
        """Each tool result id equals its assistant tool-call id afterwards."""
        history: list[LLMConvertibleEvent] = [
            event for turn in range(4) for event in self._make_pair(turn)
        ]

        converted = LLMConvertibleEvent.events_to_messages(history)

        assistants = converted[0::2]
        results = converted[1::2]
        for call_msg, result_msg in zip(assistants, results, strict=True):
            assert call_msg.tool_calls is not None
            assert call_msg.tool_calls[0].id == result_msg.tool_call_id

    def test_does_not_mutate_source_events(self):
        """Conversion leaves the source ``ActionEvent.tool_call.id`` intact."""
        history = [*self._make_pair(0), *self._make_pair(1)]
        first_action = history[0]
        assert isinstance(first_action, ActionEvent)
        id_before = first_action.tool_call.id

        LLMConvertibleEvent.events_to_messages(history)

        # Same id after conversion, and it still carries the signature.
        assert first_action.tool_call.id == id_before
        assert "__thought__" in id_before

    def test_no_op_for_non_gemini_ids(self):
        """Ids that never had ``__thought__`` come through unchanged."""
        history: list[LLMConvertibleEvent] = [
            event
            for turn in range(3)
            for event in self._make_pair(turn, has_sig=False)
        ]

        converted = LLMConvertibleEvent.events_to_messages(history)

        for call_msg, result_msg in zip(
            converted[0::2], converted[1::2], strict=True
        ):
            assert call_msg.tool_calls is not None
            call_id = call_msg.tool_calls[0].id
            assert call_id.startswith("call_")
            assert call_id == result_msg.tool_call_id
            assert "__thought__" not in call_id

    def test_parallel_tool_calls_share_signature_keep_window(self):
        """Calls batched in one LLM response all stay inside the keep window."""
        sig = "__thought__" + "B" * 500
        id_a = f"call_aaaa{sig}"
        id_b = f"call_bbbb{sig}"

        # A shared llm_response_id marks the two actions as parallel calls.
        action_a = create_action_event(
            thought_text="parallel",
            tool_name="terminal",
            tool_call_id=id_a,
            llm_response_id="resp_parallel",
            action_args={"command": "a"},
        )
        # The second (batched) action must carry no thought of its own;
        # ``create_action_event`` always emits one, so it is cleared here.
        action_b = create_action_event(
            thought_text="",
            tool_name="terminal",
            tool_call_id=id_b,
            llm_response_id="resp_parallel",
            action_args={"command": "b"},
        ).model_copy(update={"thought": []})

        obs_a = ObservationEvent(
            source="environment",
            tool_name="terminal",
            tool_call_id=id_a,
            observation=EventsToMessagesMockObservation(result="a-out"),
            action_id=action_a.id,
        )
        obs_b = ObservationEvent(
            source="environment",
            tool_name="terminal",
            tool_call_id=id_b,
            observation=EventsToMessagesMockObservation(result="b-out"),
            action_id=action_b.id,
        )

        history = cast(
            list[LLMConvertibleEvent],
            [action_a, action_b, obs_a, obs_b],
        )
        converted = LLMConvertibleEvent.events_to_messages(history)

        # One combined assistant message followed by two tool messages.
        combined = converted[0]
        assert combined.role == "assistant"
        assert combined.tool_calls is not None
        # Both calls belong to the most recent assistant turn, so both keep
        # their signatures.
        assert all("__thought__" in tc.id for tc in combined.tool_calls)
        # The tool results reference those ids verbatim.
        assert converted[1].tool_call_id == combined.tool_calls[0].id
        assert converted[2].tool_call_id == combined.tool_calls[1].id
class TestHasThoughtSignature:
    """Checks for ``has_thought_signature`` on marked, unmarked and empty ids."""

    def test_returns_true_for_gemini_id(self):
        """An id containing ``__thought__`` is reported as signed."""
        signed_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
        outcome = has_thought_signature(signed_id)
        assert outcome is True

    def test_returns_false_for_openai_id(self):
        """A plain ``call_*`` id has no signature."""
        outcome = has_thought_signature("call_abc123def456")
        assert outcome is False

    def test_returns_false_for_anthropic_id(self):
        """A ``toolu_*`` id has no signature."""
        outcome = has_thought_signature("toolu_01ABCdef")
        assert outcome is False

    def test_returns_false_for_empty(self):
        """The empty string has no signature."""
        outcome = has_thought_signature("")
        assert outcome is False

    def test_returns_false_for_none(self):
        """``None`` has no signature."""
        outcome = has_thought_signature(None)
        assert outcome is False
class TestStripThoughtSignature:
    """Checks for ``strip_thought_signature`` across id shapes and sizes."""

    def test_strips_gemini_signature(self):
        """Everything from the marker onward is removed."""
        canonical = "call_f0be918123f4462bb482dd9df123"
        signed_id = f"{canonical}__thought__AY89a18oWjPi"
        assert strip_thought_signature(signed_id) == canonical

    def test_returns_openai_id_unchanged(self):
        """A plain ``call_*`` id is returned as-is."""
        plain_id = "call_abc123def456"
        assert strip_thought_signature(plain_id) == plain_id

    def test_returns_anthropic_id_unchanged(self):
        """A ``toolu_*`` id is returned as-is."""
        plain_id = "toolu_01ABCdef"
        assert strip_thought_signature(plain_id) == plain_id

    def test_empty_string_returns_empty(self):
        """The empty string maps to the empty string."""
        assert strip_thought_signature("") == ""

    def test_strips_huge_signature(self):
        """A 278,000-character signature blob is removed in full."""
        # Sized after the large signature the original authors reported
        # seeing, appended to a short canonical id.
        canonical = "call_f0be918123f4462bb482dd9df123"
        big_blob = "A" * 278_000
        stripped = strip_thought_signature(f"{canonical}__thought__{big_blob}")
        assert stripped == canonical
        # Nothing signature-like is left behind.
        assert has_thought_signature(stripped) is False

    def test_stripping_is_idempotent(self):
        """Stripping an already-stripped id changes nothing."""
        signed_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
        first_pass = strip_thought_signature(signed_id)
        second_pass = strip_thought_signature(first_pass)
        assert second_pass == first_pass

    def test_strips_only_first_marker_occurrence(self):
        """The cut happens at the first marker even if the blob repeats it."""
        doubled = "call_x__thought__blob__thought__more"
        assert strip_thought_signature(doubled) == "call_x"