OpenHands · juanmichelini · Jun 9, 2026 · Jun 9, 2026
diff --git a/openhands-sdk/openhands/sdk/event/base.py b/openhands-sdk/openhands/sdk/event/base.py
@@ -135,6 +135,11 @@ def events_to_messages(events: list["LLMConvertibleEvent"]) -> list[Message]:
                     messages.append(msg)
                 i += 1
 
+        # Vertex Gemini thought signatures (``call_..__thought__<blob>``) are only
+        # consumed on the immediately following tool-result turn. Strip them from
+        # archival history so they aren't re-shipped in every subsequent prompt.
+        _strip_archival_thought_signatures(messages)
+
         return messages
 
 
@@ -174,3 +179,57 @@ def _combine_action_events(events: list["ActionEvent"]) -> Message:
         reasoning_content=events[0].reasoning_content,  # Shared reasoning content
         thinking_blocks=events[0].thinking_blocks,  # Shared thinking blocks
     )
+
+
+def _strip_archival_thought_signatures(messages: list[Message]) -> None:
+    """Drop Vertex Gemini thought-signature suffixes from archival history.
+
+    Vertex returns a ``thoughtSignature`` on each turn that uses reasoning, and
+    LiteLLM smuggles it through the OpenAI-shaped tool call id as
+    ``call_<hex>__thought__<base64-blob>``. The signature is only consumed by
+    the *immediately following* tool-result turn so the model can resume from
+    its prior reasoning state — after that turn it is dead weight that the
+    SDK would otherwise re-ship in every subsequent prompt.
+
+    This helper keeps the signature on the most recent assistant turn with
+    tool calls (and on the matching tool-result messages) and strips it from
+    all earlier turns. Tool-call ids on assistant and tool messages are
+    always stripped together so the pairs stay consistent.
+
+    Non-Gemini ids are unaffected: stripping looks for the literal
+    ``__thought__`` marker and is a no-op when absent.
+    """
+    from openhands.sdk.llm import MessageToolCall
+    from openhands.sdk.llm.utils.thought_signature import (
+        has_thought_signature,
+        strip_thought_signature,
+    )
+
+    # Find the ids on the most recent assistant turn that issued tool calls.
+    keep_ids: set[str] = set()
+    for message in reversed(messages):
+        if message.role == "assistant" and message.tool_calls:
+            keep_ids = {tc.id for tc in message.tool_calls}
+            break
+
+    for message in messages:
+        if message.role == "assistant" and message.tool_calls:
+            new_calls: list[MessageToolCall] = []
+            mutated = False
+            for tc in message.tool_calls:
+                if tc.id in keep_ids or not has_thought_signature(tc.id):
+                    new_calls.append(tc)
+                else:
+                    new_calls.append(
+                        tc.model_copy(update={"id": strip_thought_signature(tc.id)})
+                    )
+                    mutated = True
+            if mutated:
+                message.tool_calls = new_calls
+        elif (
+            message.role == "tool"
+            and message.tool_call_id
+            and message.tool_call_id not in keep_ids
+            and has_thought_signature(message.tool_call_id)
+        ):
+            message.tool_call_id = strip_thought_signature(message.tool_call_id)
diff --git a/openhands-sdk/openhands/sdk/llm/utils/thought_signature.py b/openhands-sdk/openhands/sdk/llm/utils/thought_signature.py
@@ -0,0 +1,44 @@
+"""Helpers for handling Vertex Gemini thought signatures.
+
+When Vertex AI Gemini is used with ``reasoning_effort`` enabled, the provider
+returns a ``thoughtSignature`` field on each function-calling turn. LiteLLM
+encodes that signature into the OpenAI-shaped ``tool_call.id`` by appending
+``__thought__<base64-blob>`` to the canonical call id::
+
+    call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi7IVOiw5FIMB22r9...
+
+The signature is required on the *immediately following* tool-result turn so
+the model can resume from its previous reasoning state. It is **not** consumed
+on any later turn, yet because signatures live on the event log they would
+otherwise be re-shipped in every subsequent prompt. On long agent runs that
+can be the dominant cost driver: a single 278 KB signature replayed across 30
+turns adds up to millions of prompt tokens.
+
+The utilities in this module identify and strip the ``__thought__`` suffix so
+the SDK can keep signatures on the most recent turn(s) and drop them from
+archival history without changing the canonical call id.
+"""
+
+from __future__ import annotations
+
+
+THOUGHT_SIGNATURE_MARKER = "__thought__"
+
+
+def has_thought_signature(tool_call_id: str | None) -> bool:
+    """Return True if ``tool_call_id`` carries a Vertex thought signature."""
+    return bool(tool_call_id) and THOUGHT_SIGNATURE_MARKER in tool_call_id
+
+
+def strip_thought_signature(tool_call_id: str) -> str:
+    """Return the canonical call id with any thought-signature suffix removed.
+
+    Non-Gemini ids (Anthropic ``toolu_*``, OpenAI ``call_*`` without a
+    signature, ACP ids, etc.) are returned unchanged.
+    """
+    if not tool_call_id:
+        return tool_call_id
+    marker_index = tool_call_id.find(THOUGHT_SIGNATURE_MARKER)
+    if marker_index == -1:
+        return tool_call_id
+    return tool_call_id[:marker_index]
diff --git a/tests/sdk/event/test_events_to_messages.py b/tests/sdk/event/test_events_to_messages.py
@@ -586,3 +586,159 @@ def test_action_event_with_none_action_round_trip_and_observation_match(self):
         assert msgs[0].role == "assistant"
         assert msgs[1].role == "tool"
         assert msgs[1].tool_call_id == "call_ne"
+
+
+class TestThoughtSignatureStripping:
+    """Vertex Gemini ``thoughtSignature`` blobs must be dropped from history.
+
+    LiteLLM smuggles Gemini's thought signature through the OpenAI-shaped
+    tool_call id as ``call_<hex>__thought__<base64-blob>``. The signature is
+    only consumed by the *immediately following* tool-result turn so the
+    model can resume reasoning; on every turn after that it is dead weight.
+
+    These tests pin the behaviour of the strip pass that lives at the bottom
+    of ``events_to_messages``.
+    """
+
+    @staticmethod
+    def _make_pair(turn: int, has_sig: bool = True) -> list[LLMConvertibleEvent]:
+        """Create one action+observation pair for turn ``turn``."""
+        canonical = f"call_{turn:032x}"
+        tcid = f"{canonical}__thought__{'A' * 1000}" if has_sig else canonical
+        action = create_action_event(
+            thought_text=f"thinking about turn {turn}",
+            tool_name="terminal",
+            tool_call_id=tcid,
+            llm_response_id=f"resp_{turn}",
+            action_args={"command": f"echo {turn}"},
+        )
+        observation = ObservationEvent(
+            source="environment",
+            tool_name="terminal",
+            tool_call_id=tcid,
+            observation=EventsToMessagesMockObservation(result=f"out {turn}"),
+            action_id=action.id,
+        )
+        return cast(list[LLMConvertibleEvent], [action, observation])
+
+    def test_strips_signature_from_older_turns(self):
+        """Signatures on earlier turns are removed; the latest turn is kept."""
+        events: list[LLMConvertibleEvent] = []
+        for turn in range(3):
+            events.extend(self._make_pair(turn))
+
+        messages = LLMConvertibleEvent.events_to_messages(events)
+
+        # 3 assistant + 3 tool messages
+        assert [m.role for m in messages] == [
+            "assistant",
+            "tool",
+            "assistant",
+            "tool",
+            "assistant",
+            "tool",
+        ]
+
+        # The two older assistant turns lose their signature; the latest keeps it.
+        assert messages[0].tool_calls is not None
+        assert "__thought__" not in messages[0].tool_calls[0].id
+        assert messages[2].tool_calls is not None
+        assert "__thought__" not in messages[2].tool_calls[0].id
+        assert messages[4].tool_calls is not None
+        assert "__thought__" in messages[4].tool_calls[0].id
+
+        # And the tool-result ids are stripped consistently with their pair.
+        assert "__thought__" not in (messages[1].tool_call_id or "")
+        assert "__thought__" not in (messages[3].tool_call_id or "")
+        assert "__thought__" in (messages[5].tool_call_id or "")
+
+    def test_stripped_pairs_stay_consistent(self):
+        """A tool-result id must equal its assistant tool_call id after stripping."""
+        events: list[LLMConvertibleEvent] = []
+        for turn in range(4):
+            events.extend(self._make_pair(turn))
+
+        messages = LLMConvertibleEvent.events_to_messages(events)
+
+        for assistant, tool in zip(messages[0::2], messages[1::2], strict=True):
+            assert assistant.tool_calls is not None
+            assert assistant.tool_calls[0].id == tool.tool_call_id
+
+    def test_does_not_mutate_source_events(self):
+        """Stripping must not corrupt the underlying ActionEvent.tool_call.id."""
+        events = self._make_pair(0) + self._make_pair(1)
+        original_first_action = events[0]
+        assert isinstance(original_first_action, ActionEvent)
+        original_id = original_first_action.tool_call.id
+
+        LLMConvertibleEvent.events_to_messages(events)
+
+        # The event's tool_call id is unchanged after conversion.
+        assert original_first_action.tool_call.id == original_id
+        assert "__thought__" in original_id
+
+    def test_no_op_for_non_gemini_ids(self):
+        """Ids without ``__thought__`` are returned byte-for-byte unchanged."""
+        events: list[LLMConvertibleEvent] = []
+        for turn in range(3):
+            events.extend(self._make_pair(turn, has_sig=False))
+
+        messages = LLMConvertibleEvent.events_to_messages(events)
+
+        for assistant, tool in zip(messages[0::2], messages[1::2], strict=True):
+            assert assistant.tool_calls is not None
+            assert assistant.tool_calls[0].id.startswith("call_")
+            assert assistant.tool_calls[0].id == tool.tool_call_id
+            assert "__thought__" not in assistant.tool_calls[0].id
+
+    def test_parallel_tool_calls_share_signature_keep_window(self):
+        """Multiple tool calls in the same LLM response are kept together."""
+        # Two ActionEvents with the SAME llm_response_id = parallel tool calls.
+        sig = "__thought__" + "B" * 500
+        action_a = create_action_event(
+            thought_text="parallel",
+            tool_name="terminal",
+            tool_call_id=f"call_aaaa{sig}",
+            llm_response_id="resp_parallel",
+            action_args={"command": "a"},
+        )
+        # Batched siblings must carry no thought of their own.
+        action_b = create_action_event(
+            thought_text="",
+            tool_name="terminal",
+            tool_call_id=f"call_bbbb{sig}",
+            llm_response_id="resp_parallel",
+            action_args={"command": "b"},
+        )
+        # ``create_action_event`` always emits a thought; clear it for B.
+        action_b = action_b.model_copy(update={"thought": []})
+        obs_a = ObservationEvent(
+            source="environment",
+            tool_name="terminal",
+            tool_call_id=f"call_aaaa{sig}",
+            observation=EventsToMessagesMockObservation(result="a-out"),
+            action_id=action_a.id,
+        )
+        obs_b = ObservationEvent(
+            source="environment",
+            tool_name="terminal",
+            tool_call_id=f"call_bbbb{sig}",
+            observation=EventsToMessagesMockObservation(result="b-out"),
+            action_id=action_b.id,
+        )
+
+        events = cast(
+            list[LLMConvertibleEvent],
+            [action_a, action_b, obs_a, obs_b],
+        )
+        messages = LLMConvertibleEvent.events_to_messages(events)
+
+        # Combined assistant + two tool messages
+        assert messages[0].role == "assistant"
+        assert messages[0].tool_calls is not None
+        # Both parallel calls keep their signatures because they are part of
+        # the most-recent assistant turn.
+        assert all("__thought__" in tc.id for tc in messages[0].tool_calls)
+        # Tool-result ids match the kept signatures verbatim.
+        assert messages[1].tool_call_id == messages[0].tool_calls[0].id
+        assert messages[2].tool_call_id == messages[0].tool_calls[1].id
diff --git a/tests/sdk/llm/test_thought_signature.py b/tests/sdk/llm/test_thought_signature.py
@@ -0,0 +1,66 @@
+"""Tests for the thought_signature utility module."""
+
+from openhands.sdk.llm.utils.thought_signature import (
+    THOUGHT_SIGNATURE_MARKER,
+    has_thought_signature,
+    strip_thought_signature,
+)
+
+
+def test_marker_constant_value():
+    """The marker is the literal substring LiteLLM emits for Vertex Gemini."""
+    assert THOUGHT_SIGNATURE_MARKER == "__thought__"
+
+
+class TestHasThoughtSignature:
+    def test_returns_true_for_gemini_id(self):
+        gemini_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
+        assert has_thought_signature(gemini_id) is True
+
+    def test_returns_false_for_openai_id(self):
+        assert has_thought_signature("call_abc123def456") is False
+
+    def test_returns_false_for_anthropic_id(self):
+        assert has_thought_signature("toolu_01ABCdef") is False
+
+    def test_returns_false_for_empty(self):
+        assert has_thought_signature("") is False
+
+    def test_returns_false_for_none(self):
+        assert has_thought_signature(None) is False
+
+
+class TestStripThoughtSignature:
+    def test_strips_gemini_signature(self):
+        gemini_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
+        assert strip_thought_signature(gemini_id) == "call_f0be918123f4462bb482dd9df123"
+
+    def test_returns_openai_id_unchanged(self):
+        assert strip_thought_signature("call_abc123def456") == "call_abc123def456"
+
+    def test_returns_anthropic_id_unchanged(self):
+        assert strip_thought_signature("toolu_01ABCdef") == "toolu_01ABCdef"
+
+    def test_empty_string_returns_empty(self):
+        assert strip_thought_signature("") == ""
+
+    def test_strips_huge_signature(self):
+        # The pathological case observed in the wild: a 278 KB signature
+        # blob appended to a 32-char id.
+        big_blob = "A" * 278_000
+        gemini_id = f"call_f0be918123f4462bb482dd9df123__thought__{big_blob}"
+        result = strip_thought_signature(gemini_id)
+        assert result == "call_f0be918123f4462bb482dd9df123"
+        # The stripped id no longer carries a signature.
+        assert has_thought_signature(result) is False
+
+    def test_stripping_is_idempotent(self):
+        gemini_id = "call_f0be918123f4462bb482dd9df123__thought__AY89a18oWjPi"
+        stripped_once = strip_thought_signature(gemini_id)
+        assert strip_thought_signature(stripped_once) == stripped_once
+
+    def test_strips_only_first_marker_occurrence(self):
+        # If a signature blob ever happens to contain the marker again, we
+        # still want everything from the first occurrence onward removed.
+        weird = "call_x__thought__blob__thought__more"
+        assert strip_thought_signature(weird) == "call_x"