OpenHands · juanmichelini · Jun 9, 2026
diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -472,6 +472,25 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         ),
     )
 
+    vertex_cached_content: str | None = Field(
+        default=None,
+        description=(
+            "Reference an existing Vertex AI ``CachedContent`` resource to use as "
+            "the cache prefix for every request. Pass the full resource name, e.g. "
+            '``"projects/<project>/locations/<region>/cachedContents/<id>"``, as '
+            "returned by ``CachedContent.create``. The SDK threads it through to "
+            "LiteLLM, which forwards it to the Vertex Gemini ``generateContent`` "
+            "API as ``cachedContent``. This bypasses the inline ``cache_control`` "
+            "marker path (which only works for ``vertex_ai/`` direct, not via "
+            "``litellm_proxy/``) and gives deterministic, explicit caching for "
+            "long-running agent runs whose system + tool prefix exceeds Vertex's "
+            "minimum cache size. The caller is responsible for creating, "
+            "refreshing the TTL, and deleting the cache resource — see "
+            "https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache. "
+            "Ignored for non-Vertex / non-Gemini providers."
+        ),
+    )
+
     fallback_strategy: FallbackStrategy | None = Field(
         default=None,
         description=(

diff --git a/openhands-sdk/openhands/sdk/llm/options/chat_options.py b/openhands-sdk/openhands/sdk/llm/options/chat_options.py
@@ -6,6 +6,19 @@
 from openhands.sdk.llm.utils.model_features import get_features
 
 
+def _model_supports_vertex_cached_content(model: str) -> bool:
+    """Tell whether ``cached_content`` may be emitted for ``model``.
+
+    The check is a case-insensitive substring match on ``"gemini"``, so a
+    Gemini model name qualifies whatever LiteLLM prefix routes it:
+    ``vertex_ai/gemini-*``, ``gemini/*``, ``litellm_proxy/gemini-*`` (assuming
+    the proxy forwards to a Vertex backend) and bare ``gemini-*``. Other
+    providers (OpenAI, Anthropic, etc.) are expected to reject the unknown
+    kwarg, hence the gate. A falsy ``model`` (``None``/``""``) gives ``False``.
+    """
+    return bool(model) and "gemini" in model.lower()
+
+
 def select_chat_options(
     llm, user_kwargs: dict[str, Any], has_tools: bool
 ) -> dict[str, Any]:
@@ -99,4 +112,17 @@ def select_chat_options(
     if llm._prompt_cache_key:
         out["prompt_cache_key"] = llm._prompt_cache_key
 
+    # Vertex AI explicit context cache: user pre-creates a CachedContent
+    # resource (see https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache)
+    # and references it by name. LiteLLM forwards the kwarg to the Vertex
+    # ``generateContent`` API as ``cachedContent``. We only emit when the model
+    # is Gemini-flavoured so unknown-kwarg-rejecting providers (OpenAI, etc.)
+    # are left untouched. User-supplied kwargs take precedence.
+    if (
+        llm.vertex_cached_content
+        and "cached_content" not in out
+        and _model_supports_vertex_cached_content(llm.model)
+    ):
+        out["cached_content"] = llm.vertex_cached_content
+
     return out
diff --git a/tests/sdk/llm/test_chat_options.py b/tests/sdk/llm/test_chat_options.py
@@ -21,6 +21,7 @@ class DummyLLM:
     _prompt_cache_key: str | None = None
     openrouter_site_url: str = ""
     openrouter_app_name: str = ""
+    vertex_cached_content: str | None = None
 
     def _openrouter_headers(self) -> dict[str, str]:
         headers: dict[str, str] = {}
@@ -243,3 +244,91 @@ def test_chat_options_omits_openrouter_headers_when_unset():
     llm = DummyLLM(model="gpt-4o")
     out = select_chat_options(llm, user_kwargs={}, has_tools=False)
     assert "extra_headers" not in out
+
+
+# ---------------------------------------------------------------------------
+# vertex_cached_content
+# ---------------------------------------------------------------------------
+
+
+def test_vertex_cached_content_emitted_for_vertex_gemini():
+    """A ``vertex_ai/`` Gemini model gets the field as ``cached_content``."""
+    cache_name = "cachedContents/1234567890"
+    llm = LLM(model="vertex_ai/gemini-3-flash", vertex_cached_content=cache_name)
+    # Empty user kwargs: the value is expected to come from the LLM field.
+    options = select_chat_options(llm, user_kwargs={}, has_tools=True)
+    # The configured name must come back under the ``cached_content`` key.
+    assert options["cached_content"] == cache_name
+
+
+def test_vertex_cached_content_emitted_for_gemini_provider():
+    """Models routed through the ``gemini/`` provider prefix get it as well."""
+    cache_name = "cachedContents/abc"
+    llm = LLM(model="gemini/gemini-3-flash", vertex_cached_content=cache_name)
+    # Empty user kwargs: the value is expected to come from the LLM field.
+    options = select_chat_options(llm, user_kwargs={}, has_tools=True)
+    # The configured name must come back under the ``cached_content`` key.
+    assert options["cached_content"] == cache_name
+
+
+def test_vertex_cached_content_emitted_for_litellm_proxy_gemini():
+    """Gemini models behind ``litellm_proxy/`` also receive the kwarg.
+
+    The SDK only has to pass the value through; whether the proxy then
+    forwards it to Vertex is a proxy-side concern outside this test.
+    """
+    cache_name = "cachedContents/xyz"
+    llm = LLM(
+        model="litellm_proxy/gemini-3.5-flash", vertex_cached_content=cache_name
+    )
+    options = select_chat_options(llm, user_kwargs={}, has_tools=True)
+    assert options["cached_content"] == cache_name
+
+
+def test_vertex_cached_content_suppressed_for_openai():
+    """OpenAI-style (non-Gemini) models must never receive the kwarg.
+
+    Such providers reject unknown kwargs, so emitting ``cached_content`` would
+    break the call. Setting the field on a non-Gemini LLM is treated as a
+    misconfiguration that is silently tolerated, not raised as an error.
+    """
+    ignored_name = "cachedContents/should-be-ignored"
+    llm = LLM(model="gpt-5-mini", vertex_cached_content=ignored_name)
+    # The model name carries no "gemini", so the field must not be emitted.
+    options = select_chat_options(llm, user_kwargs={}, has_tools=True)
+    # Only the absence of the key is checked; no error is expected.
+    assert "cached_content" not in options
+
+
+def test_vertex_cached_content_suppressed_for_claude():
+    """Anthropic (Claude) models are gated out the same way."""
+    ignored_name = "cachedContents/should-be-ignored"
+    llm = LLM(model="claude-sonnet-4-5", vertex_cached_content=ignored_name)
+    # The model name carries no "gemini", so the field must not be emitted.
+    options = select_chat_options(llm, user_kwargs={}, has_tools=True)
+    # Only the absence of the key is checked; no error is expected.
+    assert "cached_content" not in options
+
+
+def test_vertex_cached_content_omitted_when_unset():
+    """With the field left at ``None``, even a Gemini model gets no kwarg."""
+    unset_llm = LLM(model="vertex_ai/gemini-3-flash")
+    options = select_chat_options(unset_llm, user_kwargs={}, has_tools=True)
+    assert "cached_content" not in options
+
+
+def test_vertex_cached_content_respects_user_kwarg_override():
+    """An explicit ``cached_content`` user kwarg beats the LLM config field.
+
+    Same precedence as applied elsewhere (extra_headers, etc.).
+    """
+    from_config = "cachedContents/from-config"
+    from_caller = "cachedContents/from-caller"
+    llm = LLM(model="vertex_ai/gemini-3-flash", vertex_cached_content=from_config)
+    # Both sources are set; only the caller-supplied value may survive.
+    options = select_chat_options(
+        llm,
+        user_kwargs={"cached_content": from_caller},
+        has_tools=True,
+    )
+    assert options["cached_content"] == from_caller