Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,25 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
),
)

# Optional name of a pre-created Vertex AI ``CachedContent`` resource; ``None``
# (the default) leaves explicit context caching disabled.
# NOTE(review): the description calls ``"cachedContents/1234567890"`` a "full
# resource name". Vertex AI resource names are usually of the form
# ``projects/<project>/locations/<location>/cachedContents/<id>`` -- confirm
# which form LiteLLM expects here and adjust the example if necessary.
vertex_cached_content: str | None = Field(
default=None,
description=(
"Reference an existing Vertex AI ``CachedContent`` resource to use as "
"the cache prefix for every request. Pass the full resource name, e.g. "
'``"cachedContents/1234567890"`` returned by '
"``CachedContent.create``. The SDK threads it through to LiteLLM, "
"which forwards it to the Vertex Gemini ``generateContent`` API as "
"``cachedContent``. This bypasses the inline ``cache_control`` marker "
"path (which only works for ``vertex_ai/`` direct, not via "
"``litellm_proxy/``) and gives deterministic, explicit caching for "
"long-running agent runs whose system + tool prefix exceeds Vertex's "
"minimum cache size. The caller is responsible for creating, "
"refreshing the TTL, and deleting the cache resource — see "
"https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache. "
"Ignored for non-Vertex / non-Gemini providers."
),
)

fallback_strategy: FallbackStrategy | None = Field(
default=None,
description=(
Expand Down
26 changes: 26 additions & 0 deletions openhands-sdk/openhands/sdk/llm/options/chat_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@
from openhands.sdk.llm.utils.model_features import get_features


def _model_supports_vertex_cached_content(model: str) -> bool:
"""Return True iff sending ``cached_content`` to this model is safe.

LiteLLM forwards ``cached_content`` as a top-level kwarg through to the
Vertex Gemini ``generateContent`` API. Other providers (OpenAI, Anthropic,
etc.) will reject unknown kwargs, so we gate emission to model names that
look Gemini-flavoured regardless of which LiteLLM provider prefix routes
them: ``vertex_ai/``, ``gemini/``, ``litellm_proxy/gemini-*`` (assuming
the proxy forwards to a Vertex backend), and bare ``gemini-*``.
"""
return "gemini" in (model or "").lower()


def select_chat_options(
llm, user_kwargs: dict[str, Any], has_tools: bool
) -> dict[str, Any]:
Expand Down Expand Up @@ -99,4 +112,17 @@ def select_chat_options(
if llm._prompt_cache_key:
out["prompt_cache_key"] = llm._prompt_cache_key

# Vertex AI explicit context cache: user pre-creates a CachedContent
# resource (see https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache)
# and references it by name. LiteLLM forwards the kwarg to the Vertex
# ``generateContent`` API as ``cachedContent``. We only emit when the model
# is Gemini-flavoured so unknown-kwarg-rejecting providers (OpenAI, etc.)
# are left untouched. User-supplied kwargs take precedence.
if (
llm.vertex_cached_content
and "cached_content" not in out
and _model_supports_vertex_cached_content(llm.model)
):
out["cached_content"] = llm.vertex_cached_content

return out
89 changes: 89 additions & 0 deletions tests/sdk/llm/test_chat_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class DummyLLM:
_prompt_cache_key: str | None = None
openrouter_site_url: str = ""
openrouter_app_name: str = ""
vertex_cached_content: str | None = None

def _openrouter_headers(self) -> dict[str, str]:
headers: dict[str, str] = {}
Expand Down Expand Up @@ -243,3 +244,91 @@ def test_chat_options_omits_openrouter_headers_when_unset():
llm = DummyLLM(model="gpt-4o")
out = select_chat_options(llm, user_kwargs={}, has_tools=False)
assert "extra_headers" not in out


# ---------------------------------------------------------------------------
# vertex_cached_content
# ---------------------------------------------------------------------------


def test_vertex_cached_content_emitted_for_vertex_gemini():
    """A ``vertex_ai/`` Gemini model gets the cache name as ``cached_content``."""
    cache_name = "cachedContents/1234567890"
    gemini_llm = LLM(
        model="vertex_ai/gemini-3-flash",
        vertex_cached_content=cache_name,
    )

    options = select_chat_options(gemini_llm, user_kwargs={}, has_tools=True)

    assert options["cached_content"] == cache_name


def test_vertex_cached_content_emitted_for_gemini_provider():
    """A model routed through the ``gemini/`` provider also gets the kwarg."""
    cache_name = "cachedContents/abc"
    gemini_llm = LLM(
        model="gemini/gemini-3-flash",
        vertex_cached_content=cache_name,
    )

    options = select_chat_options(gemini_llm, user_kwargs={}, has_tools=True)

    assert options["cached_content"] == cache_name


def test_vertex_cached_content_emitted_for_litellm_proxy_gemini():
    """Gemini models behind ``litellm_proxy/`` still get ``cached_content``.

    The SDK's job ends at passing the kwarg along; whether the proxy relays
    it to Vertex is up to the proxy's own configuration.
    """
    cache_name = "cachedContents/xyz"
    proxied_llm = LLM(
        model="litellm_proxy/gemini-3.5-flash",
        vertex_cached_content=cache_name,
    )

    options = select_chat_options(proxied_llm, user_kwargs={}, has_tools=True)

    assert options["cached_content"] == cache_name


def test_vertex_cached_content_suppressed_for_openai():
    """An OpenAI model never receives ``cached_content``.

    Setting the field on a non-Gemini LLM is treated as a tolerated
    misconfiguration: the value is dropped silently instead of raising, so the
    provider is not handed a kwarg it would reject.
    """
    openai_llm = LLM(
        model="gpt-5-mini",
        vertex_cached_content="cachedContents/should-be-ignored",
    )

    options = select_chat_options(openai_llm, user_kwargs={}, has_tools=True)

    assert "cached_content" not in options


def test_vertex_cached_content_suppressed_for_claude():
    """A Claude model never receives ``cached_content`` either."""
    claude_llm = LLM(
        model="claude-sonnet-4-5",
        vertex_cached_content="cachedContents/should-be-ignored",
    )

    options = select_chat_options(claude_llm, user_kwargs={}, has_tools=True)

    assert "cached_content" not in options


def test_vertex_cached_content_omitted_when_unset():
    """With the field left at ``None``, a Gemini model gets no ``cached_content``."""
    gemini_llm = LLM(model="vertex_ai/gemini-3-flash")

    options = select_chat_options(gemini_llm, user_kwargs={}, has_tools=True)

    assert "cached_content" not in options


def test_vertex_cached_content_respects_user_kwarg_override():
    """An explicit ``cached_content`` in ``user_kwargs`` beats the LLM field.

    When both the per-call kwarg and ``vertex_cached_content`` are set, the
    caller-supplied value must be the one that ends up in the options.
    """
    configured_llm = LLM(
        model="vertex_ai/gemini-3-flash",
        vertex_cached_content="cachedContents/from-config",
    )
    caller_kwargs = {"cached_content": "cachedContents/from-caller"}

    options = select_chat_options(
        configured_llm,
        user_kwargs=caller_kwargs,
        has_tools=True,
    )

    assert options["cached_content"] == "cachedContents/from-caller"
Loading