Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from openhands.sdk.agent.acp_agent import ACPAgent
from openhands.sdk.agent.base import AgentBase
from openhands.sdk.context.condenser import CondenserBase, LLMSummarizingCondenser
from openhands.sdk.context.prompts.prompt import render_template
from openhands.sdk.conversation.base import BaseConversation
from openhands.sdk.conversation.cancellation import CancellationToken
Expand Down Expand Up @@ -749,6 +750,34 @@ def _pin_session_affinity_header(self, llm: LLM) -> None:
**existing,
}

def _condenser_for_switched_llm(
    self,
    current_llm: LLM,
    new_llm: LLM,
) -> CondenserBase | None:
    """Choose the condenser the agent should carry after its LLM is switched.

    A condenser is only rebuilt when it is an ``LLMSummarizingCondenser``
    whose LLM configuration is equal to ``current_llm``'s once ``usage_id``
    is excluded from both dumps. Any other condenser (including ``None``)
    is returned untouched.

    Args:
        current_llm: The LLM the agent is using before the switch.
        new_llm: The LLM the agent is being switched to.

    Returns:
        The existing condenser, or a copy of it whose ``llm`` is a copy of
        ``new_llm`` that keeps the condenser LLM's original ``usage_id`` and
        has had ``reset_metrics()`` called on it.
    """
    existing = self.agent.condenser
    if not isinstance(existing, LLMSummarizingCondenser):
        return existing

    def _comparable(llm: LLM) -> dict:
        # ``usage_id`` is left out because the condenser copy is expected to
        # differ from the agent LLM in that field alone.
        # NOTE(review): the ``expose_secrets`` context flag presumably makes
        # credentials part of the comparison -- confirm against LLM's serializer.
        return llm.model_dump(
            mode="json",
            context={"expose_secrets": True},
            exclude={"usage_id"},
        )

    agent_snapshot = _comparable(current_llm)
    condenser_snapshot = _comparable(existing.llm)
    if condenser_snapshot == agent_snapshot:
        # The condenser was mirroring the agent LLM, so follow the switch.
        replacement_llm = new_llm.model_copy(
            update={"usage_id": existing.llm.usage_id},
        )
        replacement_llm.reset_metrics()
        return existing.model_copy(update={"llm": replacement_llm})

    # The condenser LLM was configured differently from the agent's; keep it.
    return existing

def switch_llm(self, llm: LLM) -> None:
"""Swap the agent's LLM to the given object.

Expand Down Expand Up @@ -776,7 +805,14 @@ def switch_llm(self, llm: LLM) -> None:
skip_lock = self._step_holds_state_lock and not self._state.owned()
lock = contextlib.nullcontext() if skip_lock else self._state
with lock:
self.agent = self.agent.model_copy(update={"llm": new_llm})
updates = {
"llm": new_llm,
"condenser": self._condenser_for_switched_llm(
self.agent.llm,
new_llm,
),
}
self.agent = self.agent.model_copy(update=updates)
self._state.agent = self.agent
self._pin_prompt_cache_key()
self._pin_session_affinity_header(new_llm)
Expand Down
24 changes: 16 additions & 8 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,11 +700,19 @@ def reset_metrics(self) -> None:
When an LLM is copied (e.g., to create a condenser LLM from an agent LLM),
Pydantic's model_copy() does a shallow copy of private attributes by default,
causing the original and copied LLM to share the same Metrics object.
This method allows the registry to fix this by resetting metrics to None,
which will be lazily recreated when accessed.
This method allows the registry to fix this by creating fresh metrics and
telemetry immediately, so the copied LLM is ready for the next completion
call even if callers do not access ``metrics``/``telemetry`` first.
"""
self._metrics = None
self._telemetry = None
self._metrics = Metrics(model_name=self.model)
self._telemetry = Telemetry(
model_name=self.model,
log_enabled=self.log_completions,
log_dir=self.log_completions_folder if self.log_completions else None,
input_cost_per_token=self.input_cost_per_token,
output_cost_per_token=self.output_cost_per_token,
metrics=self._metrics,
)

def _handle_error(
self,
Expand Down Expand Up @@ -960,11 +968,11 @@ def _prepare_completion_params(
# 4) request context for telemetry (always include context_window for metrics)
# Always pass context_window so metrics are tracked even when
# logging is disabled.
assert self._telemetry is not None
telemetry = self.telemetry
telemetry_ctx: dict[str, Any] = {
"context_window": self.effective_max_input_tokens or 0
}
if self._telemetry.log_enabled:
if telemetry.log_enabled:
telemetry_ctx.update(
{
"messages": formatted_messages[:], # already simple dicts
Expand Down Expand Up @@ -1031,11 +1039,11 @@ def _prepare_responses_params(
# Request context for telemetry (always include context_window for metrics)
# Always pass context_window so metrics are tracked even when
# logging is disabled.
assert self._telemetry is not None
telemetry = self.telemetry
telemetry_ctx: dict[str, Any] = {
"context_window": self.effective_max_input_tokens or 0
}
if self._telemetry.log_enabled:
if telemetry.log_enabled:
telemetry_ctx.update(
{
"llm_path": "responses",
Expand Down
155 changes: 155 additions & 0 deletions tests/sdk/conversation/test_switch_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,31 @@
from openhands.sdk.agent import Agent
from openhands.sdk.agent.acp_agent import ACPAgent
from openhands.sdk.context.condenser import LLMSummarizingCondenser
from openhands.sdk.context.view import View
from openhands.sdk.conversation.persistence_const import BASE_STATE
from openhands.sdk.conversation.state import (
ConversationExecutionStatus,
ConversationState,
)
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.llm import Message, MessageToolCall, TextContent, llm_profile_store
from openhands.sdk.llm.llm_profile_store import LLMProfileStore
from openhands.sdk.testing import TestLLM
from openhands.sdk.utils.cipher import Cipher
from tests.conftest import create_mock_litellm_response


def _make_llm(model: str, usage_id: str) -> LLM:
    """Return a ``TestLLM`` created from an empty message list.

    Args:
        model: Model name forwarded to ``TestLLM.from_messages``.
        usage_id: Usage identifier forwarded to ``TestLLM.from_messages``.
    """
    no_messages: list = []
    return TestLLM.from_messages(no_messages, model=model, usage_id=usage_id)


def _message_event(content: str) -> MessageEvent:
    """Wrap ``content`` in a user-sourced ``MessageEvent`` with one text part."""
    text_part = TextContent(text=content)
    user_message = Message(role="user", content=[text_part])
    return MessageEvent(llm_message=user_message, source="user")


@pytest.fixture()
def profile_store(tmp_path, monkeypatch):
"""
Expand Down Expand Up @@ -286,6 +296,151 @@ def test_switch_llm_swaps_when_store_empty(empty_profile_store):
assert conv.agent.llm._prompt_cache_key == str(conv.id)


def test_switch_llm_refreshes_llm_condenser_credentials(
    empty_profile_store, tmp_path, monkeypatch
):
    """Switching the agent LLM mid-session also re-points a mirrored condenser.

    The condenser is built from a copy of the agent LLM that differs only in
    ``usage_id``. After ``switch_llm`` the condenser LLM must carry the new
    model and API key, keep its own ``usage_id`` and its own metrics object,
    and be able to answer an async completion immediately.
    """
    old_agent_llm = LLM(model="litellm_proxy/old-model", usage_id="default")
    old_condenser_llm = old_agent_llm.model_copy(update={"usage_id": "condenser"})
    old_condenser_llm.reset_metrics()
    agent = Agent(
        llm=old_agent_llm,
        condenser=LLMSummarizingCondenser(
            llm=old_condenser_llm,
            max_size=100,
            keep_first=2,
        ),
        tools=[],
    )
    conv = LocalConversation(agent=agent, workspace=tmp_path)
    conv._ensure_agent_ready()

    conv.switch_llm(
        LLM(
            model="litellm_proxy/new-model",
            api_key=SecretStr("new-test-key"),
            usage_id="profile:new",
        )
    )

    # The agent itself moved to the new model, and both the live agent and
    # the persisted state still hold a summarizing condenser.
    assert conv.agent.llm.model == "litellm_proxy/new-model"
    assert isinstance(conv.agent.condenser, LLMSummarizingCondenser)
    assert isinstance(conv.state.agent.condenser, LLMSummarizingCondenser)

    live_llm = conv.agent.condenser.llm
    persisted_llm = conv.state.agent.condenser.llm

    # The condenser LLM was replaced, not mutated in place.
    assert live_llm is not old_condenser_llm
    assert live_llm.model == "litellm_proxy/new-model"
    assert live_llm.usage_id == "condenser"
    assert isinstance(live_llm.api_key, SecretStr)
    assert live_llm.api_key.get_secret_value() == "new-test-key"

    # Conversation state mirrors the live agent's condenser LLM.
    assert persisted_llm.model == live_llm.model
    assert persisted_llm.api_key == live_llm.api_key

    # Metrics are not shared with the agent LLM, and telemetry already exists.
    assert live_llm.metrics is not conv.agent.llm.metrics
    assert live_llm._telemetry is not None

    async def _stub_acompletion(**call_kwargs):
        return create_mock_litellm_response(
            content="condensed summary",
            model=call_kwargs["model"],
        )

    monkeypatch.setattr("openhands.sdk.llm.llm.litellm_acompletion", _stub_acompletion)

    prompt = [Message(role="user", content=[TextContent(text="summarize")])]
    response = asyncio.run(live_llm.acompletion(prompt))

    first_part = response.message.content[0]
    assert isinstance(first_part, TextContent)
    assert first_part.text == "condensed summary"


def test_switch_llm_condenser_can_generate_condensation(
    empty_profile_store, tmp_path, monkeypatch
):
    """After ``switch_llm`` the condenser can still produce a condensation.

    The condenser LLM starts as a copy of the agent LLM (only ``usage_id``
    differs). Once the agent LLM is switched, ``get_condensation`` is run over
    twelve user message events with the completion call stubbed out, and the
    stubbed summary must come back along with some forgotten event ids.
    """
    old_agent_llm = LLM(model="litellm_proxy/old-model", usage_id="default")
    agent = Agent(
        llm=old_agent_llm,
        condenser=LLMSummarizingCondenser(
            llm=old_agent_llm.model_copy(update={"usage_id": "condenser"}),
            max_size=6,
            keep_first=1,
        ),
        tools=[],
    )
    conv = LocalConversation(agent=agent, workspace=tmp_path)
    conv._ensure_agent_ready()

    conv.switch_llm(
        LLM(
            model="litellm_proxy/new-model",
            api_key=SecretStr("new-test-key"),
            usage_id="profile:new",
        )
    )

    def _stub_completion(**call_kwargs):
        return create_mock_litellm_response(
            content="condensed summary",
            model=call_kwargs["model"],
        )

    monkeypatch.setattr("openhands.sdk.llm.llm.litellm_completion", _stub_completion)

    active_condenser = conv.agent.condenser
    assert isinstance(active_condenser, LLMSummarizingCondenser)

    events = [_message_event(f"event {i}") for i in range(12)]
    result = active_condenser.get_condensation(
        View.from_events(events),
        agent_llm=conv.agent.llm,
    )

    assert result.summary == "condensed summary"
    assert len(result.forgotten_event_ids) > 0


def test_switch_llm_preserves_independent_condenser_profile(
    empty_profile_store, tmp_path
):
    """A condenser LLM configured separately from the agent survives a switch.

    The condenser is given its own model and API key, distinct from the
    agent's. After ``switch_llm`` replaces the agent LLM, the condenser must
    still report its original model and key.
    """
    agent_llm = LLM(
        model="litellm_proxy/agent-old",
        api_key=SecretStr("agent-old-key"),
        usage_id="default",
    )
    dedicated_condenser_llm = LLM(
        model="litellm_proxy/condenser-profile",
        api_key=SecretStr("condenser-key"),
        usage_id="condenser",
    )
    agent = Agent(
        llm=agent_llm,
        condenser=LLMSummarizingCondenser(
            llm=dedicated_condenser_llm,
            max_size=100,
            keep_first=2,
        ),
        tools=[],
    )
    conv = LocalConversation(agent=agent, workspace=tmp_path)
    conv._ensure_agent_ready()

    replacement_llm = LLM(
        model="litellm_proxy/agent-new",
        api_key=SecretStr("agent-new-key"),
        usage_id="profile:new",
    )
    conv.switch_llm(replacement_llm)

    assert isinstance(conv.agent.condenser, LLMSummarizingCondenser)
    kept_llm = conv.agent.condenser.llm
    assert kept_llm.model == "litellm_proxy/condenser-profile"
    assert isinstance(kept_llm.api_key, SecretStr)
    assert kept_llm.api_key.get_secret_value() == "condenser-key"


def test_switch_llm_then_send_message(empty_profile_store):
"""send_message triggers _ensure_agent_ready, which re-registers agent
LLMs in the registry. switch_llm adds an entry under the caller's
Expand Down
2 changes: 2 additions & 0 deletions tests/sdk/llm/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,8 @@ def test_llm_reset_metrics():
llm.reset_metrics()

# Verify new metrics are created
assert llm._metrics is not None
assert llm._telemetry is not None
assert llm.metrics is not original_metrics
assert llm.telemetry is not original_telemetry
assert llm.metrics.accumulated_cost == 0.0
Expand Down
Loading