From e4604c4fc4ba6f9796427380e40e941baecea25a Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Wed, 3 Jun 2026 20:43:09 -0400 Subject: [PATCH 1/4] Refresh condenser LLM on model switch --- .../conversation/impl/local_conversation.py | 18 +++++++- tests/sdk/conversation/test_switch_model.py | 45 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 3315832206..f9f89e8552 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -9,6 +9,7 @@ from openhands.sdk.agent.acp_agent import ACPAgent from openhands.sdk.agent.base import AgentBase +from openhands.sdk.context.condenser import CondenserBase, LLMSummarizingCondenser from openhands.sdk.context.prompts.prompt import render_template from openhands.sdk.conversation.base import BaseConversation from openhands.sdk.conversation.cancellation import CancellationToken @@ -749,6 +750,17 @@ def _pin_session_affinity_header(self, llm: LLM) -> None: **existing, } + def _condenser_for_switched_llm(self, llm: LLM) -> CondenserBase | None: + condenser = self.agent.condenser + if not isinstance(condenser, LLMSummarizingCondenser): + return condenser + + condenser_llm = llm.model_copy( + update={"usage_id": condenser.llm.usage_id}, + ) + condenser_llm.reset_metrics() + return condenser.model_copy(update={"llm": condenser_llm}) + def switch_llm(self, llm: LLM) -> None: """Swap the agent's LLM to the given object. @@ -776,7 +788,11 @@ def switch_llm(self, llm: LLM) -> None: skip_lock = self._step_holds_state_lock and not self._state.owned() lock = contextlib.nullcontext() if skip_lock else self._state with lock: - self.agent = self.agent.model_copy(update={"llm": new_llm}) + updates = { + "llm": new_llm, + "condenser": self._condenser_for_switched_llm(new_llm), + } + self.agent = self.agent.model_copy(update=updates) self._state.agent = self.agent self._pin_prompt_cache_key() self._pin_session_affinity_header(new_llm) diff --git a/tests/sdk/conversation/test_switch_model.py b/tests/sdk/conversation/test_switch_model.py index c1ffbc1303..84b85ffee4 100644 --- a/tests/sdk/conversation/test_switch_model.py +++ b/tests/sdk/conversation/test_switch_model.py @@ -286,6 +286,51 @@ def test_switch_llm_swaps_when_store_empty(empty_profile_store): assert conv.agent.llm._prompt_cache_key == str(conv.id) +def test_switch_llm_refreshes_llm_condenser_credentials(empty_profile_store, tmp_path): + """A mid-session LLM switch must also refresh the default condenser LLM. + + The condenser owns a separate copy of the agent LLM. If the agent LLM is + switched but that copy is left behind, normal turns can keep working while + the next condensation request still calls the old no-credential model. + """ + initial_llm = LLM(model="litellm_proxy/old-model", usage_id="default") + initial_condenser_llm = initial_llm.model_copy(update={"usage_id": "condenser"}) + initial_condenser_llm.reset_metrics() + condenser = LLMSummarizingCondenser( + llm=initial_condenser_llm, + max_size=100, + keep_first=2, + ) + conv = LocalConversation( + agent=Agent(llm=initial_llm, condenser=condenser, tools=[]), + workspace=tmp_path, + ) + conv._ensure_agent_ready() + + switched_llm = LLM( + model="litellm_proxy/new-model", + api_key=SecretStr("new-test-key"), + usage_id="profile:new", + ) + + conv.switch_llm(switched_llm) + + assert conv.agent.llm.model == "litellm_proxy/new-model" + assert isinstance(conv.agent.condenser, LLMSummarizingCondenser) + assert isinstance(conv.state.agent.condenser, LLMSummarizingCondenser) + + condenser_llm = conv.agent.condenser.llm + state_condenser_llm = conv.state.agent.condenser.llm + assert condenser_llm is not initial_condenser_llm + assert condenser_llm.model == "litellm_proxy/new-model" + assert condenser_llm.usage_id == "condenser" + assert isinstance(condenser_llm.api_key, SecretStr) + assert condenser_llm.api_key.get_secret_value() == "new-test-key" + assert state_condenser_llm.model == condenser_llm.model + assert state_condenser_llm.api_key == condenser_llm.api_key + assert condenser_llm.metrics is not conv.agent.llm.metrics + + def test_switch_llm_then_send_message(empty_profile_store): """send_message triggers _ensure_agent_ready, which re-registers agent LLMs in the registry. switch_llm adds an entry under the caller's From 6bded718046bf37e63e16de44d2f85150b8b302d Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Wed, 3 Jun 2026 22:13:26 -0400 Subject: [PATCH 2/4] Initialize reset LLM telemetry --- openhands-sdk/openhands/sdk/llm/llm.py | 24 ++++++++++++++------- tests/sdk/conversation/test_switch_model.py | 24 ++++++++++++++++++++- tests/sdk/llm/test_llm.py | 2 ++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index d3060a6a10..0225cc6d42 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -700,11 +700,19 @@ def reset_metrics(self) -> None: When an LLM is copied (e.g., to create a condenser LLM from an agent LLM), Pydantic's model_copy() does a shallow copy of private attributes by default, causing the original and copied LLM to share the same Metrics object. - This method allows the registry to fix this by resetting metrics to None, - which will be lazily recreated when accessed. + This method allows the registry to fix this by creating fresh metrics and + telemetry immediately, so the copied LLM is ready for the next completion + call even if callers do not access ``metrics``/``telemetry`` first. """ - self._metrics = None - self._telemetry = None + self._metrics = Metrics(model_name=self.model) + self._telemetry = Telemetry( + model_name=self.model, + log_enabled=self.log_completions, + log_dir=self.log_completions_folder if self.log_completions else None, + input_cost_per_token=self.input_cost_per_token, + output_cost_per_token=self.output_cost_per_token, + metrics=self._metrics, + ) def _handle_error( self, @@ -960,11 +968,11 @@ def _prepare_completion_params( # 4) request context for telemetry (always include context_window for metrics) # Always pass context_window so metrics are tracked even when # logging is disabled. - assert self._telemetry is not None + telemetry = self.telemetry telemetry_ctx: dict[str, Any] = { "context_window": self.effective_max_input_tokens or 0 } - if self._telemetry.log_enabled: + if telemetry.log_enabled: telemetry_ctx.update( { "messages": formatted_messages[:], # already simple dicts @@ -1031,11 +1039,11 @@ def _prepare_responses_params( # Request context for telemetry (always include context_window for metrics) # Always pass context_window so metrics are tracked even when # logging is disabled. - assert self._telemetry is not None + telemetry = self.telemetry telemetry_ctx: dict[str, Any] = { "context_window": self.effective_max_input_tokens or 0 } - if self._telemetry.log_enabled: + if telemetry.log_enabled: telemetry_ctx.update( { "llm_path": "responses", diff --git a/tests/sdk/conversation/test_switch_model.py b/tests/sdk/conversation/test_switch_model.py index 84b85ffee4..acca934388 100644 --- a/tests/sdk/conversation/test_switch_model.py +++ b/tests/sdk/conversation/test_switch_model.py @@ -19,6 +19,7 @@ from openhands.sdk.llm.llm_profile_store import LLMProfileStore from openhands.sdk.testing import TestLLM from openhands.sdk.utils.cipher import Cipher +from tests.conftest import create_mock_litellm_response def _make_llm(model: str, usage_id: str) -> LLM: @@ -286,7 +287,9 @@ def test_switch_llm_swaps_when_store_empty(empty_profile_store): assert conv.agent.llm._prompt_cache_key == str(conv.id) -def test_switch_llm_refreshes_llm_condenser_credentials(empty_profile_store, tmp_path): +def test_switch_llm_refreshes_llm_condenser_credentials( + empty_profile_store, tmp_path, monkeypatch +): """A mid-session LLM switch must also refresh the default condenser LLM. The condenser owns a separate copy of the agent LLM. If the agent LLM is @@ -329,6 +332,25 @@ def test_switch_llm_refreshes_llm_condenser_credentials(empty_profile_store, tmp assert state_condenser_llm.model == condenser_llm.model assert state_condenser_llm.api_key == condenser_llm.api_key assert condenser_llm.metrics is not conv.agent.llm.metrics + assert condenser_llm._telemetry is not None + + async def _fake_acompletion(**kwargs): + return create_mock_litellm_response( + content="condensed summary", + model=kwargs["model"], + ) + + monkeypatch.setattr("openhands.sdk.llm.llm.litellm_acompletion", _fake_acompletion) + + response = asyncio.run( + condenser_llm.acompletion( + [Message(role="user", content=[TextContent(text="summarize")])] + ) + ) + + content = response.message.content[0] + assert isinstance(content, TextContent) + assert content.text == "condensed summary" def test_switch_llm_then_send_message(empty_profile_store): diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py index c24715542c..32733ca864 100644 --- a/tests/sdk/llm/test_llm.py +++ b/tests/sdk/llm/test_llm.py @@ -1125,6 +1125,8 @@ def test_llm_reset_metrics(): llm.reset_metrics() # Verify new metrics are created + assert llm._metrics is not None + assert llm._telemetry is not None assert llm.metrics is not original_metrics assert llm.telemetry is not original_telemetry assert llm.metrics.accumulated_cost == 0.0 From 845dfd5e50c7376fc4fced33291598a0633b4256 Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:46:07 -0400 Subject: [PATCH 3/4] test: cover condenser generation after LLM switch --- tests/sdk/conversation/test_switch_model.py | 49 +++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/sdk/conversation/test_switch_model.py b/tests/sdk/conversation/test_switch_model.py index acca934388..be510ddc59 100644 --- a/tests/sdk/conversation/test_switch_model.py +++ b/tests/sdk/conversation/test_switch_model.py @@ -10,11 +10,13 @@ from openhands.sdk.agent import Agent from openhands.sdk.agent.acp_agent import ACPAgent from openhands.sdk.context.condenser import LLMSummarizingCondenser +from openhands.sdk.context.view import View from openhands.sdk.conversation.persistence_const import BASE_STATE from openhands.sdk.conversation.state import ( ConversationExecutionStatus, ConversationState, ) +from openhands.sdk.event.llm_convertible import MessageEvent from openhands.sdk.llm import Message, MessageToolCall, TextContent, llm_profile_store from openhands.sdk.llm.llm_profile_store import LLMProfileStore from openhands.sdk.testing import TestLLM @@ -26,6 +28,13 @@ def _make_llm(model: str, usage_id: str) -> LLM: return TestLLM.from_messages([], model=model, usage_id=usage_id) +def _message_event(content: str) -> MessageEvent: + return MessageEvent( + llm_message=Message(role="user", content=[TextContent(text=content)]), + source="user", + ) + + @pytest.fixture() def profile_store(tmp_path, monkeypatch): """ @@ -353,6 +362,46 @@ async def _fake_acompletion(**kwargs): assert content.text == "condensed summary" +def test_switch_llm_condenser_can_generate_condensation( + empty_profile_store, tmp_path, monkeypatch +): + initial_llm = LLM(model="litellm_proxy/old-model", usage_id="default") + condenser = LLMSummarizingCondenser( + llm=initial_llm.model_copy(update={"usage_id": "condenser"}), + max_size=6, + keep_first=1, + ) + conv = LocalConversation( + agent=Agent(llm=initial_llm, condenser=condenser, tools=[]), + workspace=tmp_path, + ) + conv._ensure_agent_ready() + + switched_llm = LLM( + model="litellm_proxy/new-model", + api_key=SecretStr("new-test-key"), + usage_id="profile:new", + ) + conv.switch_llm(switched_llm) + + def _fake_completion(**kwargs): + return create_mock_litellm_response( + content="condensed summary", + model=kwargs["model"], + ) + + monkeypatch.setattr("openhands.sdk.llm.llm.litellm_completion", _fake_completion) + + assert isinstance(conv.agent.condenser, LLMSummarizingCondenser) + condensation = conv.agent.condenser.get_condensation( + View.from_events([_message_event(f"event {i}") for i in range(12)]), + agent_llm=conv.agent.llm, + ) + + assert condensation.summary == "condensed summary" + assert len(condensation.forgotten_event_ids) > 0 + + def test_switch_llm_then_send_message(empty_profile_store): """send_message triggers _ensure_agent_ready, which re-registers agent LLMs in the registry. switch_llm adds an entry under the caller's From 622d415d9b3807f7b6282cc836e2aca413877a1e Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Tue, 9 Jun 2026 18:01:22 -0400 Subject: [PATCH 4/4] fix: preserve independent condenser profiles --- .../conversation/impl/local_conversation.py | 26 +++++++++++-- tests/sdk/conversation/test_switch_model.py | 39 +++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index f9f89e8552..57cfb9359b 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -750,12 +750,29 @@ def _pin_session_affinity_header(self, llm: LLM) -> None: **existing, } - def _condenser_for_switched_llm(self, llm: LLM) -> CondenserBase | None: + def _condenser_for_switched_llm( + self, + current_llm: LLM, + new_llm: LLM, + ) -> CondenserBase | None: condenser = self.agent.condenser if not isinstance(condenser, LLMSummarizingCondenser): return condenser - condenser_llm = llm.model_copy( + current_config = current_llm.model_dump( + mode="json", + context={"expose_secrets": True}, + exclude={"usage_id"}, + ) + condenser_config = condenser.llm.model_dump( + mode="json", + context={"expose_secrets": True}, + exclude={"usage_id"}, + ) + if condenser_config != current_config: + return condenser + + condenser_llm = new_llm.model_copy( update={"usage_id": condenser.llm.usage_id}, ) condenser_llm.reset_metrics() @@ -790,7 +807,10 @@ def switch_llm(self, llm: LLM) -> None: with lock: updates = { "llm": new_llm, - "condenser": self._condenser_for_switched_llm(new_llm), + "condenser": self._condenser_for_switched_llm( + self.agent.llm, + new_llm, + ), } self.agent = self.agent.model_copy(update=updates) self._state.agent = self.agent diff --git a/tests/sdk/conversation/test_switch_model.py b/tests/sdk/conversation/test_switch_model.py index be510ddc59..f9055cee52 100644 --- a/tests/sdk/conversation/test_switch_model.py +++ b/tests/sdk/conversation/test_switch_model.py @@ -402,6 +402,45 @@ def _fake_completion(**kwargs): assert len(condensation.forgotten_event_ids) > 0 +def test_switch_llm_preserves_independent_condenser_profile( + empty_profile_store, tmp_path +): + initial_llm = LLM( + model="litellm_proxy/agent-old", + api_key=SecretStr("agent-old-key"), + usage_id="default", + ) + independent_condenser_llm = LLM( + model="litellm_proxy/condenser-profile", + api_key=SecretStr("condenser-key"), + usage_id="condenser", + ) + condenser = LLMSummarizingCondenser( + llm=independent_condenser_llm, + max_size=100, + keep_first=2, + ) + conv = LocalConversation( + agent=Agent(llm=initial_llm, condenser=condenser, tools=[]), + workspace=tmp_path, + ) + conv._ensure_agent_ready() + + conv.switch_llm( + LLM( + model="litellm_proxy/agent-new", + api_key=SecretStr("agent-new-key"), + usage_id="profile:new", + ) + ) + + assert isinstance(conv.agent.condenser, LLMSummarizingCondenser) + condenser_llm = conv.agent.condenser.llm + assert condenser_llm.model == "litellm_proxy/condenser-profile" + assert isinstance(condenser_llm.api_key, SecretStr) + assert condenser_llm.api_key.get_secret_value() == "condenser-key" + + def test_switch_llm_then_send_message(empty_profile_store): """send_message triggers _ensure_agent_ready, which re-registers agent LLMs in the registry. switch_llm adds an entry under the caller's