From bcbecd70e5dc20e7ed81c70d9be2d936c2464a92 Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 19 May 2026 20:55:33 +0530 Subject: [PATCH 1/7] feat: add LiteLLM as chat model provider for 100+ LLM backends --- src/xagent/core/model/chat/basic/__init__.py | 2 + src/xagent/core/model/chat/basic/adapter.py | 11 + src/xagent/core/model/chat/basic/litellm.py | 200 +++++++++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 src/xagent/core/model/chat/basic/litellm.py diff --git a/src/xagent/core/model/chat/basic/__init__.py b/src/xagent/core/model/chat/basic/__init__.py index d3eef90e1..681c0a65f 100644 --- a/src/xagent/core/model/chat/basic/__init__.py +++ b/src/xagent/core/model/chat/basic/__init__.py @@ -4,6 +4,7 @@ from .claude import ClaudeLLM from .deepseek import DeepSeekLLM from .gemini import GeminiLLM +from .litellm import LiteLLMLLM from .openai import OpenAILLM from .zhipu import ZhipuLLM @@ -15,5 +16,6 @@ "ZhipuLLM", "GeminiLLM", "ClaudeLLM", + "LiteLLMLLM", "create_base_llm", ] diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py index 35f68e985..7dc673a17 100644 --- a/src/xagent/core/model/chat/basic/adapter.py +++ b/src/xagent/core/model/chat/basic/adapter.py @@ -10,6 +10,7 @@ from .deepseek import DeepSeekLLM from .gemini import GeminiLLM from .openai import OpenAILLM +from .litellm import LiteLLMLLM from .xinference import XinferenceLLM from .zhipu import ZhipuLLM @@ -86,6 +87,16 @@ def create_base_llm(model: ModelConfig) -> BaseLLM: timeout=model.timeout, abilities=model.abilities, ) + elif provider == "litellm": + llm = LiteLLMLLM( + model_name=model.model_name, + api_key=model.api_key, + api_base=model.base_url, + default_temperature=model.default_temperature, + default_max_tokens=model.default_max_tokens, + timeout=model.timeout, + abilities=model.abilities, + ) elif provider == "xinference": llm = XinferenceLLM( model_name=model.model_name, diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py new file mode 100644 index 000000000..beb412d4b --- /dev/null +++ b/src/xagent/core/model/chat/basic/litellm.py @@ -0,0 +1,200 @@ +import logging +from typing import Any, AsyncIterator, Dict, List, Optional, Union + +from ..exceptions import LLMRetryableError, LLMTimeoutError +from ..timeout_config import TimeoutConfig +from ..token_context import add_token_usage +from ..types import ChunkType, StreamChunk +from .base import BaseLLM + +logger = logging.getLogger(__name__) + + +class LiteLLMLLM(BaseLLM): + """ + LiteLLM client providing access to 100+ LLM providers through a unified interface. + Uses provider-prefixed model names (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-6). + """ + + def __init__( + self, + model_name: str = "openai/gpt-4o-mini", + api_key: Optional[str] = None, + api_base: Optional[str] = None, + default_temperature: Optional[float] = None, + default_max_tokens: Optional[int] = None, + timeout: float = 180.0, + abilities: Optional[List[str]] = None, + timeout_config: Optional[TimeoutConfig] = None, + ): + self._model_name = model_name + self._api_key = api_key + self._api_base = api_base + self.default_temperature = default_temperature + self.default_max_tokens = default_max_tokens + self.timeout = timeout + self.timeout_config = timeout_config or TimeoutConfig() + + if abilities: + self._abilities = abilities + else: + self._abilities = ["chat", "tool_calling"] + + @property + def model_name(self) -> str: + return self._model_name + + @property + def abilities(self) -> List[str]: + return self._abilities + + @property + def supports_thinking_mode(self) -> bool: + return False + + async def chat( + self, + messages: List[Dict[str, str]], + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + tools: Optional[List[Dict[str, Any]]] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + response_format: Optional[Dict[str, Any]] = None, + thinking: Optional[Dict[str, Any]] = None, + output_config: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Any: + """Perform a chat completion via LiteLLM.""" + import litellm + + completion_params: Dict[str, Any] = { + "model": self._model_name, + "messages": self._sanitize_unicode_content(messages), + "drop_params": True, + "timeout": self.timeout, + **kwargs, + } + + if max_tokens is not None: + completion_params["max_tokens"] = max_tokens + elif self.default_max_tokens is not None: + completion_params["max_tokens"] = self.default_max_tokens + + if temperature is not None: + completion_params["temperature"] = temperature + elif self.default_temperature is not None: + completion_params["temperature"] = self.default_temperature + + if tools: + completion_params["tools"] = tools + if tool_choice: + completion_params["tool_choice"] = tool_choice + if response_format: + completion_params["response_format"] = response_format + + if self._api_key: + completion_params["api_key"] = self._api_key + if self._api_base: + completion_params["api_base"] = self._api_base + + try: + response = await litellm.acompletion(**completion_params) + except litellm.Timeout as e: + raise LLMTimeoutError(str(e)) from e + except ( + litellm.RateLimitError, + litellm.APIConnectionError, + litellm.ServiceUnavailableError, + litellm.InternalServerError, + ) as e: + raise LLMRetryableError(str(e)) from e + + choice = response.choices[0] + message = choice.message + + if hasattr(response, "usage") and response.usage: + add_token_usage( + input_tokens=getattr(response.usage, "prompt_tokens", 0) or 0, + output_tokens=getattr(response.usage, "completion_tokens", 0) or 0, + ) + + if hasattr(message, "tool_calls") and message.tool_calls: + tool_calls = [] + for tc in message.tool_calls: + tool_calls.append( + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + }, + } + ) + return {"type": "tool_call", "tool_calls": tool_calls} + + return message.content or "" + + async def stream_chat( + self, + messages: List[Dict[str, str]], + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + tools: Optional[List[Dict[str, Any]]] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + response_format: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> AsyncIterator[StreamChunk]: + """Stream a chat completion via LiteLLM.""" + import litellm + + completion_params: Dict[str, Any] = { + "model": self._model_name, + "messages": self._sanitize_unicode_content(messages), + "stream": True, + "drop_params": True, + "timeout": self.timeout, + **kwargs, + } + + if max_tokens is not None: + completion_params["max_tokens"] = max_tokens + elif self.default_max_tokens is not None: + completion_params["max_tokens"] = self.default_max_tokens + + if temperature is not None: + completion_params["temperature"] = temperature + elif self.default_temperature is not None: + completion_params["temperature"] = self.default_temperature + + if tools: + completion_params["tools"] = tools + if tool_choice: + completion_params["tool_choice"] = tool_choice + if response_format: + completion_params["response_format"] = response_format + + if self._api_key: + completion_params["api_key"] = self._api_key + if self._api_base: + completion_params["api_base"] = self._api_base + + try: + response = await litellm.acompletion(**completion_params) + except litellm.Timeout as e: + raise LLMTimeoutError(str(e)) from e + except ( + litellm.RateLimitError, + litellm.APIConnectionError, + litellm.ServiceUnavailableError, + litellm.InternalServerError, + ) as e: + raise LLMRetryableError(str(e)) from e + + async for chunk in response: + delta = chunk.choices[0].delta if chunk.choices else None + if delta is None: + continue + content = getattr(delta, "content", None) + if content: + yield StreamChunk(type=ChunkType.TEXT, content=content) From 6c109d9bfa7995a953fa83c92dc1c71aa2b127cd Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 19 May 2026 20:57:03 +0530 Subject: [PATCH 2/7] fix: rename LiteLLMLLM to LiteLLMChat --- src/xagent/core/model/chat/basic/__init__.py | 4 ++-- src/xagent/core/model/chat/basic/adapter.py | 4 ++-- src/xagent/core/model/chat/basic/litellm.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/xagent/core/model/chat/basic/__init__.py b/src/xagent/core/model/chat/basic/__init__.py index 681c0a65f..5b6158e0b 100644 --- a/src/xagent/core/model/chat/basic/__init__.py +++ b/src/xagent/core/model/chat/basic/__init__.py @@ -4,7 +4,7 @@ from .claude import ClaudeLLM from .deepseek import DeepSeekLLM from .gemini import GeminiLLM -from .litellm import LiteLLMLLM +from .litellm import LiteLLMChat from .openai import OpenAILLM from .zhipu import ZhipuLLM @@ -16,6 +16,6 @@ "ZhipuLLM", "GeminiLLM", "ClaudeLLM", - "LiteLLMLLM", + "LiteLLMChat", "create_base_llm", ] diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py index 7dc673a17..7ce0fcf53 100644 --- a/src/xagent/core/model/chat/basic/adapter.py +++ b/src/xagent/core/model/chat/basic/adapter.py @@ -10,7 +10,7 @@ from .deepseek import DeepSeekLLM from .gemini import GeminiLLM from .openai import OpenAILLM -from .litellm import LiteLLMLLM +from .litellm import LiteLLMChat from .xinference import XinferenceLLM from .zhipu import ZhipuLLM @@ -88,7 +88,7 @@ def create_base_llm(model: ModelConfig) -> BaseLLM: abilities=model.abilities, ) elif provider == "litellm": - llm = LiteLLMLLM( + llm = LiteLLMChat( model_name=model.model_name, api_key=model.api_key, api_base=model.base_url, diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py index beb412d4b..96e32ca27 100644 --- a/src/xagent/core/model/chat/basic/litellm.py +++ b/src/xagent/core/model/chat/basic/litellm.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -class LiteLLMLLM(BaseLLM): +class LiteLLMChat(BaseLLM): """ LiteLLM client providing access to 100+ LLM providers through a unified interface. Uses provider-prefixed model names (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-6). From cc9ca25832066f1ec16acafd8c4a199691e8a98f Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 19 May 2026 20:58:24 +0530 Subject: [PATCH 3/7] fix: rename class to LiteLLM --- src/xagent/core/model/chat/basic/__init__.py | 4 ++-- src/xagent/core/model/chat/basic/adapter.py | 4 ++-- src/xagent/core/model/chat/basic/litellm.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/xagent/core/model/chat/basic/__init__.py b/src/xagent/core/model/chat/basic/__init__.py index 5b6158e0b..3f86b3e85 100644 --- a/src/xagent/core/model/chat/basic/__init__.py +++ b/src/xagent/core/model/chat/basic/__init__.py @@ -4,7 +4,7 @@ from .claude import ClaudeLLM from .deepseek import DeepSeekLLM from .gemini import GeminiLLM -from .litellm import LiteLLMChat +from .litellm import LiteLLM from .openai import OpenAILLM from .zhipu import ZhipuLLM @@ -16,6 +16,6 @@ "ZhipuLLM", "GeminiLLM", "ClaudeLLM", - "LiteLLMChat", + "LiteLLM", "create_base_llm", ] diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py index 7ce0fcf53..4bd1b0d23 100644 --- a/src/xagent/core/model/chat/basic/adapter.py +++ b/src/xagent/core/model/chat/basic/adapter.py @@ -10,7 +10,7 @@ from .deepseek import DeepSeekLLM from .gemini import GeminiLLM from .openai import OpenAILLM -from .litellm import LiteLLMChat +from .litellm import LiteLLM from .xinference import XinferenceLLM from .zhipu import ZhipuLLM @@ -88,7 +88,7 @@ def create_base_llm(model: ModelConfig) -> BaseLLM: abilities=model.abilities, ) elif provider == "litellm": - llm = LiteLLMChat( + llm = LiteLLM( model_name=model.model_name, api_key=model.api_key, api_base=model.base_url, diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py index 96e32ca27..d726206a5 100644 --- a/src/xagent/core/model/chat/basic/litellm.py +++ b/src/xagent/core/model/chat/basic/litellm.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -class LiteLLMChat(BaseLLM): +class LiteLLM(BaseLLM): """ LiteLLM client providing access to 100+ LLM providers through a unified interface. Uses provider-prefixed model names (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-6). From 4455ca0265e3f18844bffdc259f87ff1f88d4e85 Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 19 May 2026 21:11:29 +0530 Subject: [PATCH 4/7] test: add 20 unit tests for LiteLLM provider --- tests/core/model/chat/basic/test_litellm.py | 192 ++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 tests/core/model/chat/basic/test_litellm.py diff --git a/tests/core/model/chat/basic/test_litellm.py b/tests/core/model/chat/basic/test_litellm.py new file mode 100644 index 000000000..dd951eb74 --- /dev/null +++ b/tests/core/model/chat/basic/test_litellm.py @@ -0,0 +1,192 @@ +"""Test cases for LiteLLM chat model implementation.""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from xagent.core.model.chat.basic.litellm import LiteLLM +from xagent.core.model.chat.exceptions import LLMRetryableError, LLMTimeoutError + + +def _mock_response(content="Hello", prompt_tokens=10, completion_tokens=5): + usage = MagicMock() + usage.prompt_tokens = prompt_tokens + usage.completion_tokens = completion_tokens + choice = MagicMock() + choice.message.content = content + choice.message.tool_calls = None + resp = MagicMock() + resp.choices = [choice] + resp.usage = usage + return resp + + +def _mock_tool_response(name="get_weather", arguments='{"city": "Paris"}'): + tc = MagicMock() + tc.id = "call_123" + tc.function.name = name + tc.function.arguments = arguments + choice = MagicMock() + choice.message.content = None + choice.message.tool_calls = [tc] + resp = MagicMock() + resp.choices = [choice] + resp.usage = MagicMock(prompt_tokens=20, completion_tokens=10) + return resp + + +class TestLiteLLMInit: + def test_default_model(self): + llm = LiteLLM() + assert llm.model_name == "openai/gpt-4o-mini" + + def test_custom_model(self): + llm = LiteLLM(model_name="anthropic/claude-sonnet-4-6") + assert llm.model_name == "anthropic/claude-sonnet-4-6" + + def test_abilities_default(self): + llm = LiteLLM() + assert "chat" in llm.abilities + assert "tool_calling" in llm.abilities + + def test_abilities_custom(self): + llm = LiteLLM(abilities=["chat", "vision"]) + assert llm.abilities == ["chat", "vision"] + + def test_api_key_stored(self): + llm = LiteLLM(api_key="sk-test") + assert llm._api_key == "sk-test" + + def test_api_base_stored(self): + llm = LiteLLM(api_base="http://localhost:4000") + assert llm._api_base == "http://localhost:4000" + + def test_supports_thinking_mode_false(self): + llm = LiteLLM() + assert llm.supports_thinking_mode is False + + +class TestLiteLLMChat: + @pytest.mark.asyncio + async def test_basic_chat(self): + llm = LiteLLM(model_name="openai/gpt-4o") + resp = _mock_response("The answer is 4.") + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=resp) as mock: + result = await llm.chat([{"role": "user", "content": "What is 2+2?"}]) + assert result == "The answer is 4." + call_kwargs = mock.call_args.kwargs + assert call_kwargs["model"] == "openai/gpt-4o" + assert call_kwargs["drop_params"] is True + + @pytest.mark.asyncio + async def test_api_key_forwarded(self): + llm = LiteLLM(api_key="sk-test") + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()): + await llm.chat([{"role": "user", "content": "test"}]) + from litellm import acompletion + call_kwargs = acompletion.call_args.kwargs + assert call_kwargs["api_key"] == "sk-test" + + @pytest.mark.asyncio + async def test_api_key_omitted_when_none(self): + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + await llm.chat([{"role": "user", "content": "test"}]) + assert "api_key" not in mock.call_args.kwargs + + @pytest.mark.asyncio + async def test_api_base_forwarded(self): + llm = LiteLLM(api_base="http://proxy:4000") + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + await llm.chat([{"role": "user", "content": "test"}]) + assert mock.call_args.kwargs["api_base"] == "http://proxy:4000" + + @pytest.mark.asyncio + async def test_temperature_forwarded(self): + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + await llm.chat([{"role": "user", "content": "test"}], temperature=0.5) + assert mock.call_args.kwargs["temperature"] == 0.5 + + @pytest.mark.asyncio + async def test_default_temperature_used(self): + llm = LiteLLM(default_temperature=0.3) + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + await llm.chat([{"role": "user", "content": "test"}]) + assert mock.call_args.kwargs["temperature"] == 0.3 + + @pytest.mark.asyncio + async def test_null_content_returns_empty(self): + resp = _mock_response(content=None) + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=resp): + result = await llm.chat([{"role": "user", "content": "test"}]) + assert result == "" + + +class TestLiteLLMToolCalling: + @pytest.mark.asyncio + async def test_tool_call_returned(self): + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_tool_response()): + result = await llm.chat( + [{"role": "user", "content": "Weather?"}], + tools=[{"type": "function", "function": {"name": "get_weather"}}], + ) + assert result["type"] == "tool_call" + assert result["tool_calls"][0]["function"]["name"] == "get_weather" + + +class TestLiteLLMErrors: + @pytest.mark.asyncio + async def test_timeout_raises_llm_timeout_error(self): + import litellm as _litellm + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, + side_effect=_litellm.Timeout( + message="Request timed out", model="gpt-4o", llm_provider="openai")): + with pytest.raises(LLMTimeoutError): + await llm.chat([{"role": "user", "content": "test"}]) + + @pytest.mark.asyncio + async def test_rate_limit_raises_retryable_error(self): + import litellm as _litellm + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, + side_effect=_litellm.RateLimitError( + message="429", llm_provider="openai", model="gpt-4o")): + with pytest.raises(LLMRetryableError): + await llm.chat([{"role": "user", "content": "test"}]) + + @pytest.mark.asyncio + async def test_connection_error_raises_retryable_error(self): + import litellm as _litellm + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, + side_effect=_litellm.APIConnectionError( + message="Connection failed", llm_provider="openai", model="gpt-4o")): + with pytest.raises(LLMRetryableError): + await llm.chat([{"role": "user", "content": "test"}]) + + @pytest.mark.asyncio + async def test_auth_error_propagates(self): + import litellm as _litellm + llm = LiteLLM() + with patch("litellm.acompletion", new_callable=AsyncMock, + side_effect=_litellm.AuthenticationError( + message="Invalid key", llm_provider="openai", model="gpt-4o")): + with pytest.raises(_litellm.AuthenticationError): + await llm.chat([{"role": "user", "content": "test"}]) + + +class TestLiteLLMFactory: + def test_adapter_creates_litellm(self): + from xagent.core.model import ChatModelConfig + from xagent.core.model.chat.basic.adapter import create_base_llm + + config = ChatModelConfig( + id="test-litellm", + model_name="anthropic/claude-sonnet-4-6", + model_provider="litellm", + ) + llm = create_base_llm(config) + assert llm is not None From 67988a5e810cf8eee7779853b75fd82719a2ab79 Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Thu, 4 Jun 2026 03:35:45 +0530 Subject: [PATCH 5/7] fix: address gemini review - add raw field, use ChunkType.TOKEN, guard empty choices --- src/xagent/core/model/chat/basic/litellm.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py index d726206a5..54afc6198 100644 --- a/src/xagent/core/model/chat/basic/litellm.py +++ b/src/xagent/core/model/chat/basic/litellm.py @@ -109,6 +109,8 @@ async def chat( ) as e: raise LLMRetryableError(str(e)) from e + if not response.choices: + return "" choice = response.choices[0] message = choice.message @@ -131,7 +133,13 @@ async def chat( }, } ) - return {"type": "tool_call", "tool_calls": tool_calls} + return { + "type": "tool_call", + "tool_calls": tool_calls, + "raw": response.model_dump() + if hasattr(response, "model_dump") + else str(response), + } return message.content or "" @@ -197,4 +205,4 @@ async def stream_chat( continue content = getattr(delta, "content", None) if content: - yield StreamChunk(type=ChunkType.TEXT, content=content) + yield StreamChunk(type=ChunkType.TOKEN, content=content) From 242e4892d53cff15ef309ca44850b4bf76fd222b Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Thu, 4 Jun 2026 03:41:28 +0530 Subject: [PATCH 6/7] fix: full stream_chat tool call support and raise on empty choices --- src/xagent/core/model/chat/basic/litellm.py | 29 +++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py index 54afc6198..a119d6318 100644 --- a/src/xagent/core/model/chat/basic/litellm.py +++ b/src/xagent/core/model/chat/basic/litellm.py @@ -110,7 +110,7 @@ async def chat( raise LLMRetryableError(str(e)) from e if not response.choices: - return "" + raise LLMRetryableError("LiteLLM returned an empty response (no choices).") choice = response.choices[0] message = choice.message @@ -200,9 +200,28 @@ async def stream_chat( raise LLMRetryableError(str(e)) from e async for chunk in response: - delta = chunk.choices[0].delta if chunk.choices else None - if delta is None: + if not chunk.choices: continue - content = getattr(delta, "content", None) + delta = chunk.choices[0].delta + + content = delta.content if hasattr(delta, "content") else None if content: - yield StreamChunk(type=ChunkType.TOKEN, content=content) + yield StreamChunk(type=ChunkType.TOKEN, content=content, delta=content) + + if hasattr(delta, "tool_calls") and delta.tool_calls: + tool_calls = [] + for tc in delta.tool_calls: + tool_calls.append({ + "index": tc.index if hasattr(tc, "index") and tc.index is not None else 0, + "id": tc.id if hasattr(tc, "id") else None, + "type": "function", + "function": { + "name": tc.function.name if hasattr(tc, "function") and hasattr(tc.function, "name") else None, + "arguments": tc.function.arguments if hasattr(tc, "function") and hasattr(tc.function, "arguments") else "", + }, + }) + yield StreamChunk( + type=ChunkType.TOOL_CALL, + tool_calls=tool_calls, + raw=chunk.model_dump() if hasattr(chunk, "model_dump") else str(chunk), + ) From dbc2cb3319dfed63e96abba8e3cdf5eb937aa98c Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Wed, 10 Jun 2026 02:12:00 +0530 Subject: [PATCH 7/7] fix: isort import ordering and stream_chat tool call formatting --- src/xagent/core/model/chat/basic/adapter.py | 2 +- src/xagent/core/model/chat/basic/litellm.py | 32 ++++++--- tests/core/model/chat/basic/test_litellm.py | 78 +++++++++++++++------ 3 files changed, 81 insertions(+), 31 deletions(-) diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py index 4bd1b0d23..618086ac4 100644 --- a/src/xagent/core/model/chat/basic/adapter.py +++ b/src/xagent/core/model/chat/basic/adapter.py @@ -9,8 +9,8 @@ from .claude import ClaudeLLM from .deepseek import DeepSeekLLM from .gemini import GeminiLLM -from .openai import OpenAILLM from .litellm import LiteLLM +from .openai import OpenAILLM from .xinference import XinferenceLLM from .zhipu import ZhipuLLM diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py index a119d6318..dd9a7e0ef 100644 --- a/src/xagent/core/model/chat/basic/litellm.py +++ b/src/xagent/core/model/chat/basic/litellm.py @@ -211,17 +211,29 @@ async def stream_chat( if hasattr(delta, "tool_calls") and delta.tool_calls: tool_calls = [] for tc in delta.tool_calls: - tool_calls.append({ - "index": tc.index if hasattr(tc, "index") and tc.index is not None else 0, - "id": tc.id if hasattr(tc, "id") else None, - "type": "function", - "function": { - "name": tc.function.name if hasattr(tc, "function") and hasattr(tc.function, "name") else None, - "arguments": tc.function.arguments if hasattr(tc, "function") and hasattr(tc.function, "arguments") else "", - }, - }) + tool_calls.append( + { + "index": tc.index + if hasattr(tc, "index") and tc.index is not None + else 0, + "id": tc.id if hasattr(tc, "id") else None, + "type": "function", + "function": { + "name": tc.function.name + if hasattr(tc, "function") + and hasattr(tc.function, "name") + else None, + "arguments": tc.function.arguments + if hasattr(tc, "function") + and hasattr(tc.function, "arguments") + else "", + }, + } + ) yield StreamChunk( type=ChunkType.TOOL_CALL, tool_calls=tool_calls, - raw=chunk.model_dump() if hasattr(chunk, "model_dump") else str(chunk), + raw=chunk.model_dump() + if hasattr(chunk, "model_dump") + else str(chunk), ) diff --git a/tests/core/model/chat/basic/test_litellm.py b/tests/core/model/chat/basic/test_litellm.py index dd951eb74..af69e1065 100644 --- a/tests/core/model/chat/basic/test_litellm.py +++ b/tests/core/model/chat/basic/test_litellm.py @@ -1,8 +1,9 @@ """Test cases for LiteLLM chat model implementation.""" -import pytest from unittest.mock import AsyncMock, MagicMock, patch +import pytest + from xagent.core.model.chat.basic.litellm import LiteLLM from xagent.core.model.chat.exceptions import LLMRetryableError, LLMTimeoutError @@ -70,7 +71,9 @@ class TestLiteLLMChat: async def test_basic_chat(self): llm = LiteLLM(model_name="openai/gpt-4o") resp = _mock_response("The answer is 4.") - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=resp) as mock: + with patch( + "litellm.acompletion", new_callable=AsyncMock, return_value=resp + ) as mock: result = await llm.chat([{"role": "user", "content": "What is 2+2?"}]) assert result == "The answer is 4." call_kwargs = mock.call_args.kwargs @@ -80,37 +83,48 @@ async def test_basic_chat(self): @pytest.mark.asyncio async def test_api_key_forwarded(self): llm = LiteLLM(api_key="sk-test") - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()): + with patch( + "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response() + ): await llm.chat([{"role": "user", "content": "test"}]) from litellm import acompletion + call_kwargs = acompletion.call_args.kwargs assert call_kwargs["api_key"] == "sk-test" @pytest.mark.asyncio async def test_api_key_omitted_when_none(self): llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + with patch( + "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response() + ) as mock: await llm.chat([{"role": "user", "content": "test"}]) assert "api_key" not in mock.call_args.kwargs @pytest.mark.asyncio async def test_api_base_forwarded(self): llm = LiteLLM(api_base="http://proxy:4000") - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + with patch( + "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response() + ) as mock: await llm.chat([{"role": "user", "content": "test"}]) assert mock.call_args.kwargs["api_base"] == "http://proxy:4000" @pytest.mark.asyncio async def test_temperature_forwarded(self): llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + with patch( + "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response() + ) as mock: await llm.chat([{"role": "user", "content": "test"}], temperature=0.5) assert mock.call_args.kwargs["temperature"] == 0.5 @pytest.mark.asyncio async def test_default_temperature_used(self): llm = LiteLLM(default_temperature=0.3) - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock: + with patch( + "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response() + ) as mock: await llm.chat([{"role": "user", "content": "test"}]) assert mock.call_args.kwargs["temperature"] == 0.3 @@ -127,7 +141,11 @@ class TestLiteLLMToolCalling: @pytest.mark.asyncio async def test_tool_call_returned(self): llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_tool_response()): + with patch( + "litellm.acompletion", + new_callable=AsyncMock, + return_value=_mock_tool_response(), + ): result = await llm.chat( [{"role": "user", "content": "Weather?"}], tools=[{"type": "function", "function": {"name": "get_weather"}}], @@ -140,40 +158,60 @@ class TestLiteLLMErrors: @pytest.mark.asyncio async def test_timeout_raises_llm_timeout_error(self): import litellm as _litellm + llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, - side_effect=_litellm.Timeout( - message="Request timed out", model="gpt-4o", llm_provider="openai")): + with patch( + "litellm.acompletion", + new_callable=AsyncMock, + side_effect=_litellm.Timeout( + message="Request timed out", model="gpt-4o", llm_provider="openai" + ), + ): with pytest.raises(LLMTimeoutError): await llm.chat([{"role": "user", "content": "test"}]) @pytest.mark.asyncio async def test_rate_limit_raises_retryable_error(self): import litellm as _litellm + llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, - side_effect=_litellm.RateLimitError( - message="429", llm_provider="openai", model="gpt-4o")): + with patch( + "litellm.acompletion", + new_callable=AsyncMock, + side_effect=_litellm.RateLimitError( + message="429", llm_provider="openai", model="gpt-4o" + ), + ): with pytest.raises(LLMRetryableError): await llm.chat([{"role": "user", "content": "test"}]) @pytest.mark.asyncio async def test_connection_error_raises_retryable_error(self): import litellm as _litellm + llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, - side_effect=_litellm.APIConnectionError( - message="Connection failed", llm_provider="openai", model="gpt-4o")): + with patch( + "litellm.acompletion", + new_callable=AsyncMock, + side_effect=_litellm.APIConnectionError( + message="Connection failed", llm_provider="openai", model="gpt-4o" + ), + ): with pytest.raises(LLMRetryableError): await llm.chat([{"role": "user", "content": "test"}]) @pytest.mark.asyncio async def test_auth_error_propagates(self): import litellm as _litellm + llm = LiteLLM() - with patch("litellm.acompletion", new_callable=AsyncMock, - side_effect=_litellm.AuthenticationError( - message="Invalid key", llm_provider="openai", model="gpt-4o")): + with patch( + "litellm.acompletion", + new_callable=AsyncMock, + side_effect=_litellm.AuthenticationError( + message="Invalid key", llm_provider="openai", model="gpt-4o" + ), + ): with pytest.raises(_litellm.AuthenticationError): await llm.chat([{"role": "user", "content": "test"}])