From bcbecd70e5dc20e7ed81c70d9be2d936c2464a92 Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Tue, 19 May 2026 20:55:33 +0530
Subject: [PATCH 1/7] feat: add LiteLLM as chat model provider for 100+ LLM
 backends

---
 src/xagent/core/model/chat/basic/__init__.py |   2 +
 src/xagent/core/model/chat/basic/adapter.py  |  11 +
 src/xagent/core/model/chat/basic/litellm.py  | 200 +++++++++++++++++++
 3 files changed, 213 insertions(+)
 create mode 100644 src/xagent/core/model/chat/basic/litellm.py

diff --git a/src/xagent/core/model/chat/basic/__init__.py b/src/xagent/core/model/chat/basic/__init__.py
index d3eef90e1..681c0a65f 100644
--- a/src/xagent/core/model/chat/basic/__init__.py
+++ b/src/xagent/core/model/chat/basic/__init__.py
@@ -4,6 +4,7 @@
 from .claude import ClaudeLLM
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
+from .litellm import LiteLLMLLM
 from .openai import OpenAILLM
 from .zhipu import ZhipuLLM
 
@@ -15,5 +16,6 @@
     "ZhipuLLM",
     "GeminiLLM",
     "ClaudeLLM",
+    "LiteLLMLLM",
     "create_base_llm",
 ]
diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py
index 35f68e985..7dc673a17 100644
--- a/src/xagent/core/model/chat/basic/adapter.py
+++ b/src/xagent/core/model/chat/basic/adapter.py
@@ -10,6 +10,7 @@
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
 from .openai import OpenAILLM
+from .litellm import LiteLLMLLM
 from .xinference import XinferenceLLM
 from .zhipu import ZhipuLLM
 
@@ -86,6 +87,16 @@ def create_base_llm(model: ModelConfig) -> BaseLLM:
             timeout=model.timeout,
             abilities=model.abilities,
         )
+    elif provider == "litellm":
+        llm = LiteLLMLLM(
+            model_name=model.model_name,
+            api_key=model.api_key,
+            api_base=model.base_url,
+            default_temperature=model.default_temperature,
+            default_max_tokens=model.default_max_tokens,
+            timeout=model.timeout,
+            abilities=model.abilities,
+        )
     elif provider == "xinference":
         llm = XinferenceLLM(
             model_name=model.model_name,
diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py
new file mode 100644
index 000000000..beb412d4b
--- /dev/null
+++ b/src/xagent/core/model/chat/basic/litellm.py
@@ -0,0 +1,200 @@
+import logging
+from typing import Any, AsyncIterator, Dict, List, Optional, Union
+
+from ..exceptions import LLMRetryableError, LLMTimeoutError
+from ..timeout_config import TimeoutConfig
+from ..token_context import add_token_usage
+from ..types import ChunkType, StreamChunk
+from .base import BaseLLM
+
+logger = logging.getLogger(__name__)
+
+
+class LiteLLMLLM(BaseLLM):
+    """
+    LiteLLM client providing access to 100+ LLM providers through a unified interface.
+    Uses provider-prefixed model names (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-6).
+    """
+
+    def __init__(
+        self,
+        model_name: str = "openai/gpt-4o-mini",
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        default_temperature: Optional[float] = None,
+        default_max_tokens: Optional[int] = None,
+        timeout: float = 180.0,
+        abilities: Optional[List[str]] = None,
+        timeout_config: Optional[TimeoutConfig] = None,
+    ):
+        self._model_name = model_name
+        self._api_key = api_key
+        self._api_base = api_base
+        self.default_temperature = default_temperature
+        self.default_max_tokens = default_max_tokens
+        self.timeout = timeout
+        self.timeout_config = timeout_config or TimeoutConfig()
+
+        if abilities:
+            self._abilities = abilities
+        else:
+            self._abilities = ["chat", "tool_calling"]
+
+    @property
+    def model_name(self) -> str:
+        return self._model_name
+
+    @property
+    def abilities(self) -> List[str]:
+        return self._abilities
+
+    @property
+    def supports_thinking_mode(self) -> bool:
+        return False
+
+    async def chat(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        response_format: Optional[Dict[str, Any]] = None,
+        thinking: Optional[Dict[str, Any]] = None,
+        output_config: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Perform a chat completion via LiteLLM."""
+        import litellm
+
+        completion_params: Dict[str, Any] = {
+            "model": self._model_name,
+            "messages": self._sanitize_unicode_content(messages),
+            "drop_params": True,
+            "timeout": self.timeout,
+            **kwargs,
+        }
+
+        if max_tokens is not None:
+            completion_params["max_tokens"] = max_tokens
+        elif self.default_max_tokens is not None:
+            completion_params["max_tokens"] = self.default_max_tokens
+
+        if temperature is not None:
+            completion_params["temperature"] = temperature
+        elif self.default_temperature is not None:
+            completion_params["temperature"] = self.default_temperature
+
+        if tools:
+            completion_params["tools"] = tools
+        if tool_choice:
+            completion_params["tool_choice"] = tool_choice
+        if response_format:
+            completion_params["response_format"] = response_format
+
+        if self._api_key:
+            completion_params["api_key"] = self._api_key
+        if self._api_base:
+            completion_params["api_base"] = self._api_base
+
+        try:
+            response = await litellm.acompletion(**completion_params)
+        except litellm.Timeout as e:
+            raise LLMTimeoutError(str(e)) from e
+        except (
+            litellm.RateLimitError,
+            litellm.APIConnectionError,
+            litellm.ServiceUnavailableError,
+            litellm.InternalServerError,
+        ) as e:
+            raise LLMRetryableError(str(e)) from e
+
+        choice = response.choices[0]
+        message = choice.message
+
+        if hasattr(response, "usage") and response.usage:
+            add_token_usage(
+                input_tokens=getattr(response.usage, "prompt_tokens", 0) or 0,
+                output_tokens=getattr(response.usage, "completion_tokens", 0) or 0,
+            )
+
+        if hasattr(message, "tool_calls") and message.tool_calls:
+            tool_calls = []
+            for tc in message.tool_calls:
+                tool_calls.append(
+                    {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        },
+                    }
+                )
+            return {"type": "tool_call", "tool_calls": tool_calls}
+
+        return message.content or ""
+
+    async def stream_chat(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        response_format: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamChunk]:
+        """Stream a chat completion via LiteLLM."""
+        import litellm
+
+        completion_params: Dict[str, Any] = {
+            "model": self._model_name,
+            "messages": self._sanitize_unicode_content(messages),
+            "stream": True,
+            "drop_params": True,
+            "timeout": self.timeout,
+            **kwargs,
+        }
+
+        if max_tokens is not None:
+            completion_params["max_tokens"] = max_tokens
+        elif self.default_max_tokens is not None:
+            completion_params["max_tokens"] = self.default_max_tokens
+
+        if temperature is not None:
+            completion_params["temperature"] = temperature
+        elif self.default_temperature is not None:
+            completion_params["temperature"] = self.default_temperature
+
+        if tools:
+            completion_params["tools"] = tools
+        if tool_choice:
+            completion_params["tool_choice"] = tool_choice
+        if response_format:
+            completion_params["response_format"] = response_format
+
+        if self._api_key:
+            completion_params["api_key"] = self._api_key
+        if self._api_base:
+            completion_params["api_base"] = self._api_base
+
+        try:
+            response = await litellm.acompletion(**completion_params)
+        except litellm.Timeout as e:
+            raise LLMTimeoutError(str(e)) from e
+        except (
+            litellm.RateLimitError,
+            litellm.APIConnectionError,
+            litellm.ServiceUnavailableError,
+            litellm.InternalServerError,
+        ) as e:
+            raise LLMRetryableError(str(e)) from e
+
+        async for chunk in response:
+            delta = chunk.choices[0].delta if chunk.choices else None
+            if delta is None:
+                continue
+            content = getattr(delta, "content", None)
+            if content:
+                yield StreamChunk(type=ChunkType.TEXT, content=content)

From 6c109d9bfa7995a953fa83c92dc1c71aa2b127cd Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Tue, 19 May 2026 20:57:03 +0530
Subject: [PATCH 2/7] fix: rename LiteLLMLLM to LiteLLMChat

---
 src/xagent/core/model/chat/basic/__init__.py | 4 ++--
 src/xagent/core/model/chat/basic/adapter.py  | 4 ++--
 src/xagent/core/model/chat/basic/litellm.py  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/xagent/core/model/chat/basic/__init__.py b/src/xagent/core/model/chat/basic/__init__.py
index 681c0a65f..5b6158e0b 100644
--- a/src/xagent/core/model/chat/basic/__init__.py
+++ b/src/xagent/core/model/chat/basic/__init__.py
@@ -4,7 +4,7 @@
 from .claude import ClaudeLLM
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
-from .litellm import LiteLLMLLM
+from .litellm import LiteLLMChat
 from .openai import OpenAILLM
 from .zhipu import ZhipuLLM
 
@@ -16,6 +16,6 @@
     "ZhipuLLM",
     "GeminiLLM",
     "ClaudeLLM",
-    "LiteLLMLLM",
+    "LiteLLMChat",
     "create_base_llm",
 ]
diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py
index 7dc673a17..7ce0fcf53 100644
--- a/src/xagent/core/model/chat/basic/adapter.py
+++ b/src/xagent/core/model/chat/basic/adapter.py
@@ -10,7 +10,7 @@
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
 from .openai import OpenAILLM
-from .litellm import LiteLLMLLM
+from .litellm import LiteLLMChat
 from .xinference import XinferenceLLM
 from .zhipu import ZhipuLLM
 
@@ -88,7 +88,7 @@ def create_base_llm(model: ModelConfig) -> BaseLLM:
             abilities=model.abilities,
         )
     elif provider == "litellm":
-        llm = LiteLLMLLM(
+        llm = LiteLLMChat(
             model_name=model.model_name,
             api_key=model.api_key,
             api_base=model.base_url,
diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py
index beb412d4b..96e32ca27 100644
--- a/src/xagent/core/model/chat/basic/litellm.py
+++ b/src/xagent/core/model/chat/basic/litellm.py
@@ -10,7 +10,7 @@
 logger = logging.getLogger(__name__)
 
 
-class LiteLLMLLM(BaseLLM):
+class LiteLLMChat(BaseLLM):
     """
     LiteLLM client providing access to 100+ LLM providers through a unified interface.
     Uses provider-prefixed model names (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-6).

From cc9ca25832066f1ec16acafd8c4a199691e8a98f Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Tue, 19 May 2026 20:58:24 +0530
Subject: [PATCH 3/7] fix: rename LiteLLMChat class to LiteLLM

---
 src/xagent/core/model/chat/basic/__init__.py | 4 ++--
 src/xagent/core/model/chat/basic/adapter.py  | 4 ++--
 src/xagent/core/model/chat/basic/litellm.py  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/xagent/core/model/chat/basic/__init__.py b/src/xagent/core/model/chat/basic/__init__.py
index 5b6158e0b..3f86b3e85 100644
--- a/src/xagent/core/model/chat/basic/__init__.py
+++ b/src/xagent/core/model/chat/basic/__init__.py
@@ -4,7 +4,7 @@
 from .claude import ClaudeLLM
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
-from .litellm import LiteLLMChat
+from .litellm import LiteLLM
 from .openai import OpenAILLM
 from .zhipu import ZhipuLLM
 
@@ -16,6 +16,6 @@
     "ZhipuLLM",
     "GeminiLLM",
     "ClaudeLLM",
-    "LiteLLMChat",
+    "LiteLLM",
     "create_base_llm",
 ]
diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py
index 7ce0fcf53..4bd1b0d23 100644
--- a/src/xagent/core/model/chat/basic/adapter.py
+++ b/src/xagent/core/model/chat/basic/adapter.py
@@ -10,7 +10,7 @@
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
 from .openai import OpenAILLM
-from .litellm import LiteLLMChat
+from .litellm import LiteLLM
 from .xinference import XinferenceLLM
 from .zhipu import ZhipuLLM
 
@@ -88,7 +88,7 @@ def create_base_llm(model: ModelConfig) -> BaseLLM:
             abilities=model.abilities,
         )
     elif provider == "litellm":
-        llm = LiteLLMChat(
+        llm = LiteLLM(
             model_name=model.model_name,
             api_key=model.api_key,
             api_base=model.base_url,
diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py
index 96e32ca27..d726206a5 100644
--- a/src/xagent/core/model/chat/basic/litellm.py
+++ b/src/xagent/core/model/chat/basic/litellm.py
@@ -10,7 +10,7 @@
 logger = logging.getLogger(__name__)
 
 
-class LiteLLMChat(BaseLLM):
+class LiteLLM(BaseLLM):
     """
     LiteLLM client providing access to 100+ LLM providers through a unified interface.
     Uses provider-prefixed model names (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-6).

From 4455ca0265e3f18844bffdc259f87ff1f88d4e85 Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Tue, 19 May 2026 21:11:29 +0530
Subject: [PATCH 4/7] test: add 20 unit tests for LiteLLM provider

---
 tests/core/model/chat/basic/test_litellm.py | 192 ++++++++++++++++++++
 1 file changed, 192 insertions(+)
 create mode 100644 tests/core/model/chat/basic/test_litellm.py

diff --git a/tests/core/model/chat/basic/test_litellm.py b/tests/core/model/chat/basic/test_litellm.py
new file mode 100644
index 000000000..dd951eb74
--- /dev/null
+++ b/tests/core/model/chat/basic/test_litellm.py
@@ -0,0 +1,192 @@
+"""Test cases for LiteLLM chat model implementation."""
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+
+from xagent.core.model.chat.basic.litellm import LiteLLM
+from xagent.core.model.chat.exceptions import LLMRetryableError, LLMTimeoutError
+
+
+def _mock_response(content="Hello", prompt_tokens=10, completion_tokens=5):
+    usage = MagicMock()
+    usage.prompt_tokens = prompt_tokens
+    usage.completion_tokens = completion_tokens
+    choice = MagicMock()
+    choice.message.content = content
+    choice.message.tool_calls = None
+    resp = MagicMock()
+    resp.choices = [choice]
+    resp.usage = usage
+    return resp
+
+
+def _mock_tool_response(name="get_weather", arguments='{"city": "Paris"}'):
+    tc = MagicMock()
+    tc.id = "call_123"
+    tc.function.name = name
+    tc.function.arguments = arguments
+    choice = MagicMock()
+    choice.message.content = None
+    choice.message.tool_calls = [tc]
+    resp = MagicMock()
+    resp.choices = [choice]
+    resp.usage = MagicMock(prompt_tokens=20, completion_tokens=10)
+    return resp
+
+
+class TestLiteLLMInit:
+    def test_default_model(self):
+        llm = LiteLLM()
+        assert llm.model_name == "openai/gpt-4o-mini"
+
+    def test_custom_model(self):
+        llm = LiteLLM(model_name="anthropic/claude-sonnet-4-6")
+        assert llm.model_name == "anthropic/claude-sonnet-4-6"
+
+    def test_abilities_default(self):
+        llm = LiteLLM()
+        assert "chat" in llm.abilities
+        assert "tool_calling" in llm.abilities
+
+    def test_abilities_custom(self):
+        llm = LiteLLM(abilities=["chat", "vision"])
+        assert llm.abilities == ["chat", "vision"]
+
+    def test_api_key_stored(self):
+        llm = LiteLLM(api_key="sk-test")
+        assert llm._api_key == "sk-test"
+
+    def test_api_base_stored(self):
+        llm = LiteLLM(api_base="http://localhost:4000")
+        assert llm._api_base == "http://localhost:4000"
+
+    def test_supports_thinking_mode_false(self):
+        llm = LiteLLM()
+        assert llm.supports_thinking_mode is False
+
+
+class TestLiteLLMChat:
+    @pytest.mark.asyncio
+    async def test_basic_chat(self):
+        llm = LiteLLM(model_name="openai/gpt-4o")
+        resp = _mock_response("The answer is 4.")
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=resp) as mock:
+            result = await llm.chat([{"role": "user", "content": "What is 2+2?"}])
+            assert result == "The answer is 4."
+            call_kwargs = mock.call_args.kwargs
+            assert call_kwargs["model"] == "openai/gpt-4o"
+            assert call_kwargs["drop_params"] is True
+
+    @pytest.mark.asyncio
+    async def test_api_key_forwarded(self):
+        llm = LiteLLM(api_key="sk-test")
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()):
+            await llm.chat([{"role": "user", "content": "test"}])
+            from litellm import acompletion
+            call_kwargs = acompletion.call_args.kwargs
+            assert call_kwargs["api_key"] == "sk-test"
+
+    @pytest.mark.asyncio
+    async def test_api_key_omitted_when_none(self):
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+            await llm.chat([{"role": "user", "content": "test"}])
+            assert "api_key" not in mock.call_args.kwargs
+
+    @pytest.mark.asyncio
+    async def test_api_base_forwarded(self):
+        llm = LiteLLM(api_base="http://proxy:4000")
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+            await llm.chat([{"role": "user", "content": "test"}])
+            assert mock.call_args.kwargs["api_base"] == "http://proxy:4000"
+
+    @pytest.mark.asyncio
+    async def test_temperature_forwarded(self):
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+            await llm.chat([{"role": "user", "content": "test"}], temperature=0.5)
+            assert mock.call_args.kwargs["temperature"] == 0.5
+
+    @pytest.mark.asyncio
+    async def test_default_temperature_used(self):
+        llm = LiteLLM(default_temperature=0.3)
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+            await llm.chat([{"role": "user", "content": "test"}])
+            assert mock.call_args.kwargs["temperature"] == 0.3
+
+    @pytest.mark.asyncio
+    async def test_null_content_returns_empty(self):
+        resp = _mock_response(content=None)
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=resp):
+            result = await llm.chat([{"role": "user", "content": "test"}])
+            assert result == ""
+
+
+class TestLiteLLMToolCalling:
+    @pytest.mark.asyncio
+    async def test_tool_call_returned(self):
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_tool_response()):
+            result = await llm.chat(
+                [{"role": "user", "content": "Weather?"}],
+                tools=[{"type": "function", "function": {"name": "get_weather"}}],
+            )
+            assert result["type"] == "tool_call"
+            assert result["tool_calls"][0]["function"]["name"] == "get_weather"
+
+
+class TestLiteLLMErrors:
+    @pytest.mark.asyncio
+    async def test_timeout_raises_llm_timeout_error(self):
+        import litellm as _litellm
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock,
+                   side_effect=_litellm.Timeout(
+                       message="Request timed out", model="gpt-4o", llm_provider="openai")):
+            with pytest.raises(LLMTimeoutError):
+                await llm.chat([{"role": "user", "content": "test"}])
+
+    @pytest.mark.asyncio
+    async def test_rate_limit_raises_retryable_error(self):
+        import litellm as _litellm
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock,
+                   side_effect=_litellm.RateLimitError(
+                       message="429", llm_provider="openai", model="gpt-4o")):
+            with pytest.raises(LLMRetryableError):
+                await llm.chat([{"role": "user", "content": "test"}])
+
+    @pytest.mark.asyncio
+    async def test_connection_error_raises_retryable_error(self):
+        import litellm as _litellm
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock,
+                   side_effect=_litellm.APIConnectionError(
+                       message="Connection failed", llm_provider="openai", model="gpt-4o")):
+            with pytest.raises(LLMRetryableError):
+                await llm.chat([{"role": "user", "content": "test"}])
+
+    @pytest.mark.asyncio
+    async def test_auth_error_propagates(self):
+        import litellm as _litellm
+        llm = LiteLLM()
+        with patch("litellm.acompletion", new_callable=AsyncMock,
+                   side_effect=_litellm.AuthenticationError(
+                       message="Invalid key", llm_provider="openai", model="gpt-4o")):
+            with pytest.raises(_litellm.AuthenticationError):
+                await llm.chat([{"role": "user", "content": "test"}])
+
+
+class TestLiteLLMFactory:
+    def test_adapter_creates_litellm(self):
+        from xagent.core.model import ChatModelConfig
+        from xagent.core.model.chat.basic.adapter import create_base_llm
+
+        config = ChatModelConfig(
+            id="test-litellm",
+            model_name="anthropic/claude-sonnet-4-6",
+            model_provider="litellm",
+        )
+        llm = create_base_llm(config)
+        assert llm is not None

From 67988a5e810cf8eee7779853b75fd82719a2ab79 Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Thu, 4 Jun 2026 03:35:45 +0530
Subject: [PATCH 5/7] fix: address Gemini review - add raw field, use
 ChunkType.TOKEN, guard empty choices

---
 src/xagent/core/model/chat/basic/litellm.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py
index d726206a5..54afc6198 100644
--- a/src/xagent/core/model/chat/basic/litellm.py
+++ b/src/xagent/core/model/chat/basic/litellm.py
@@ -109,6 +109,8 @@ async def chat(
         ) as e:
             raise LLMRetryableError(str(e)) from e
 
+        if not response.choices:
+            return ""
         choice = response.choices[0]
         message = choice.message
 
@@ -131,7 +133,13 @@ async def chat(
                         },
                     }
                 )
-            return {"type": "tool_call", "tool_calls": tool_calls}
+            return {
+                "type": "tool_call",
+                "tool_calls": tool_calls,
+                "raw": response.model_dump()
+                if hasattr(response, "model_dump")
+                else str(response),
+            }
 
         return message.content or ""
 
@@ -197,4 +205,4 @@ async def stream_chat(
                 continue
             content = getattr(delta, "content", None)
             if content:
-                yield StreamChunk(type=ChunkType.TEXT, content=content)
+                yield StreamChunk(type=ChunkType.TOKEN, content=content)

From 242e4892d53cff15ef309ca44850b4bf76fd222b Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Thu, 4 Jun 2026 03:41:28 +0530
Subject: [PATCH 6/7] fix: add full stream_chat tool call support and raise
 on empty choices

---
 src/xagent/core/model/chat/basic/litellm.py | 29 +++++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py
index 54afc6198..a119d6318 100644
--- a/src/xagent/core/model/chat/basic/litellm.py
+++ b/src/xagent/core/model/chat/basic/litellm.py
@@ -110,7 +110,7 @@ async def chat(
             raise LLMRetryableError(str(e)) from e
 
         if not response.choices:
-            return ""
+            raise LLMRetryableError("LiteLLM returned an empty response (no choices).")
         choice = response.choices[0]
         message = choice.message
 
@@ -200,9 +200,28 @@ async def stream_chat(
             raise LLMRetryableError(str(e)) from e
 
         async for chunk in response:
-            delta = chunk.choices[0].delta if chunk.choices else None
-            if delta is None:
+            if not chunk.choices:
                 continue
-            content = getattr(delta, "content", None)
+            delta = chunk.choices[0].delta
+
+            content = delta.content if hasattr(delta, "content") else None
             if content:
-                yield StreamChunk(type=ChunkType.TOKEN, content=content)
+                yield StreamChunk(type=ChunkType.TOKEN, content=content, delta=content)
+
+            if hasattr(delta, "tool_calls") and delta.tool_calls:
+                tool_calls = []
+                for tc in delta.tool_calls:
+                    tool_calls.append({
+                        "index": tc.index if hasattr(tc, "index") and tc.index is not None else 0,
+                        "id": tc.id if hasattr(tc, "id") else None,
+                        "type": "function",
+                        "function": {
+                            "name": tc.function.name if hasattr(tc, "function") and hasattr(tc.function, "name") else None,
+                            "arguments": tc.function.arguments if hasattr(tc, "function") and hasattr(tc.function, "arguments") else "",
+                        },
+                    })
+                yield StreamChunk(
+                    type=ChunkType.TOOL_CALL,
+                    tool_calls=tool_calls,
+                    raw=chunk.model_dump() if hasattr(chunk, "model_dump") else str(chunk),
+                )

From dbc2cb3319dfed63e96abba8e3cdf5eb937aa98c Mon Sep 17 00:00:00 2001
From: RheagalFire <arishalam121@gmail.com>
Date: Wed, 10 Jun 2026 02:12:00 +0530
Subject: [PATCH 7/7] fix: isort import ordering and stream_chat tool call
 formatting

---
 src/xagent/core/model/chat/basic/adapter.py |  2 +-
 src/xagent/core/model/chat/basic/litellm.py | 32 ++++++---
 tests/core/model/chat/basic/test_litellm.py | 78 +++++++++++++++------
 3 files changed, 81 insertions(+), 31 deletions(-)

diff --git a/src/xagent/core/model/chat/basic/adapter.py b/src/xagent/core/model/chat/basic/adapter.py
index 4bd1b0d23..618086ac4 100644
--- a/src/xagent/core/model/chat/basic/adapter.py
+++ b/src/xagent/core/model/chat/basic/adapter.py
@@ -9,8 +9,8 @@
 from .claude import ClaudeLLM
 from .deepseek import DeepSeekLLM
 from .gemini import GeminiLLM
-from .openai import OpenAILLM
 from .litellm import LiteLLM
+from .openai import OpenAILLM
 from .xinference import XinferenceLLM
 from .zhipu import ZhipuLLM
 
diff --git a/src/xagent/core/model/chat/basic/litellm.py b/src/xagent/core/model/chat/basic/litellm.py
index a119d6318..dd9a7e0ef 100644
--- a/src/xagent/core/model/chat/basic/litellm.py
+++ b/src/xagent/core/model/chat/basic/litellm.py
@@ -211,17 +211,29 @@ async def stream_chat(
             if hasattr(delta, "tool_calls") and delta.tool_calls:
                 tool_calls = []
                 for tc in delta.tool_calls:
-                    tool_calls.append({
-                        "index": tc.index if hasattr(tc, "index") and tc.index is not None else 0,
-                        "id": tc.id if hasattr(tc, "id") else None,
-                        "type": "function",
-                        "function": {
-                            "name": tc.function.name if hasattr(tc, "function") and hasattr(tc.function, "name") else None,
-                            "arguments": tc.function.arguments if hasattr(tc, "function") and hasattr(tc.function, "arguments") else "",
-                        },
-                    })
+                    tool_calls.append(
+                        {
+                            "index": tc.index
+                            if hasattr(tc, "index") and tc.index is not None
+                            else 0,
+                            "id": tc.id if hasattr(tc, "id") else None,
+                            "type": "function",
+                            "function": {
+                                "name": tc.function.name
+                                if hasattr(tc, "function")
+                                and hasattr(tc.function, "name")
+                                else None,
+                                "arguments": tc.function.arguments
+                                if hasattr(tc, "function")
+                                and hasattr(tc.function, "arguments")
+                                else "",
+                            },
+                        }
+                    )
                 yield StreamChunk(
                     type=ChunkType.TOOL_CALL,
                     tool_calls=tool_calls,
-                    raw=chunk.model_dump() if hasattr(chunk, "model_dump") else str(chunk),
+                    raw=chunk.model_dump()
+                    if hasattr(chunk, "model_dump")
+                    else str(chunk),
                 )
diff --git a/tests/core/model/chat/basic/test_litellm.py b/tests/core/model/chat/basic/test_litellm.py
index dd951eb74..af69e1065 100644
--- a/tests/core/model/chat/basic/test_litellm.py
+++ b/tests/core/model/chat/basic/test_litellm.py
@@ -1,8 +1,9 @@
 """Test cases for LiteLLM chat model implementation."""
 
-import pytest
 from unittest.mock import AsyncMock, MagicMock, patch
 
+import pytest
+
 from xagent.core.model.chat.basic.litellm import LiteLLM
 from xagent.core.model.chat.exceptions import LLMRetryableError, LLMTimeoutError
 
@@ -70,7 +71,9 @@ class TestLiteLLMChat:
     async def test_basic_chat(self):
         llm = LiteLLM(model_name="openai/gpt-4o")
         resp = _mock_response("The answer is 4.")
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=resp) as mock:
+        with patch(
+            "litellm.acompletion", new_callable=AsyncMock, return_value=resp
+        ) as mock:
             result = await llm.chat([{"role": "user", "content": "What is 2+2?"}])
             assert result == "The answer is 4."
             call_kwargs = mock.call_args.kwargs
@@ -80,37 +83,48 @@ async def test_basic_chat(self):
     @pytest.mark.asyncio
     async def test_api_key_forwarded(self):
         llm = LiteLLM(api_key="sk-test")
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()):
+        with patch(
+            "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()
+        ):
             await llm.chat([{"role": "user", "content": "test"}])
             from litellm import acompletion
+
             call_kwargs = acompletion.call_args.kwargs
             assert call_kwargs["api_key"] == "sk-test"
 
     @pytest.mark.asyncio
     async def test_api_key_omitted_when_none(self):
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+        with patch(
+            "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()
+        ) as mock:
             await llm.chat([{"role": "user", "content": "test"}])
             assert "api_key" not in mock.call_args.kwargs
 
     @pytest.mark.asyncio
     async def test_api_base_forwarded(self):
         llm = LiteLLM(api_base="http://proxy:4000")
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+        with patch(
+            "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()
+        ) as mock:
             await llm.chat([{"role": "user", "content": "test"}])
             assert mock.call_args.kwargs["api_base"] == "http://proxy:4000"
 
     @pytest.mark.asyncio
     async def test_temperature_forwarded(self):
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+        with patch(
+            "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()
+        ) as mock:
             await llm.chat([{"role": "user", "content": "test"}], temperature=0.5)
             assert mock.call_args.kwargs["temperature"] == 0.5
 
     @pytest.mark.asyncio
     async def test_default_temperature_used(self):
         llm = LiteLLM(default_temperature=0.3)
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()) as mock:
+        with patch(
+            "litellm.acompletion", new_callable=AsyncMock, return_value=_mock_response()
+        ) as mock:
             await llm.chat([{"role": "user", "content": "test"}])
             assert mock.call_args.kwargs["temperature"] == 0.3
 
@@ -127,7 +141,11 @@ class TestLiteLLMToolCalling:
     @pytest.mark.asyncio
     async def test_tool_call_returned(self):
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock, return_value=_mock_tool_response()):
+        with patch(
+            "litellm.acompletion",
+            new_callable=AsyncMock,
+            return_value=_mock_tool_response(),
+        ):
             result = await llm.chat(
                 [{"role": "user", "content": "Weather?"}],
                 tools=[{"type": "function", "function": {"name": "get_weather"}}],
@@ -140,40 +158,60 @@ class TestLiteLLMErrors:
     @pytest.mark.asyncio
     async def test_timeout_raises_llm_timeout_error(self):
         import litellm as _litellm
+
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock,
-                   side_effect=_litellm.Timeout(
-                       message="Request timed out", model="gpt-4o", llm_provider="openai")):
+        with patch(
+            "litellm.acompletion",
+            new_callable=AsyncMock,
+            side_effect=_litellm.Timeout(
+                message="Request timed out", model="gpt-4o", llm_provider="openai"
+            ),
+        ):
             with pytest.raises(LLMTimeoutError):
                 await llm.chat([{"role": "user", "content": "test"}])
 
     @pytest.mark.asyncio
     async def test_rate_limit_raises_retryable_error(self):
         import litellm as _litellm
+
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock,
-                   side_effect=_litellm.RateLimitError(
-                       message="429", llm_provider="openai", model="gpt-4o")):
+        with patch(
+            "litellm.acompletion",
+            new_callable=AsyncMock,
+            side_effect=_litellm.RateLimitError(
+                message="429", llm_provider="openai", model="gpt-4o"
+            ),
+        ):
             with pytest.raises(LLMRetryableError):
                 await llm.chat([{"role": "user", "content": "test"}])
 
     @pytest.mark.asyncio
     async def test_connection_error_raises_retryable_error(self):
         import litellm as _litellm
+
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock,
-                   side_effect=_litellm.APIConnectionError(
-                       message="Connection failed", llm_provider="openai", model="gpt-4o")):
+        with patch(
+            "litellm.acompletion",
+            new_callable=AsyncMock,
+            side_effect=_litellm.APIConnectionError(
+                message="Connection failed", llm_provider="openai", model="gpt-4o"
+            ),
+        ):
             with pytest.raises(LLMRetryableError):
                 await llm.chat([{"role": "user", "content": "test"}])
 
     @pytest.mark.asyncio
     async def test_auth_error_propagates(self):
         import litellm as _litellm
+
         llm = LiteLLM()
-        with patch("litellm.acompletion", new_callable=AsyncMock,
-                   side_effect=_litellm.AuthenticationError(
-                       message="Invalid key", llm_provider="openai", model="gpt-4o")):
+        with patch(
+            "litellm.acompletion",
+            new_callable=AsyncMock,
+            side_effect=_litellm.AuthenticationError(
+                message="Invalid key", llm_provider="openai", model="gpt-4o"
+            ),
+        ):
             with pytest.raises(_litellm.AuthenticationError):
                 await llm.chat([{"role": "user", "content": "test"}])