diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
index 9c60255d6928..ee543c47d4af 100644
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -324,6 +324,17 @@ Supported models:
Flags: `--tool-call-parser minimax --chat-template examples/tool_chat_template_minimax_m1.jinja`
+### Nemotron Nano v2 Models (`nemotron_nano_v2`)
+
+Supported models:
+
+* `nvidia/NVIDIA-Nemotron-Nano-9B-v2` (and FP8/NVFP4 variants; use with [examples/tool_chat_template_nemotron_nano_v2.jinja](../../examples/tool_chat_template_nemotron_nano_v2.jinja))
+* `nvidia/NVIDIA-Nemotron-Nano-12B-v2` (and FP8/NVFP4 variants; use with [examples/tool_chat_template_nemotron_nano_v2.jinja](../../examples/tool_chat_template_nemotron_nano_v2.jinja))
+
+The parser handles the `[{"name": ..., "arguments": ...}, ...]` envelope emitted by the Nemotron chat template, and works with the model's hybrid thinking mode: any `...` prefix is preserved as message content (or stripped by a reasoning parser if one is configured).
+
+Flags: `--tool-call-parser nemotron_nano_v2 --chat-template examples/tool_chat_template_nemotron_nano_v2.jinja`
+
### DeepSeek-V3 Models (`deepseek_v3`)
Supported models:
@@ -510,8 +521,8 @@ Here is a summary of a plugin file:
# in --tool-call-parser. you can define as many
# tool parsers as you want here.
class ExampleToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools=None):
+ super().__init__(tokenizer, tools)
# adjust request. e.g.: set skip special tokens
# to False for tool call output.
diff --git a/examples/tool_chat_template_nemotron_nano_v2.jinja b/examples/tool_chat_template_nemotron_nano_v2.jinja
new file mode 100644
index 000000000000..5bb9478d9974
--- /dev/null
+++ b/examples/tool_chat_template_nemotron_nano_v2.jinja
@@ -0,0 +1,137 @@
+{%- set ns = namespace(enable_thinking=true) -%}
+
+{%- for message in messages -%}
+ {%- set content = message['content'] -%}
+ {%- if message['role'] == 'user' or message['role'] == 'system' -%}
+ {%- if '/think' in content -%}
+ {%- set ns.enable_thinking = true -%}
+ {%- elif '/no_think' in content -%}
+ {%- set ns.enable_thinking = false -%}
+ {%- endif -%}
+ {%- endif -%}
+{%- endfor -%}
+
+{%- if messages[0]['role'] != 'system' -%}
+ {%- set ns.non_tool_system_content = '' -%}
+ {{- 'System\n' -}}
+{%- else -%}
+ {%- set ns.non_tool_system_content = messages[0]['content']
+ .replace('/think', '')
+ .replace('/no_think', '')
+ .strip()
+ -%}
+ {{- 'System\n' + ns.non_tool_system_content }}
+{%- endif -%}
+
+{%- if tools -%}
+ {%- if ns.non_tool_system_content is defined
+ and ns.non_tool_system_content != '' -%}
+ {{- '\n\n' -}}
+ {%- endif -%}
+
+ {{- 'You can use the following tools to assist the user if required:' -}}
+ {{- '\n[' -}}
+ {%- for tool in tools -%}
+ {{- (tool.function if tool.function is defined else tool) | tojson -}}
+ {{- ', ' if not loop.last else '' -}}
+ {%- endfor -%}
+ {{- ']\n\n' -}}
+
+ {{- 'If you decide to call any tool(s), use the following format:\n' -}}
+ {{- '[{{"name": "tool_name1", "arguments": "tool_args1"}}, ' -}}
+ {{- '{{"name": "tool_name2", "arguments": "tool_args2"}}]\n\n' -}}
+
+ {{- 'The user will execute tool-calls and return responses from tool(s) in this format:\n' -}}
+ {{- '[{{"tool_response1"}}, {{"tool_response2"}}]\n\n' -}}
+
+ {{- 'Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user.' -}}
+{%- endif -%}
+
+{{- '\n' -}}
+
+{%- set messages = messages[1:] if messages[0]['role'] == 'system' else messages -%}
+
+{%- if messages[-1]['role'] == 'assistant' -%}
+ {%- set ns.last_turn_assistant_content = messages[-1]['content'].strip() -%}
+ {%- set messages = messages[:-1] -%}
+{%- endif -%}
+
+{%- for message in messages -%}
+ {%- set content = message['content'] -%}
+
+ {%- if message['role'] == 'user' -%}
+ {{- 'User\n' + content.replace('/think', '').replace('/no_think', '').strip() + '\n' }}
+
+ {%- elif message['role'] == 'tool' -%}
+ {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') -%}
+ {{- 'User\n' + '[' }}
+ {%- endif -%}
+ {{- message['content'] -}}
+ {{- ', ' if not loop.last and (messages[loop.index0 + 1].role == 'tool') else '' -}}
+ {%- if loop.last or (messages[loop.index0 + 1].role != 'tool') -%}
+ {{- ']\n' -}}
+ {%- endif -%}
+
+ {%- elif message['role'] == 'assistant' -%}
+ {%- if '' in content -%}
+ {%- set content = content.split('')[1].strip() -%}
+ {%- endif -%}
+
+ {{- 'Assistant\n' + content.strip() }}
+
+ {%- if message.tool_calls -%}
+ {%- if content.strip() != '' -%}
+ {{- '\n\n' -}}
+ {%- endif -%}
+ {{- '[' -}}
+ {%- for call in message.tool_calls -%}
+ {%- set fn = call.function if call.function is defined else call -%}
+ {{- '{"name": ' -}}
+ {{- fn.name | tojson -}}
+ {{- ', "arguments": ' -}}
+ {%- if fn.arguments is string -%}
+ {{- fn.arguments -}}
+ {%- else -%}
+ {{- fn.arguments | tojson -}}
+ {%- endif -%}
+ {{- '}' + (', ' if not loop.last else '') -}}
+ {%- endfor -%}
+ {{- ']' -}}
+ {%- endif -%}
+
+ {{- '\n\n' -}}
+ {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+ {{- 'Assistant\n' -}}
+ {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%}
+ {{- '' -}}
+ {%- else -%}
+ {{- '\n' -}}
+ {%- endif -%}
+ {%- if ns.last_turn_assistant_content is defined
+ and ns.last_turn_assistant_content != '' -%}
+ {{- ns.last_turn_assistant_content -}}
+ {%- endif -%}
+
+{%- else -%}
+ {%- if ns.last_turn_assistant_content is defined
+ and ns.last_turn_assistant_content != '' -%}
+ {{- 'Assistant\n' -}}
+ {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%}
+ {{- '' -}}
+ {%- else -%}
+ {{- '\n' -}}
+ {%- endif -%}
+ {{- ns.last_turn_assistant_content -}}
+
+ {%- if continue_final_message is defined -%}
+ {%- if continue_final_message is false -%}
+ {{- '\n\n' -}}
+ {%- endif -%}
+ {%- else -%}
+ {{- '\n\n' -}}
+ {%- endif -%}
+ {%- endif -%}
+{%- endif -%}
diff --git a/tests/renderers/test_nemotron_nano_v2_chat_template.py b/tests/renderers/test_nemotron_nano_v2_chat_template.py
new file mode 100644
index 000000000000..4b4ce37ca035
--- /dev/null
+++ b/tests/renderers/test_nemotron_nano_v2_chat_template.py
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from pathlib import Path
+
+import jinja2.sandbox
+
+TEMPLATE_PATH = (
+ Path(__file__).resolve().parent.parent.parent
+ / "examples"
+ / "tool_chat_template_nemotron_nano_v2.jinja"
+)
+
+
+def test_tool_call_name_is_json_escaped():
+ template = jinja2.sandbox.ImmutableSandboxedEnvironment().from_string(
+ TEMPLATE_PATH.read_text()
+ )
+ tool_name = 'search"quoted\\name'
+ rendered = template.render(
+ messages=[
+ {"role": "user", "content": "Search docs"},
+ {
+ "role": "assistant",
+ "content": "",
+ "tool_calls": [
+ {
+ "function": {
+ "name": tool_name,
+ "arguments": {"query": "vllm"},
+ },
+ }
+ ],
+ },
+ {"role": "tool", "content": '{"result": "ok"}'},
+ ],
+ add_generation_prompt=False,
+ )
+
+ payload = rendered.split("", 1)[1].split("", 1)[0]
+ tool_calls = json.loads(payload)
+
+ assert tool_calls[0]["name"] == tool_name
+ assert tool_calls[0]["arguments"] == {"query": "vllm"}
diff --git a/tests/tool_parsers/test_nemotron_nano_v2_tool_parser.py b/tests/tool_parsers/test_nemotron_nano_v2_tool_parser.py
new file mode 100644
index 000000000000..f693ef6aa941
--- /dev/null
+++ b/tests/tool_parsers/test_nemotron_nano_v2_tool_parser.py
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from tests.tool_parsers.utils import run_tool_extraction
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.engine.protocol import ExtractedToolCallInformation
+from vllm.tool_parsers import ToolParserManager
+
+
+@pytest.fixture
+def mock_tokenizer():
+ tokenizer = MagicMock()
+ tokenizer.get_vocab.return_value = {}
+ tokenizer.tokenize.side_effect = lambda text: list(text)
+ return tokenizer
+
+
+@pytest.fixture
+def mock_request():
+ request = MagicMock(spec=ChatCompletionRequest)
+ request.tools = []
+ request.tool_choice = "auto"
+ return request
+
+
+@pytest.fixture
+def parser(mock_tokenizer):
+ parser_cls = ToolParserManager.get_tool_parser("nemotron_nano_v2")
+ return parser_cls(mock_tokenizer, tools=[])
+
+
+def test_nemotron_nano_v2_registered_and_accepts_tools(mock_tokenizer):
+ parser_cls = ToolParserManager.get_tool_parser("nemotron_nano_v2")
+
+ parser = parser_cls(mock_tokenizer, tools=[])
+
+ assert parser.tool_call_start_token == ""
+
+
+def test_extract_tool_calls_returns_content_without_tool_call(parser, mock_request):
+ model_output = "No tool call here."
+
+ result = parser.extract_tool_calls(model_output, mock_request)
+
+ assert isinstance(result, ExtractedToolCallInformation)
+ assert result.tools_called is False
+ assert result.tool_calls == []
+ assert result.content == model_output
+
+
+def test_extract_tool_calls_from_nemotron_array(parser, mock_request):
+ model_output = (
+ "Let me check that."
+ '[{"name": "get_weather", '
+ '"arguments": {"city": "Tokyo", "unit": "celsius"}}]'
+ )
+
+ result = parser.extract_tool_calls(model_output, mock_request)
+
+ assert result.tools_called is True
+ assert result.content == "Let me check that."
+ assert len(result.tool_calls) == 1
+ assert result.tool_calls[0].type == "function"
+ assert result.tool_calls[0].function.name == "get_weather"
+ assert result.tool_calls[0].function.arguments == (
+ '{"city": "Tokyo", "unit": "celsius"}'
+ )
+
+
+def test_extract_tool_calls_wraps_single_object(parser, mock_request):
+ model_output = (
+ '{"name": "lookup", "arguments": {"query": "vllm"}}'
+ )
+
+ result = parser.extract_tool_calls(model_output, mock_request)
+
+ assert result.tools_called is True
+ assert len(result.tool_calls) == 1
+ assert result.tool_calls[0].function.name == "lookup"
+ assert result.tool_calls[0].function.arguments == '{"query": "vllm"}'
+
+
+def test_extract_tool_calls_supports_string_arguments(parser, mock_request):
+ model_output = (
+ '[{"name": "run_query", '
+ '"arguments": "{\\"sql\\": \\"select 1\\"}"}]'
+ )
+
+ result = parser.extract_tool_calls(model_output, mock_request)
+
+ assert result.tools_called is True
+ assert result.tool_calls[0].function.name == "run_query"
+ assert result.tool_calls[0].function.arguments == '{"sql": "select 1"}'
+
+
+def test_extract_tool_calls_returns_original_for_malformed(parser, mock_request):
+ model_output = '[{"name": "broken", "arguments": {}'
+
+ result = parser.extract_tool_calls(model_output, mock_request)
+
+ assert result.tools_called is False
+ assert result.tool_calls == []
+ assert result.content == model_output
+
+
+def test_streaming_reconstructs_tool_call(parser, mock_request):
+ model_output = (
+ "Let me check."
+ '[{"name": "get_weather", '
+ '"arguments": {"city": "Tokyo", "unit": "celsius"}}]'
+ )
+
+ content, tool_calls = run_tool_extraction(
+ parser,
+ list(model_output),
+ request=mock_request,
+ streaming=True,
+ )
+
+ assert content == "Let me check."
+ assert len(tool_calls) == 1
+ assert tool_calls[0].function.name == "get_weather"
+ assert tool_calls[0].function.arguments == ('{"city": "Tokyo", "unit": "celsius"}')
+
+
+def test_streaming_handles_nested_json_arguments(parser, mock_request):
+ model_output = (
+ '[{"name": "search", '
+ '"arguments": {"filters": {"city": "Tokyo"}, '
+ '"items": [{"name": "rain", "value": true}]}}]'
+ )
+
+ content, tool_calls = run_tool_extraction(
+ parser,
+ list(model_output),
+ request=mock_request,
+ streaming=True,
+ )
+
+ assert content is None
+ assert len(tool_calls) == 1
+ assert tool_calls[0].function.name == "search"
+ assert tool_calls[0].function.arguments == (
+ '{"filters": {"city": "Tokyo"}, "items": [{"name": "rain", "value": true}]}'
+ )
+
+
+def test_extract_tool_calls_keeps_think_block_as_content(parser, mock_request):
+ model_output = (
+ "\nI need the weather for Tokyo.\n\n"
+ '[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
+ )
+
+ result = parser.extract_tool_calls(model_output, mock_request)
+
+ assert result.tools_called is True
+ assert result.content == "\nI need the weather for Tokyo.\n\n"
+ assert result.tool_calls[0].function.name == "get_weather"
+ assert result.tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_keeps_think_block_as_content(parser, mock_request):
+ model_output = (
+ "\nI need the weather for Tokyo.\n\n"
+ '[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
+ )
+
+ content, tool_calls = run_tool_extraction(
+ parser,
+ list(model_output),
+ request=mock_request,
+ streaming=True,
+ )
+
+ assert content == "\nI need the weather for Tokyo.\n\n"
+ assert len(tool_calls) == 1
+ assert tool_calls[0].function.name == "get_weather"
+ assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_handles_multiple_tool_calls(parser, mock_request):
+ model_output = (
+ '[{"name": "get_weather", '
+ '"arguments": {"city": "Tokyo"}}, '
+ '{"name": "lookup_timezone", '
+ '"arguments": {"city": "Tokyo"}}]'
+ )
+
+ content, tool_calls = run_tool_extraction(
+ parser,
+ list(model_output),
+ request=mock_request,
+ streaming=True,
+ assert_one_tool_per_delta=False,
+ )
+
+ assert content is None
+ assert len(tool_calls) == 2
+ assert tool_calls[0].function.name == "get_weather"
+ assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+ assert tool_calls[1].function.name == "lookup_timezone"
+ assert tool_calls[1].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_single_delta_handles_content_and_tool_call(parser, mock_request):
+ model_output = (
+ "Let me check."
+ '[{"name": "get_weather", '
+ '"arguments": {"city": "Tokyo"}}]'
+ )
+
+ content, tool_calls = run_tool_extraction(
+ parser,
+ [model_output],
+ request=mock_request,
+ streaming=True,
+ )
+
+ assert content == "Let me check."
+ assert len(tool_calls) == 1
+ assert tool_calls[0].function.name == "get_weather"
+ assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_single_delta_handles_multiple_tool_calls(parser, mock_request):
+ model_output = (
+ '[{"name": "get_weather", '
+ '"arguments": {"city": "Tokyo"}}, '
+ '{"name": "lookup_timezone", '
+ '"arguments": {"city": "Tokyo"}}]'
+ )
+
+ content, tool_calls = run_tool_extraction(
+ parser,
+ [model_output],
+ request=mock_request,
+ streaming=True,
+ assert_one_tool_per_delta=False,
+ )
+
+ assert content is None
+ assert len(tool_calls) == 2
+ assert tool_calls[0].function.name == "get_weather"
+ assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+ assert tool_calls[1].function.name == "lookup_timezone"
+ assert tool_calls[1].function.arguments == '{"city": "Tokyo"}'
diff --git a/vllm/tool_parsers/__init__.py b/vllm/tool_parsers/__init__.py
index 7c5f45d2022e..45f026888894 100644
--- a/vllm/tool_parsers/__init__.py
+++ b/vllm/tool_parsers/__init__.py
@@ -130,6 +130,10 @@
"minimax_tool_parser",
"MinimaxToolParser",
),
+ "nemotron_nano_v2": (
+ "nemotron_nano_v2_tool_parser",
+ "NemotronNanoV2ToolParser",
+ ),
"mistral": (
"mistral_tool_parser",
"MistralToolParser",
diff --git a/vllm/tool_parsers/nemotron_nano_v2_tool_parser.py b/vllm/tool_parsers/nemotron_nano_v2_tool_parser.py
new file mode 100644
index 000000000000..0e1221228f24
--- /dev/null
+++ b/vllm/tool_parsers/nemotron_nano_v2_tool_parser.py
@@ -0,0 +1,317 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from collections.abc import Sequence
+from typing import Any
+
+import partial_json_parser
+import regex as re
+from partial_json_parser.core.options import Allow
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.chat_completion.protocol import (
+ ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+ DeltaFunctionCall,
+ DeltaMessage,
+ DeltaToolCall,
+ ExtractedToolCallInformation,
+ FunctionCall,
+ ToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
+from vllm.tool_parsers.utils import partial_tag_overlap
+
+logger = init_logger(__name__)
+
+
+class NemotronNanoV2ToolParser(ToolParser):
+ """Tool parser for Nemotron Nano v2 models that emit JSON."""
+
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
+
+ self.tool_call_start_token = ""
+ self.tool_call_end_token = ""
+ self.tool_call_regex = re.compile(
+ rf"{self.tool_call_start_token}(.*?){self.tool_call_end_token}",
+ re.DOTALL,
+ )
+ self._sent_content_idx = 0
+ self._tool_args_emitted: list[bool] = []
+
+ def adjust_request(
+ self, request: ChatCompletionRequest | ResponsesRequest
+ ) -> ChatCompletionRequest | ResponsesRequest:
+ request = super().adjust_request(request)
+ if request.tools and request.tool_choice != "none":
+ request.skip_special_tokens = False
+ return request
+
+ @staticmethod
+ def _normalize_tool_call_payload(payload: str) -> list[dict[str, Any]]:
+ payload = payload.strip()
+ if not payload.startswith("["):
+ payload = "[" + payload
+ if not payload.endswith("]"):
+ payload = payload + "]"
+
+ parsed = json.loads(payload)
+ if isinstance(parsed, dict):
+ return [parsed]
+ if isinstance(parsed, list):
+ return [item for item in parsed if isinstance(item, dict)]
+ return []
+
+ @staticmethod
+ def _serialize_arguments(arguments: Any) -> str:
+ if isinstance(arguments, str):
+ return arguments
+ return json.dumps(arguments, ensure_ascii=False)
+
+ @staticmethod
+ def _strip_trailing_auto_closers(chunk: str) -> str:
+ idx = len(chunk)
+ while idx > 0 and chunk[idx - 1] in " \t\r\n}]":
+ idx -= 1
+ while idx > 0 and chunk[idx - 1] == '"':
+ if idx - 2 >= 0 and chunk[idx - 2] == "\\":
+ break
+ idx -= 1
+ return chunk[:idx]
+
+ @staticmethod
+ def _common_prefix_len(left: str, right: str) -> int:
+ max_len = min(len(left), len(right))
+ idx = 0
+ while idx < max_len and left[idx] == right[idx]:
+ idx += 1
+ return idx
+
+ def _compute_arguments_delta(self, arguments: Any, end_of_call: bool) -> str:
+ if self.current_tool_id < 0:
+ return ""
+
+ while len(self.streamed_args_for_tool) <= self.current_tool_id:
+ self.streamed_args_for_tool.append("")
+ while len(self._tool_args_emitted) <= self.current_tool_id:
+ self._tool_args_emitted.append(False)
+
+ cur_arguments = self._serialize_arguments(arguments)
+ streamed_prefix = self.streamed_args_for_tool[self.current_tool_id]
+ emitted_any = self._tool_args_emitted[self.current_tool_id]
+
+ lcp_len = self._common_prefix_len(cur_arguments, streamed_prefix)
+ if lcp_len != len(streamed_prefix):
+ streamed_prefix = streamed_prefix[:lcp_len]
+ self.streamed_args_for_tool[self.current_tool_id] = streamed_prefix
+
+ arguments_delta = cur_arguments[lcp_len:]
+ if not arguments_delta:
+ return ""
+
+ if not end_of_call:
+ arguments_delta = self._strip_trailing_auto_closers(arguments_delta)
+
+ if (
+ not emitted_any
+ and not end_of_call
+ and arguments_delta
+ and arguments_delta.endswith("}")
+ ):
+ arguments_delta = arguments_delta[:-1]
+ if arguments_delta.endswith('"'):
+ arguments_delta = arguments_delta[:-1]
+
+ return arguments_delta
+
+ def extract_tool_calls(
+ self,
+ model_output: str,
+ request: ChatCompletionRequest,
+ ) -> ExtractedToolCallInformation:
+ if self.tool_call_start_token not in model_output:
+ return ExtractedToolCallInformation(
+ tools_called=False,
+ tool_calls=[],
+ content=model_output,
+ )
+
+ try:
+ payloads = self.tool_call_regex.findall(model_output)
+ tool_calls: list[ToolCall] = []
+ for payload in payloads:
+ for raw_tool_call in self._normalize_tool_call_payload(payload):
+ try:
+ tool_calls.append(
+ ToolCall(
+ type="function",
+ function=FunctionCall(
+ name=raw_tool_call["name"],
+ arguments=self._serialize_arguments(
+ raw_tool_call["arguments"]
+ ),
+ ),
+ )
+ )
+ except Exception:
+ continue
+
+ if not tool_calls:
+ return ExtractedToolCallInformation(
+ tools_called=False,
+ tool_calls=[],
+ content=model_output,
+ )
+
+ content = model_output[: model_output.find(self.tool_call_start_token)]
+ return ExtractedToolCallInformation(
+ tools_called=True,
+ tool_calls=tool_calls,
+ content=content if content else None,
+ )
+ except Exception:
+ logger.exception("Error extracting tool call from response.")
+ return ExtractedToolCallInformation(
+ tools_called=False,
+ tool_calls=[],
+ content=model_output,
+ )
+
+ def extract_tool_calls_streaming(
+ self,
+ previous_text: str,
+ current_text: str,
+ delta_text: str,
+ previous_token_ids: Sequence[int],
+ current_token_ids: Sequence[int],
+ delta_token_ids: Sequence[int],
+ request: ChatCompletionRequest,
+ ) -> DeltaMessage | None:
+ if not previous_text:
+ self.current_tool_id = -1
+ self.current_tool_name_sent = False
+ self.streamed_args_for_tool = []
+ self._tool_args_emitted = []
+ self._sent_content_idx = 0
+
+ start_idx = current_text.find(self.tool_call_start_token)
+ if start_idx == -1:
+ overlap = partial_tag_overlap(current_text, self.tool_call_start_token)
+ sendable_idx = len(current_text) - overlap
+ if sendable_idx > self._sent_content_idx:
+ content = current_text[self._sent_content_idx : sendable_idx]
+ self._sent_content_idx = sendable_idx
+ return DeltaMessage(content=content)
+ return None
+
+ content_delta: str | None = None
+ if self._sent_content_idx < start_idx:
+ content_delta = current_text[self._sent_content_idx : start_idx]
+ self._sent_content_idx = start_idx
+
+ payload_start = start_idx + len(self.tool_call_start_token)
+ payload_end = current_text.find(self.tool_call_end_token, payload_start)
+ end_of_call = payload_end != -1
+ payload = current_text[
+ payload_start : payload_end if end_of_call else len(current_text)
+ ]
+ if not payload.strip():
+ return None
+
+ flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
+ try:
+ parsed_tool_calls = partial_json_parser.loads(payload, flags)
+ except (
+ partial_json_parser.core.exceptions.MalformedJSON,
+ json.JSONDecodeError,
+ ValueError,
+ ):
+ return None
+
+ if isinstance(parsed_tool_calls, dict):
+ parsed_tool_calls = [parsed_tool_calls]
+ if not isinstance(parsed_tool_calls, list) or not parsed_tool_calls:
+ return None
+
+ if self.current_tool_id < 0:
+ self.current_tool_id = 0
+ self.current_tool_name_sent = False
+ self.streamed_args_for_tool.append("")
+ self._tool_args_emitted.append(False)
+
+ tool_call_deltas: list[DeltaToolCall] = []
+ while self.current_tool_id < len(parsed_tool_calls):
+ current_tool_call = parsed_tool_calls[self.current_tool_id]
+ if not isinstance(current_tool_call, dict):
+ break
+
+ call_complete = end_of_call or self.current_tool_id + 1 < len(
+ parsed_tool_calls
+ )
+
+ if not self.current_tool_name_sent:
+ function_name = current_tool_call.get("name")
+ if not function_name:
+ break
+
+ arguments_delta = ""
+ if "arguments" in current_tool_call:
+ arguments_delta = self._compute_arguments_delta(
+ current_tool_call["arguments"], call_complete
+ )
+ if arguments_delta:
+ self.streamed_args_for_tool[self.current_tool_id] += (
+ arguments_delta
+ )
+ self._tool_args_emitted[self.current_tool_id] = True
+
+ self.current_tool_name_sent = True
+ tool_call_deltas.append(
+ DeltaToolCall(
+ index=self.current_tool_id,
+ id=make_tool_call_id(),
+ type="function",
+ function=DeltaFunctionCall(
+ name=function_name,
+ arguments=arguments_delta or None,
+ ),
+ )
+ )
+ elif "arguments" in current_tool_call:
+ arguments_delta = self._compute_arguments_delta(
+ current_tool_call["arguments"], call_complete
+ )
+ if arguments_delta:
+ self.streamed_args_for_tool[self.current_tool_id] += arguments_delta
+ self._tool_args_emitted[self.current_tool_id] = True
+ tool_call_deltas.append(
+ DeltaToolCall(
+ index=self.current_tool_id,
+ function=DeltaFunctionCall(arguments=arguments_delta),
+ )
+ )
+ elif not call_complete:
+ break
+
+ if self.current_tool_id + 1 >= len(parsed_tool_calls):
+ break
+
+ self.current_tool_id += 1
+ self.current_tool_name_sent = False
+ while len(self.streamed_args_for_tool) <= self.current_tool_id:
+ self.streamed_args_for_tool.append("")
+ while len(self._tool_args_emitted) <= self.current_tool_id:
+ self._tool_args_emitted.append(False)
+
+ if content_delta is not None or tool_call_deltas:
+ return DeltaMessage(
+ content=content_delta,
+ tool_calls=tool_call_deltas or None,
+ )
+ return None