diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
index 9c60255d6928..ee543c47d4af 100644
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -324,6 +324,17 @@ Supported models:
 
 Flags: `--tool-call-parser minimax --chat-template examples/tool_chat_template_minimax_m1.jinja`
 
+### Nemotron Nano v2 Models (`nemotron_nano_v2`)
+
+Supported models:
+
+* `nvidia/NVIDIA-Nemotron-Nano-9B-v2` (and FP8/NVFP4 variants; use with [examples/tool_chat_template_nemotron_nano_v2.jinja](../../examples/tool_chat_template_nemotron_nano_v2.jinja))
+* `nvidia/NVIDIA-Nemotron-Nano-12B-v2` (and FP8/NVFP4 variants; use with [examples/tool_chat_template_nemotron_nano_v2.jinja](../../examples/tool_chat_template_nemotron_nano_v2.jinja))
+
+The parser handles the `<TOOLCALL>[{"name": ..., "arguments": ...}, ...]</TOOLCALL>` envelope emitted by the Nemotron chat template, and works with the model's hybrid thinking mode: any `<think>...</think>` prefix is preserved as message content (or stripped by a reasoning parser if one is configured).
+
+Flags: `--tool-call-parser nemotron_nano_v2 --chat-template examples/tool_chat_template_nemotron_nano_v2.jinja`
+
 ### DeepSeek-V3 Models (`deepseek_v3`)
 
 Supported models:
@@ -510,8 +521,8 @@ Here is a summary of a plugin file:
     # in --tool-call-parser. you can define as many
     # tool parsers as you want here.
     class ExampleToolParser(ToolParser):
-        def __init__(self, tokenizer: TokenizerLike):
-            super().__init__(tokenizer)
+        def __init__(self, tokenizer: TokenizerLike, tools=None):
+            super().__init__(tokenizer, tools)
 
         # adjust request. e.g.: set skip special tokens
         # to False for tool call output.
diff --git a/examples/tool_chat_template_nemotron_nano_v2.jinja b/examples/tool_chat_template_nemotron_nano_v2.jinja
new file mode 100644
index 000000000000..5bb9478d9974
--- /dev/null
+++ b/examples/tool_chat_template_nemotron_nano_v2.jinja
@@ -0,0 +1,137 @@
+{%- set ns = namespace(enable_thinking=true) -%}
+
+{%- for message in messages -%}
+    {%- set content = message['content'] -%}
+    {%- if message['role'] == 'user' or message['role'] == 'system' -%}
+        {%- if '/think' in content -%}
+            {%- set ns.enable_thinking = true -%}
+        {%- elif '/no_think' in content -%}
+            {%- set ns.enable_thinking = false -%}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if messages[0]['role'] != 'system' -%}
+    {%- set ns.non_tool_system_content = '' -%}
+    {{- '<SPECIAL_10>System\n' -}}
+{%- else -%}
+    {%- set ns.non_tool_system_content = messages[0]['content']
+        .replace('/think', '')
+        .replace('/no_think', '')
+        .strip()
+    -%}
+    {{- '<SPECIAL_10>System\n' + ns.non_tool_system_content }}
+{%- endif -%}
+
+{%- if tools -%}
+    {%- if ns.non_tool_system_content is defined
+        and ns.non_tool_system_content != '' -%}
+        {{- '\n\n' -}}
+    {%- endif -%}
+
+    {{- 'You can use the following tools to assist the user if required:' -}}
+    {{- '\n<AVAILABLE_TOOLS>[' -}}
+    {%- for tool in tools -%}
+        {{- (tool.function if tool.function is defined else tool) | tojson -}}
+        {{- ', ' if not loop.last else '' -}}
+    {%- endfor -%}
+    {{- ']</AVAILABLE_TOOLS>\n\n' -}}
+
+    {{- 'If you decide to call any tool(s), use the following format:\n' -}}
+    {{- '<TOOLCALL>[{{"name": "tool_name1", "arguments": "tool_args1"}}, ' -}}
+    {{- '{{"name": "tool_name2", "arguments": "tool_args2"}}]</TOOLCALL>\n\n' -}}
+
+    {{- 'The user will execute tool-calls and return responses from tool(s) in this format:\n' -}}
+    {{- '<TOOL_RESPONSE>[{{"tool_response1"}}, {{"tool_response2"}}]</TOOL_RESPONSE>\n\n' -}}
+
+    {{- 'Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user.' -}}
+{%- endif -%}
+
+{{- '\n' -}}
+
+{%- set messages = messages[1:] if messages[0]['role'] == 'system' else messages -%}
+
+{%- if messages[-1]['role'] == 'assistant' -%}
+    {%- set ns.last_turn_assistant_content = messages[-1]['content'].strip() -%}
+    {%- set messages = messages[:-1] -%}
+{%- endif -%}
+
+{%- for message in messages -%}
+    {%- set content = message['content'] -%}
+
+    {%- if message['role'] == 'user' -%}
+        {{- '<SPECIAL_11>User\n' + content.replace('/think', '').replace('/no_think', '').strip() + '\n' }}
+
+    {%- elif message['role'] == 'tool' -%}
+        {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') -%}
+            {{- '<SPECIAL_11>User\n' + '<TOOL_RESPONSE>[' }}
+        {%- endif -%}
+        {{- message['content'] -}}
+        {{- ', ' if not loop.last and (messages[loop.index0 + 1].role == 'tool') else '' -}}
+        {%- if loop.last or (messages[loop.index0 + 1].role != 'tool') -%}
+            {{- ']</TOOL_RESPONSE>\n' -}}
+        {%- endif -%}
+
+    {%- elif message['role'] == 'assistant' -%}
+        {%- if '</think>' in content -%}
+            {%- set content = content.split('</think>')[1].strip() -%}
+        {%- endif -%}
+
+        {{- '<SPECIAL_11>Assistant\n' + content.strip() }}
+
+        {%- if message.tool_calls -%}
+            {%- if content.strip() != '' -%}
+                {{- '\n\n' -}}
+            {%- endif -%}
+            {{- '<TOOLCALL>[' -}}
+            {%- for call in message.tool_calls -%}
+                {%- set fn = call.function if call.function is defined else call -%}
+                {{- '{"name": ' -}}
+                {{- fn.name | tojson -}}
+                {{- ', "arguments": ' -}}
+                {%- if fn.arguments is string -%}
+                    {{- fn.arguments -}}
+                {%- else -%}
+                    {{- fn.arguments | tojson -}}
+                {%- endif -%}
+                {{- '}' + (', ' if not loop.last else '') -}}
+            {%- endfor -%}
+            {{- ']</TOOLCALL>' -}}
+        {%- endif -%}
+
+        {{- '\n<SPECIAL_12>\n' -}}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {{- '<SPECIAL_11>Assistant\n' -}}
+    {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%}
+        {{- '<think></think>' -}}
+    {%- else -%}
+        {{- '<think>\n' -}}
+    {%- endif -%}
+    {%- if ns.last_turn_assistant_content is defined
+        and ns.last_turn_assistant_content != '' -%}
+        {{- ns.last_turn_assistant_content -}}
+    {%- endif -%}
+
+{%- else -%}
+    {%- if ns.last_turn_assistant_content is defined
+        and ns.last_turn_assistant_content != '' -%}
+        {{- '<SPECIAL_11>Assistant\n' -}}
+        {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%}
+            {{- '<think></think>' -}}
+        {%- else -%}
+            {{- '<think>\n' -}}
+        {%- endif -%}
+        {{- ns.last_turn_assistant_content -}}
+
+        {%- if continue_final_message is defined -%}
+            {%- if continue_final_message is false -%}
+                {{- '\n<SPECIAL_12>\n' -}}
+            {%- endif -%}
+        {%- else -%}
+            {{- '\n<SPECIAL_12>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endif -%}
diff --git a/tests/renderers/test_nemotron_nano_v2_chat_template.py b/tests/renderers/test_nemotron_nano_v2_chat_template.py
new file mode 100644
index 000000000000..4b4ce37ca035
--- /dev/null
+++ b/tests/renderers/test_nemotron_nano_v2_chat_template.py
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from pathlib import Path
+
+import jinja2.sandbox
+
+TEMPLATE_PATH = (
+    Path(__file__).resolve().parent.parent.parent
+    / "examples"
+    / "tool_chat_template_nemotron_nano_v2.jinja"
+)
+
+
+def test_tool_call_name_is_json_escaped():
+    template = jinja2.sandbox.ImmutableSandboxedEnvironment().from_string(
+        TEMPLATE_PATH.read_text()
+    )
+    tool_name = 'search"quoted\\name'
+    rendered = template.render(
+        messages=[
+            {"role": "user", "content": "Search docs"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "function": {
+                            "name": tool_name,
+                            "arguments": {"query": "vllm"},
+                        },
+                    }
+                ],
+            },
+            {"role": "tool", "content": '{"result": "ok"}'},
+        ],
+        add_generation_prompt=False,
+    )
+
+    payload = rendered.split("<TOOLCALL>", 1)[1].split("</TOOLCALL>", 1)[0]
+    tool_calls = json.loads(payload)
+
+    assert tool_calls[0]["name"] == tool_name
+    assert tool_calls[0]["arguments"] == {"query": "vllm"}
diff --git a/tests/tool_parsers/test_nemotron_nano_v2_tool_parser.py b/tests/tool_parsers/test_nemotron_nano_v2_tool_parser.py
new file mode 100644
index 000000000000..f693ef6aa941
--- /dev/null
+++ b/tests/tool_parsers/test_nemotron_nano_v2_tool_parser.py
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from tests.tool_parsers.utils import run_tool_extraction
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.engine.protocol import ExtractedToolCallInformation
+from vllm.tool_parsers import ToolParserManager
+
+
+@pytest.fixture
+def mock_tokenizer():
+    tokenizer = MagicMock()
+    tokenizer.get_vocab.return_value = {}
+    tokenizer.tokenize.side_effect = lambda text: list(text)
+    return tokenizer
+
+
+@pytest.fixture
+def mock_request():
+    request = MagicMock(spec=ChatCompletionRequest)
+    request.tools = []
+    request.tool_choice = "auto"
+    return request
+
+
+@pytest.fixture
+def parser(mock_tokenizer):
+    parser_cls = ToolParserManager.get_tool_parser("nemotron_nano_v2")
+    return parser_cls(mock_tokenizer, tools=[])
+
+
+def test_nemotron_nano_v2_registered_and_accepts_tools(mock_tokenizer):
+    parser_cls = ToolParserManager.get_tool_parser("nemotron_nano_v2")
+
+    parser = parser_cls(mock_tokenizer, tools=[])
+
+    assert parser.tool_call_start_token == "<TOOLCALL>"
+
+
+def test_extract_tool_calls_returns_content_without_tool_call(parser, mock_request):
+    model_output = "No tool call here."
+
+    result = parser.extract_tool_calls(model_output, mock_request)
+
+    assert isinstance(result, ExtractedToolCallInformation)
+    assert result.tools_called is False
+    assert result.tool_calls == []
+    assert result.content == model_output
+
+
+def test_extract_tool_calls_from_nemotron_array(parser, mock_request):
+    model_output = (
+        "Let me check that."
+        '<TOOLCALL>[{"name": "get_weather", '
+        '"arguments": {"city": "Tokyo", "unit": "celsius"}}]</TOOLCALL>'
+    )
+
+    result = parser.extract_tool_calls(model_output, mock_request)
+
+    assert result.tools_called is True
+    assert result.content == "Let me check that."
+    assert len(result.tool_calls) == 1
+    assert result.tool_calls[0].type == "function"
+    assert result.tool_calls[0].function.name == "get_weather"
+    assert result.tool_calls[0].function.arguments == (
+        '{"city": "Tokyo", "unit": "celsius"}'
+    )
+
+
+def test_extract_tool_calls_wraps_single_object(parser, mock_request):
+    model_output = (
+        '<TOOLCALL>{"name": "lookup", "arguments": {"query": "vllm"}}</TOOLCALL>'
+    )
+
+    result = parser.extract_tool_calls(model_output, mock_request)
+
+    assert result.tools_called is True
+    assert len(result.tool_calls) == 1
+    assert result.tool_calls[0].function.name == "lookup"
+    assert result.tool_calls[0].function.arguments == '{"query": "vllm"}'
+
+
+def test_extract_tool_calls_supports_string_arguments(parser, mock_request):
+    model_output = (
+        '<TOOLCALL>[{"name": "run_query", '
+        '"arguments": "{\\"sql\\": \\"select 1\\"}"}]</TOOLCALL>'
+    )
+
+    result = parser.extract_tool_calls(model_output, mock_request)
+
+    assert result.tools_called is True
+    assert result.tool_calls[0].function.name == "run_query"
+    assert result.tool_calls[0].function.arguments == '{"sql": "select 1"}'
+
+
+def test_extract_tool_calls_returns_original_for_malformed(parser, mock_request):
+    model_output = '<TOOLCALL>[{"name": "broken", "arguments": {}</TOOLCALL>'
+
+    result = parser.extract_tool_calls(model_output, mock_request)
+
+    assert result.tools_called is False
+    assert result.tool_calls == []
+    assert result.content == model_output
+
+
+def test_streaming_reconstructs_tool_call(parser, mock_request):
+    model_output = (
+        "Let me check."
+        '<TOOLCALL>[{"name": "get_weather", '
+        '"arguments": {"city": "Tokyo", "unit": "celsius"}}]</TOOLCALL>'
+    )
+
+    content, tool_calls = run_tool_extraction(
+        parser,
+        list(model_output),
+        request=mock_request,
+        streaming=True,
+    )
+
+    assert content == "Let me check."
+    assert len(tool_calls) == 1
+    assert tool_calls[0].function.name == "get_weather"
+    assert tool_calls[0].function.arguments == ('{"city": "Tokyo", "unit": "celsius"}')
+
+
+def test_streaming_handles_nested_json_arguments(parser, mock_request):
+    model_output = (
+        '<TOOLCALL>[{"name": "search", '
+        '"arguments": {"filters": {"city": "Tokyo"}, '
+        '"items": [{"name": "rain", "value": true}]}}]</TOOLCALL>'
+    )
+
+    content, tool_calls = run_tool_extraction(
+        parser,
+        list(model_output),
+        request=mock_request,
+        streaming=True,
+    )
+
+    assert content is None
+    assert len(tool_calls) == 1
+    assert tool_calls[0].function.name == "search"
+    assert tool_calls[0].function.arguments == (
+        '{"filters": {"city": "Tokyo"}, "items": [{"name": "rain", "value": true}]}'
+    )
+
+
+def test_extract_tool_calls_keeps_think_block_as_content(parser, mock_request):
+    model_output = (
+        "<think>\nI need the weather for Tokyo.\n</think>\n"
+        '<TOOLCALL>[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]</TOOLCALL>'
+    )
+
+    result = parser.extract_tool_calls(model_output, mock_request)
+
+    assert result.tools_called is True
+    assert result.content == "<think>\nI need the weather for Tokyo.\n</think>\n"
+    assert result.tool_calls[0].function.name == "get_weather"
+    assert result.tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_keeps_think_block_as_content(parser, mock_request):
+    model_output = (
+        "<think>\nI need the weather for Tokyo.\n</think>\n"
+        '<TOOLCALL>[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]</TOOLCALL>'
+    )
+
+    content, tool_calls = run_tool_extraction(
+        parser,
+        list(model_output),
+        request=mock_request,
+        streaming=True,
+    )
+
+    assert content == "<think>\nI need the weather for Tokyo.\n</think>\n"
+    assert len(tool_calls) == 1
+    assert tool_calls[0].function.name == "get_weather"
+    assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_handles_multiple_tool_calls(parser, mock_request):
+    model_output = (
+        '<TOOLCALL>[{"name": "get_weather", '
+        '"arguments": {"city": "Tokyo"}}, '
+        '{"name": "lookup_timezone", '
+        '"arguments": {"city": "Tokyo"}}]</TOOLCALL>'
+    )
+
+    content, tool_calls = run_tool_extraction(
+        parser,
+        list(model_output),
+        request=mock_request,
+        streaming=True,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert content is None
+    assert len(tool_calls) == 2
+    assert tool_calls[0].function.name == "get_weather"
+    assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+    assert tool_calls[1].function.name == "lookup_timezone"
+    assert tool_calls[1].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_single_delta_handles_content_and_tool_call(parser, mock_request):
+    model_output = (
+        "Let me check."
+        '<TOOLCALL>[{"name": "get_weather", '
+        '"arguments": {"city": "Tokyo"}}]</TOOLCALL>'
+    )
+
+    content, tool_calls = run_tool_extraction(
+        parser,
+        [model_output],
+        request=mock_request,
+        streaming=True,
+    )
+
+    assert content == "Let me check."
+    assert len(tool_calls) == 1
+    assert tool_calls[0].function.name == "get_weather"
+    assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+
+
+def test_streaming_single_delta_handles_multiple_tool_calls(parser, mock_request):
+    model_output = (
+        '<TOOLCALL>[{"name": "get_weather", '
+        '"arguments": {"city": "Tokyo"}}, '
+        '{"name": "lookup_timezone", '
+        '"arguments": {"city": "Tokyo"}}]</TOOLCALL>'
+    )
+
+    content, tool_calls = run_tool_extraction(
+        parser,
+        [model_output],
+        request=mock_request,
+        streaming=True,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert content is None
+    assert len(tool_calls) == 2
+    assert tool_calls[0].function.name == "get_weather"
+    assert tool_calls[0].function.arguments == '{"city": "Tokyo"}'
+    assert tool_calls[1].function.name == "lookup_timezone"
+    assert tool_calls[1].function.arguments == '{"city": "Tokyo"}'
diff --git a/vllm/tool_parsers/__init__.py b/vllm/tool_parsers/__init__.py
index 7c5f45d2022e..45f026888894 100644
--- a/vllm/tool_parsers/__init__.py
+++ b/vllm/tool_parsers/__init__.py
@@ -130,6 +130,10 @@
         "minimax_tool_parser",
         "MinimaxToolParser",
     ),
+    "nemotron_nano_v2": (
+        "nemotron_nano_v2_tool_parser",
+        "NemotronNanoV2ToolParser",
+    ),
     "mistral": (
         "mistral_tool_parser",
         "MistralToolParser",
diff --git a/vllm/tool_parsers/nemotron_nano_v2_tool_parser.py b/vllm/tool_parsers/nemotron_nano_v2_tool_parser.py
new file mode 100644
index 000000000000..0e1221228f24
--- /dev/null
+++ b/vllm/tool_parsers/nemotron_nano_v2_tool_parser.py
@@ -0,0 +1,317 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from collections.abc import Sequence
+from typing import Any
+
+import partial_json_parser
+import regex as re
+from partial_json_parser.core.options import Allow
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
+from vllm.tool_parsers.utils import partial_tag_overlap
+
+logger = init_logger(__name__)
+
+
+class NemotronNanoV2ToolParser(ToolParser):
+    """Tool parser for Nemotron Nano v2 models that emit <TOOLCALL> JSON."""
+
+    def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+        super().__init__(tokenizer, tools)
+        # Literal tag pair that wraps the JSON tool-call payload in model output.
+        self.tool_call_start_token = "<TOOLCALL>"
+        self.tool_call_end_token = "</TOOLCALL>"
+        self.tool_call_regex = re.compile(
+            rf"{self.tool_call_start_token}(.*?){self.tool_call_end_token}",
+            re.DOTALL,
+        )
+        self._sent_content_idx = 0  # chars of current_text already sent as content
+        self._tool_args_emitted: list[bool] = []  # per tool: any args streamed yet
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":  # tool calls possible
+            request.skip_special_tokens = False  # assumed: keeps tag tokens in text
+        return request
+
+    @staticmethod
+    def _normalize_tool_call_payload(payload: str) -> list[dict[str, Any]]:
+        """Parse a <TOOLCALL> payload into a list of tool-call dicts.
+
+        A payload lacking the enclosing ``[``/``]`` is wrapped first, so a bare
+        object is treated as a one-element array. Non-dict items are dropped.
+        Raises ``json.JSONDecodeError`` when the wrapped text is not valid JSON.
+        """
+        payload = payload.strip()
+        if not payload.startswith("["):
+            payload = "[" + payload
+        if not payload.endswith("]"):
+            payload += "]"
+        return [item for item in json.loads(payload) if isinstance(item, dict)]
+
+    @staticmethod
+    def _serialize_arguments(arguments: Any) -> str:
+        """Return string arguments untouched; JSON-encode anything else."""
+        already_text = isinstance(arguments, str)
+        return arguments if already_text else json.dumps(arguments, ensure_ascii=False)
+
+    @staticmethod
+    def _strip_trailing_auto_closers(chunk: str) -> str:
+        """Drop a trailing run of whitespace, ``}`` and ``]``, then trailing quotes.
+
+        Quote stripping stops at a quote directly preceded by a backslash.
+        """
+        trimmed = chunk.rstrip(" \t\r\n}]")
+        while trimmed.endswith('"') and not trimmed.endswith('\\"'):
+            trimmed = trimmed[:-1]
+        return trimmed
+
+    @staticmethod
+    def _common_prefix_len(left: str, right: str) -> int:
+        """Return how many leading characters ``left`` and ``right`` share."""
+        for pos, (lch, rch) in enumerate(zip(left, right)):
+            if lch != rch:
+                return pos
+        return min(len(left), len(right))
+
+    def _compute_arguments_delta(self, arguments: Any, end_of_call: bool) -> str:
+        if self.current_tool_id < 0:
+            return ""
+        # Pad the per-tool state lists so current_tool_id is a valid index.
+        while len(self.streamed_args_for_tool) <= self.current_tool_id:
+            self.streamed_args_for_tool.append("")
+        while len(self._tool_args_emitted) <= self.current_tool_id:
+            self._tool_args_emitted.append(False)
+        # Compare the serialized arguments with what this tool has streamed so far.
+        cur_arguments = self._serialize_arguments(arguments)
+        streamed_prefix = self.streamed_args_for_tool[self.current_tool_id]
+        emitted_any = self._tool_args_emitted[self.current_tool_id]
+        # Roll the recorded text back to the part that still prefixes cur_arguments.
+        lcp_len = self._common_prefix_len(cur_arguments, streamed_prefix)
+        if lcp_len != len(streamed_prefix):
+            streamed_prefix = streamed_prefix[:lcp_len]
+            self.streamed_args_for_tool[self.current_tool_id] = streamed_prefix
+        # Only the text past the shared prefix is new.
+        arguments_delta = cur_arguments[lcp_len:]
+        if not arguments_delta:
+            return ""
+        # Mid-call: hold back trailing closers/quotes (possibly parser-added).
+        if not end_of_call:
+            arguments_delta = self._strip_trailing_auto_closers(arguments_delta)
+        # First mid-call emission: also drop one trailing "}" and a quote before it.
+        if (
+            not emitted_any
+            and not end_of_call
+            and arguments_delta
+            and arguments_delta.endswith("}")
+        ):
+            arguments_delta = arguments_delta[:-1]
+            if arguments_delta.endswith('"'):
+                arguments_delta = arguments_delta[:-1]
+        # Callers append the returned delta to streamed_args_for_tool.
+        return arguments_delta
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        """Extract tool calls from a complete (non-streaming) model output.
+
+        Each ``<TOOLCALL>...</TOOLCALL>`` payload is parsed as JSON; a call with
+        no ``arguments`` key gets ``{}`` and other invalid calls are skipped.
+        Text before the first tag becomes ``content``. When no usable call is
+        found (or parsing fails) the whole output is returned as content.
+        """
+        no_tool_calls = ExtractedToolCallInformation(
+            tools_called=False,
+            tool_calls=[],
+            content=model_output,
+        )
+        if self.tool_call_start_token not in model_output:
+            return no_tool_calls
+
+        try:
+            payloads = self.tool_call_regex.findall(model_output)
+            tool_calls: list[ToolCall] = []
+            for payload in payloads:
+                for raw_tool_call in self._normalize_tool_call_payload(payload):
+                    try:
+                        tool_calls.append(
+                            ToolCall(
+                                type="function",
+                                function=FunctionCall(
+                                    name=raw_tool_call["name"],
+                                    arguments=self._serialize_arguments(
+                                        raw_tool_call.get("arguments", {})
+                                    ),
+                                ),
+                            )
+                        )
+                    except Exception:
+                        logger.warning("Skipping invalid tool call: %r", raw_tool_call)
+
+            if not tool_calls:
+                return no_tool_calls
+
+            content = model_output[: model_output.find(self.tool_call_start_token)]
+            return ExtractedToolCallInformation(
+                tools_called=True,
+                tool_calls=tool_calls,
+                content=content if content else None,
+            )
+        except Exception:
+            logger.exception("Error extracting tool call from response.")
+            return no_tool_calls
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """Return the next delta (content and/or tool-call pieces), or None."""
+        # Empty previous_text marks a new generation: reset streaming state.
+        if not previous_text:
+            self.current_tool_id = -1
+            self.current_tool_name_sent = False
+            self.streamed_args_for_tool = []
+            self._tool_args_emitted = []
+            self._sent_content_idx = 0
+        # No start tag yet: emit new content, minus the overlap-length tail.
+        start_idx = current_text.find(self.tool_call_start_token)
+        if start_idx == -1:
+            overlap = partial_tag_overlap(current_text, self.tool_call_start_token)
+            sendable_idx = len(current_text) - overlap
+            if sendable_idx > self._sent_content_idx:
+                content = current_text[self._sent_content_idx : sendable_idx]
+                self._sent_content_idx = sendable_idx
+                return DeltaMessage(content=content)
+            return None
+        # Start tag seen: flush any content before it that was not sent yet.
+        content_delta: str | None = None
+        if self._sent_content_idx < start_idx:
+            content_delta = current_text[self._sent_content_idx : start_idx]
+            self._sent_content_idx = start_idx
+        # Early returns below still carry content_delta; it cannot be re-sent later.
+        payload_start = start_idx + len(self.tool_call_start_token)
+        payload_end = current_text.find(self.tool_call_end_token, payload_start)
+        end_of_call = payload_end != -1
+        payload = current_text[payload_start : payload_end if end_of_call else None]
+        if not payload.strip():
+            return DeltaMessage(content=content_delta) if content_delta else None
+        # Partial strings stay disabled until the name is sent (Allow.STR masked out).
+        flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
+        try:
+            parsed_tool_calls = partial_json_parser.loads(payload, flags)
+        except (
+            partial_json_parser.core.exceptions.MalformedJSON,
+            json.JSONDecodeError,
+            ValueError,
+        ):
+            return DeltaMessage(content=content_delta) if content_delta else None
+
+        if isinstance(parsed_tool_calls, dict):
+            parsed_tool_calls = [parsed_tool_calls]
+        if not isinstance(parsed_tool_calls, list) or not parsed_tool_calls:
+            return DeltaMessage(content=content_delta) if content_delta else None
+
+        if self.current_tool_id < 0:
+            self.current_tool_id = 0
+            self.current_tool_name_sent = False
+            self.streamed_args_for_tool.append("")
+            self._tool_args_emitted.append(False)
+        # Emit deltas for the current call and any later calls already in the payload.
+        tool_call_deltas: list[DeltaToolCall] = []
+        while self.current_tool_id < len(parsed_tool_calls):
+            current_tool_call = parsed_tool_calls[self.current_tool_id]
+            if not isinstance(current_tool_call, dict):
+                break
+
+            call_complete = end_of_call or self.current_tool_id + 1 < len(
+                parsed_tool_calls
+            )
+
+            if not self.current_tool_name_sent:
+                function_name = current_tool_call.get("name")
+                if not function_name:
+                    break
+
+                arguments_delta = ""
+                if "arguments" in current_tool_call:
+                    arguments_delta = self._compute_arguments_delta(
+                        current_tool_call["arguments"], call_complete
+                    )
+                    if arguments_delta:
+                        self.streamed_args_for_tool[self.current_tool_id] += (
+                            arguments_delta
+                        )
+                        self._tool_args_emitted[self.current_tool_id] = True
+
+                self.current_tool_name_sent = True
+                tool_call_deltas.append(
+                    DeltaToolCall(
+                        index=self.current_tool_id,
+                        id=make_tool_call_id(),
+                        type="function",
+                        function=DeltaFunctionCall(
+                            name=function_name,
+                            arguments=arguments_delta or None,
+                        ),
+                    )
+                )
+            elif "arguments" in current_tool_call:
+                arguments_delta = self._compute_arguments_delta(
+                    current_tool_call["arguments"], call_complete
+                )
+                if arguments_delta:
+                    self.streamed_args_for_tool[self.current_tool_id] += arguments_delta
+                    self._tool_args_emitted[self.current_tool_id] = True
+                    tool_call_deltas.append(
+                        DeltaToolCall(
+                            index=self.current_tool_id,
+                            function=DeltaFunctionCall(arguments=arguments_delta),
+                        )
+                    )
+            elif not call_complete:
+                break
+
+            if self.current_tool_id + 1 >= len(parsed_tool_calls):
+                break
+
+            self.current_tool_id += 1
+            self.current_tool_name_sent = False
+            while len(self.streamed_args_for_tool) <= self.current_tool_id:
+                self.streamed_args_for_tool.append("")
+            while len(self._tool_args_emitted) <= self.current_tool_id:
+                self._tool_args_emitted.append(False)
+        # NOTE(review): base-class prev_tool_call_arr is never updated here — verify.
+        if content_delta is not None or tool_call_deltas:
+            return DeltaMessage(
+                content=content_delta,
+                tool_calls=tool_call_deltas or None,
+            )
+        return None