vllm-project · sniper35 · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
@@ -324,6 +324,17 @@ Supported models:
 
 Flags: `--tool-call-parser minimax --chat-template examples/tool_chat_template_minimax_m1.jinja`
 
+### Nemotron Nano v2 Models (`nemotron_nano_v2`)
+
+Supported models:
+
+* `nvidia/NVIDIA-Nemotron-Nano-9B-v2` (and FP8/NVFP4 variants; use with [examples/tool_chat_template_nemotron_nano_v2.jinja](../../examples/tool_chat_template_nemotron_nano_v2.jinja))
+* `nvidia/NVIDIA-Nemotron-Nano-12B-v2` (and FP8/NVFP4 variants; use with [examples/tool_chat_template_nemotron_nano_v2.jinja](../../examples/tool_chat_template_nemotron_nano_v2.jinja))
+
+The parser handles the `<TOOLCALL>[{"name": ..., "arguments": ...}, ...]</TOOLCALL>` envelope emitted by the Nemotron chat template, and works with the model's hybrid thinking mode: any `<think>...</think>` prefix is preserved as message content (or stripped by a reasoning parser if one is configured).
+
+Flags: `--tool-call-parser nemotron_nano_v2 --chat-template examples/tool_chat_template_nemotron_nano_v2.jinja`
+
 ### DeepSeek-V3 Models (`deepseek_v3`)
 
 Supported models:
@@ -510,8 +521,8 @@ Here is a summary of a plugin file:
     # in --tool-call-parser. you can define as many
     # tool parsers as you want here.
     class ExampleToolParser(ToolParser):
-        def __init__(self, tokenizer: TokenizerLike):
-            super().__init__(tokenizer)
+        def __init__(self, tokenizer: TokenizerLike, tools=None):
+            super().__init__(tokenizer, tools)
 
         # adjust request. e.g.: set skip special tokens
         # to False for tool call output.

diff --git a/examples/tool_chat_template_nemotron_nano_v2.jinja b/examples/tool_chat_template_nemotron_nano_v2.jinja
@@ -0,0 +1,137 @@
+{%- set ns = namespace(enable_thinking=true) -%}
+{#- Thinking starts enabled; every user/system message containing '/think' or '/no_think' overwrites the flag, so the last switch found wins. -#}
+{%- for message in messages -%}
+    {%- set content = message['content'] -%}
+    {%- if message['role'] == 'user' or message['role'] == 'system' -%}
+        {%- if '/think' in content -%}
+            {%- set ns.enable_thinking = true -%}
+        {%- elif '/no_think' in content -%}
+            {%- set ns.enable_thinking = false -%}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+{#- Always open a System turn; a leading system message is carried over with its '/think' and '/no_think' switches removed and surrounding whitespace stripped. -#}
+{%- if messages[0]['role'] != 'system' -%}
+    {%- set ns.non_tool_system_content = '' -%}
+    {{- '<SPECIAL_10>System\n' -}}
+{%- else -%}
+    {%- set ns.non_tool_system_content = messages[0]['content']
+        .replace('/think', '')
+        .replace('/no_think', '')
+        .strip()
+    -%}
+    {{- '<SPECIAL_10>System\n' + ns.non_tool_system_content }}
+{%- endif -%}
+{#- When tools are supplied, append the tool list and the call/response conventions to the System turn, separated from any existing system text by a blank line. -#}
+{%- if tools -%}
+    {%- if ns.non_tool_system_content is defined
+        and ns.non_tool_system_content != '' -%}
+        {{- '\n\n' -}}
+    {%- endif -%}
+    {#- Each tool is serialized with tojson: its `function` field when that is defined, otherwise the tool object itself. -#}
+    {{- 'You can use the following tools to assist the user if required:' -}}
+    {{- '\n<AVAILABLE_TOOLS>[' -}}
+    {%- for tool in tools -%}
+        {{- (tool.function if tool.function is defined else tool) | tojson -}}
+        {{- ', ' if not loop.last else '' -}}
+    {%- endfor -%}
+    {{- ']</AVAILABLE_TOOLS>\n\n' -}}
+    {#- NOTE(review): the doubled braces below sit inside string literals, so they are emitted as-is; presumably this mirrors the model's reference prompt - confirm before changing. -#}
+    {{- 'If you decide to call any tool(s), use the following format:\n' -}}
+    {{- '<TOOLCALL>[{{"name": "tool_name1", "arguments": "tool_args1"}}, ' -}}
+    {{- '{{"name": "tool_name2", "arguments": "tool_args2"}}]</TOOLCALL>\n\n' -}}
+
+    {{- 'The user will execute tool-calls and return responses from tool(s) in this format:\n' -}}
+    {{- '<TOOL_RESPONSE>[{{"tool_response1"}}, {{"tool_response2"}}]</TOOL_RESPONSE>\n\n' -}}
+
+    {{- 'Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user.' -}}
+{%- endif -%}
+{#- Terminate the System section with a newline. -#}
+{{- '\n' -}}
+{#- Drop the leading system message; it has already been rendered above. -#}
+{%- set messages = messages[1:] if messages[0]['role'] == 'system' else messages -%}
+{#- Set aside a trailing assistant message so it can be re-emitted after the loop. `messages` is empty for system-only input and the content may be null, so both are guarded. -#}
+{%- if messages and messages[-1]['role'] == 'assistant' -%}
+    {%- set ns.last_turn_assistant_content = (messages[-1]['content'] or '').strip() -%}
+    {%- set messages = messages[:-1] -%}
+{%- endif -%}
+{#- Render the remaining turns in order. A null content (which chat APIs allow on assistant tool-call messages) is normalised to an empty string so the string operations below cannot fail on it. -#}
+{%- for message in messages -%}
+    {%- set content = message['content'] if message['content'] is not none else '' -%}
+
+    {%- if message['role'] == 'user' -%}
+        {{- '<SPECIAL_11>User\n' + content.replace('/think', '').replace('/no_think', '').strip() + '\n' }}
+    {#- Consecutive tool messages are merged into one User turn wrapped in a single TOOL_RESPONSE list. -#}
+    {%- elif message['role'] == 'tool' -%}
+        {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') -%}
+            {{- '<SPECIAL_11>User\n' + '<TOOL_RESPONSE>[' }}
+        {%- endif -%}
+        {{- content -}}
+        {{- ', ' if not loop.last and (messages[loop.index0 + 1].role == 'tool') else '' -}}
+        {%- if loop.last or (messages[loop.index0 + 1].role != 'tool') -%}
+            {{- ']</TOOL_RESPONSE>\n' -}}
+        {%- endif -%}
+    {#- Assistant turns are replayed without their reasoning: only the segment after the first '</think>' is kept. -#}
+    {%- elif message['role'] == 'assistant' -%}
+        {%- if '</think>' in content -%}
+            {%- set content = content.split('</think>')[1].strip() -%}
+        {%- endif -%}
+
+        {{- '<SPECIAL_11>Assistant\n' + content.strip() }}
+        {#- Tool calls are written as a bracketed list of name/arguments objects; the name always goes through tojson, string arguments are emitted verbatim and anything else goes through tojson. -#}
+        {%- if message.tool_calls -%}
+            {%- if content.strip() != '' -%}
+                {{- '\n\n' -}}
+            {%- endif -%}
+            {{- '<TOOLCALL>[' -}}
+            {%- for call in message.tool_calls -%}
+                {%- set fn = call.function if call.function is defined else call -%}
+                {{- '{"name": ' -}}
+                {{- fn.name | tojson -}}
+                {{- ', "arguments": ' -}}
+                {%- if fn.arguments is string -%}
+                    {{- fn.arguments -}}
+                {%- else -%}
+                    {{- fn.arguments | tojson -}}
+                {%- endif -%}
+                {{- '}' + (', ' if not loop.last else '') -}}
+            {%- endfor -%}
+            {{- ']</TOOLCALL>' -}}
+        {%- endif -%}
+
+        {{- '\n<SPECIAL_12>\n' -}}
+    {%- endif -%}
+{%- endfor -%}
+{#- Finish the prompt. With add_generation_prompt, open a new Assistant turn, emit '<think></think>' when thinking is off or '<think>' plus a newline otherwise, then append any trailing assistant text set aside earlier. Otherwise re-emit a non-empty trailing assistant message as its own turn. -#}
+{%- if add_generation_prompt -%}
+    {{- '<SPECIAL_11>Assistant\n' -}}
+    {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%}
+        {{- '<think></think>' -}}
+    {%- else -%}
+        {{- '<think>\n' -}}
+    {%- endif -%}
+    {%- if ns.last_turn_assistant_content is defined
+        and ns.last_turn_assistant_content != '' -%}
+        {{- ns.last_turn_assistant_content -}}
+    {%- endif -%}
+
+{%- else -%}
+    {%- if ns.last_turn_assistant_content is defined
+        and ns.last_turn_assistant_content != '' -%}
+        {{- '<SPECIAL_11>Assistant\n' -}}
+        {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%}
+            {{- '<think></think>' -}}
+        {%- else -%}
+            {{- '<think>\n' -}}
+        {%- endif -%}
+        {{- ns.last_turn_assistant_content -}}
+        {#- The end-of-turn marker is omitted only when continue_final_message is defined and is not false, leaving the turn open. -#}
+        {%- if continue_final_message is defined -%}
+            {%- if continue_final_message is false -%}
+                {{- '\n<SPECIAL_12>\n' -}}
+            {%- endif -%}
+        {%- else -%}
+            {{- '\n<SPECIAL_12>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endif -%}
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from pathlib import Path
+
+import jinja2.sandbox
+
+TEMPLATE_PATH = (
+    Path(__file__).resolve().parent.parent.parent
+    / "examples"
+    / "tool_chat_template_nemotron_nano_v2.jinja"
+)
+
+
+def test_tool_call_name_is_json_escaped():
+    template = jinja2.sandbox.ImmutableSandboxedEnvironment().from_string(
+        TEMPLATE_PATH.read_text()
+    )
+    tool_name = 'search"quoted\\name'
+    rendered = template.render(
+        messages=[
+            {"role": "user", "content": "Search docs"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "function": {
+                            "name": tool_name,
+                            "arguments": {"query": "vllm"},
+                        },
+                    }
+                ],
+            },
+            {"role": "tool", "content": '{"result": "ok"}'},
+        ],
+        add_generation_prompt=False,
+    )
+
+    payload = rendered.split("<TOOLCALL>", 1)[1].split("</TOOLCALL>", 1)[0]
+    tool_calls = json.loads(payload)
+
+    assert tool_calls[0]["name"] == tool_name
+    assert tool_calls[0]["arguments"] == {"query": "vllm"}