diff --git a/renderers/parsers.py b/renderers/parsers.py
index 77fa850..a277a0e 100644
--- a/renderers/parsers.py
+++ b/renderers/parsers.py
@@ -20,6 +20,7 @@
from typing import Protocol, runtime_checkable
from renderers.base import ParsedToolCall, ToolCallParseStatus
+from renderers.parsing import _parse_xml_function_blocks
# ── Shared helpers ───────────────────────────────────────────────────
@@ -156,7 +157,22 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]:
class Qwen35ToolParser:
- """XML-style tool calls: ``V...``."""
+ """XML-style tool calls: ``V...``.
+
+ Mirrors vLLM's ``Qwen3CoderToolParser`` extraction logic — see
+ ``renderers.parsing`` for the full ladder and the rationale behind
+ picking ``qwen3_coder`` over ``qwen3_xml``. This parser drives
+ ``DefaultRenderer`` when callers wire it up with
+ ``tool_parser="qwen3.5"``; ``Qwen35Renderer.parse_response``
+ routes through the same shared helpers via ``parse_qwen35``.
+
+ Schema-aware coercion is intentionally inert here: ``ToolParser``'s
+ ``extract`` API does not receive the tool list (callers consume
+ the parsed values as opaque dicts), so ``param_index`` is empty
+ and every parameter goes through the no-schema branch of
+ ``_coerce_arg_value`` — vLLM-equivalent verbatim strings after the
+ case-insensitive ``null`` short-circuit.
+ """
def __init__(self, tokenizer):
self._tokenizer = tokenizer
@@ -169,69 +185,57 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]:
tc_start = _find(ids, self._tc_id)
if tc_start == -1:
return ids, []
+
tool_calls: list[ParsedToolCall] = []
i = tc_start
+ param_index: dict = {}
while i < len(ids):
- if ids[i] == self._tc_id:
- end = (
- _find(ids, self._tc_end_id, i + 1)
- if self._tc_end_id is not None
- else -1
- )
- if end == -1:
- raw = _decode(self._tokenizer, ids[i + 1 :])
- tool_calls.append(
- ParsedToolCall(
- raw=raw,
- token_span=(i, len(ids)),
- status=ToolCallParseStatus.UNCLOSED_BLOCK,
- )
- )
- break
+ if ids[i] != self._tc_id:
+ i += 1
+ continue
+
+ end = (
+ _find(ids, self._tc_end_id, i + 1)
+ if self._tc_end_id is not None
+ else -1
+ )
+ if end == -1:
+ block_text = _decode(self._tokenizer, ids[i + 1 :])
+ span = (i, len(ids))
+ wrapper_unclosed = True
+ else:
block_text = _decode(self._tokenizer, ids[i + 1 : end])
span = (i, end + 1)
- name_match = re.search(r"]+)>", block_text)
- if not name_match:
- tool_calls.append(
- ParsedToolCall(
- raw=block_text,
- token_span=span,
- status=ToolCallParseStatus.MALFORMED_STRUCTURE,
- )
- )
- i = end + 1
- continue
- name = name_match.group(1)
- arguments: dict = {}
- any_json_fallback = False
- for pm in re.finditer(
- r"]+)>\n?(.*?)\n?",
- block_text,
- re.DOTALL,
- ):
- arg_name = pm.group(1)
- arg_value = pm.group(2).strip()
- try:
- arguments[arg_name] = json.loads(arg_value)
- except (json.JSONDecodeError, ValueError):
- arguments[arg_name] = arg_value
- any_json_fallback = True
+ wrapper_unclosed = False
+
+ block_calls = _parse_xml_function_blocks(
+ block_text,
+ param_index=param_index,
+ token_span=span,
+ wrapper_unclosed=wrapper_unclosed,
+ )
+ if block_calls:
+ tool_calls.extend(block_calls)
+ elif wrapper_unclosed:
tool_calls.append(
ParsedToolCall(
raw=block_text,
- name=name,
- arguments=arguments,
token_span=span,
- status=(
- ToolCallParseStatus.INVALID_JSON
- if any_json_fallback
- else ToolCallParseStatus.OK
- ),
+ status=ToolCallParseStatus.UNCLOSED_BLOCK,
)
)
- i = end + 1
else:
- i += 1
+ tool_calls.append(
+ ParsedToolCall(
+ raw=block_text,
+ token_span=span,
+ status=ToolCallParseStatus.MALFORMED_STRUCTURE,
+ )
+ )
+
+ if wrapper_unclosed:
+ break
+ i = end + 1
return ids[:tc_start], tool_calls
diff --git a/renderers/parsing.py b/renderers/parsing.py
index ee26e89..0a8f25a 100644
--- a/renderers/parsing.py
+++ b/renderers/parsing.py
@@ -17,20 +17,122 @@
from __future__ import annotations
import json
+import re
+import uuid
from typing import Any
from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec
+# ── vLLM Qwen3CoderToolParser regex patterns ────────────────────────
+#
+# Lifted verbatim from ``vllm/tool_parsers/qwen3coder_tool_parser.py``
+# (``self.tool_call_regex`` / ``tool_call_function_regex`` /
+# ``tool_call_parameter_regex``). Each pattern has two branches: one
+# for the closed form (with the matching end tag) and one anchored at
+# end-of-string for unclosed output. The parameter regex uses
+# positive lookaheads so a missing ```` is recovered from
+# the next ```` boundary instead of
+# silently dropping the value.
+
+
+# Mask matching vLLM's ``vllm.utils.MASK_64_BITS`` so the random id we
+# generate is byte-shape compatible with vLLM's ``random_uuid()``.
+_MASK_64_BITS = (1 << 64) - 1
+
+
+def _make_tool_call_id() -> str:
+ """Mint a tool-call id in vLLM's default format.
+
+ vLLM's ``ToolCall`` dataclass uses ``default_factory=make_tool_call_id``
+ which returns ``f"chatcmpl-tool-{random_uuid()}"`` where
+ ``random_uuid`` is ``f"{uuid.uuid4().int & MASK_64_BITS:016x}"`` —
+ 16 hex characters drawn from the low 64 bits of a v4 UUID. We
+ reproduce the format so OpenAI-shaped clients get the same id
+ surface across renderers and vLLM serving paths.
+ """
+ return f"chatcmpl-tool-{uuid.uuid4().int & _MASK_64_BITS:016x}"
+
+
+_FUNCTION_BLOCK_RE = re.compile(
+ r"||(?=)|$)",
+ re.DOTALL,
+)
+
+
# ── Schema-aware argument coercion ──────────────────────────────────
#
# XML-style tool-call formats render argument values verbatim inside
-# ```` tags with no quoting. ``true`` and the string
+# ```` tags with no quoting. ``true`` and the string
# ``"true"`` produce identical wire bytes; without the tool schema, the
-# parser has no signal to distinguish them and defaults to
-# ``json.loads`` (the historical behavior). When the caller passes
-# ``tools=[...]``, parsers consult the per-parameter declared type to
-# keep string args verbatim, matching vLLM / SGLang reference parsers.
+# parser has no signal to distinguish them. We mirror vLLM's shared
+# ``extract_types_from_schema`` + ``coerce_to_schema_type`` helpers in
+# ``vllm/tool_parsers/utils.py``:
+#
+# 1. Flatten the JSON-Schema fragment to a set of types, recursing
+# through ``anyOf`` / ``oneOf`` / ``allOf`` and inferring types from
+# ``enum`` values. A schema with no extractable type information
+# defaults to ``["string"]``.
+# 2. Walk the priority-ordered ladder ``null > integer > number >
+# boolean > object > array > string``, returning the first
+# successful coercion. ``string`` is the always-succeeding terminal
+# whenever it appears in the type set, which subsumes both pure
+# string params and union types like ``Union[str, bool]``.
+#
+# Why ``qwen3_coder`` semantics and not ``qwen3_xml``: vLLM ships two
+# parsers for the same wire format. ``qwen3_xml`` is the newer
+# streaming-first parser (uses ``xml.parsers.expat`` and is the
+# recommended choice for *live serving* because ``qwen3_coder`` has
+# known infinite-loop bugs in vLLM's streaming path). For offline
+# token-level extraction we follow ``qwen3_coder``, which itself routes
+# through the shared ``coerce_to_schema_type`` helper above — so the
+# two converge on scalar semantics in non-streaming use.
+# TODO: when a streaming parse API lands, switch the streaming path to
+# ``qwen3_xml``-style state-machine semantics to dodge vLLM's
+# regex-streaming bugs (see HF Qwen3-Coder-Next discussion #17).
+
+
+# Type aliases mirror ``vllm/tool_parsers/utils.py:_TYPE_ALIASES``.
+_TYPE_ALIASES: dict[str, str] = {
+ "str": "string",
+ "text": "string",
+ "varchar": "string",
+ "char": "string",
+ "enum": "string",
+ "int": "integer",
+ "int32": "integer",
+ "int64": "integer",
+ "uint": "integer",
+ "uint32": "integer",
+ "uint64": "integer",
+ "long": "integer",
+ "short": "integer",
+ "unsigned": "integer",
+ "float": "number",
+ "float32": "number",
+ "float64": "number",
+ "double": "number",
+ "bool": "boolean",
+ "dict": "object",
+ "arr": "array",
+ "list": "array",
+ "sequence": "array",
+}
+
+
+# Priority order mirrors ``vllm/tool_parsers/utils.py:coerce_to_schema_type``.
+_TYPE_PRIORITY = (
+ "null",
+ "integer",
+ "number",
+ "boolean",
+ "object",
+ "array",
+ "string",
+)
def _build_param_type_index(
@@ -59,42 +161,124 @@ def _build_param_type_index(
return index
-def _coerce_arg_value(
- text: str, param_schema: dict[str, Any] | None
-) -> tuple[Any, bool]:
- """Coerce a raw ```` body to its declared type.
+def _extract_types_from_schema(schema: Any) -> list[str]:
+ """Flatten a JSON-Schema fragment to a list of type strings.
- Returns ``(value, used_json_fallback)``. The boolean is ``True`` only
- when ``json.loads`` was attempted, raised, AND the schema doesn't
- permit a string. Returning a string verbatim because the schema
- permits strings is NOT a fallback.
-
- Rule (matches vLLM / SGLang reference parsers):
+ Byte-for-byte port of ``vllm/tool_parsers/utils.py:extract_types_from_schema``:
+ handles top-level ``type`` (string or list), infers types from
+ ``enum`` values, and recurses through ``anyOf`` / ``oneOf`` /
+ ``allOf``. Returns ``["string"]`` when no type information can be
+ determined — which makes the no-schema branch coerce to ``string``
+ via the priority-ordered ladder in :func:`_coerce_arg_value`.
+ """
+ if schema is None or not isinstance(schema, dict):
+ return ["string"]
+
+ types: set[str] = set()
+
+ if "type" in schema:
+ type_value = schema["type"]
+ if isinstance(type_value, str):
+ types.add(type_value)
+ elif isinstance(type_value, list):
+ for t in type_value:
+ if isinstance(t, str):
+ types.add(t)
+
+ if "enum" in schema and isinstance(schema["enum"], list) and schema["enum"]:
+ for value in schema["enum"]:
+ if value is None:
+ types.add("null")
+ elif isinstance(value, bool):
+ types.add("boolean")
+ elif isinstance(value, int):
+ types.add("integer")
+ elif isinstance(value, float):
+ types.add("number")
+ elif isinstance(value, str):
+ types.add("string")
+ elif isinstance(value, list):
+ types.add("array")
+ elif isinstance(value, dict):
+ types.add("object")
+
+ for choice_field in ("anyOf", "oneOf", "allOf"):
+ if choice_field in schema and isinstance(schema[choice_field], list):
+ for choice in schema[choice_field]:
+ types.update(_extract_types_from_schema(choice))
+
+ return list(types) if types else ["string"]
- - If the param's declared ``type`` is ``"string"`` (or single-element
- ``["string"]``), return ``text`` verbatim — never ``json.loads``.
- - Otherwise try ``json.loads``. If that fails, return raw ``text``.
- The ``used_json_fallback`` flag is ``True`` only when the schema
- does NOT permit a string — i.e. the fallback is truly suspect.
- Union types (``anyOf``/``oneOf``) that include ``"string"`` alongside
- other types still attempt ``json.loads`` first so an explicit
- integer / bool can parse; the string branch wins as fallback, and
- landing there is expected — not a malformed-JSON signal.
+def _coerce_arg_value(
+ text: str, param_schema: dict[str, Any] | None
+) -> tuple[Any, bool]:
+ """Coerce a raw ```` body to its schema type.
+
+ Mirrors vLLM ``coerce_to_schema_type`` (``vllm/tool_parsers/utils.py``):
+ flatten the schema to a set of JSON-Schema types (recursing through
+ ``anyOf``/``oneOf``/``allOf`` and ``enum``), then walk a priority-
+ ordered ladder ``null > integer > number > boolean > object > array
+ > string``, returning the first successful coercion. ``string`` is
+ the always-succeeding terminal whenever it is in the type set.
+
+ Returns ``(value, used_fallback)``. ``used_fallback=True`` iff
+ every type in the schema declined the value AND the last-ditch
+ ``json.loads`` of the raw text also failed — i.e. the value
+ couldn't be expressed as anything the schema permits. The renderer
+ propagates that flag to ``ToolCallParseStatus.INVALID_JSON`` for the
+ verifier / RL-loss signal; vLLM has no such signal.
"""
- string_is_allowed = False
- if param_schema is not None:
- declared = param_schema.get("type")
- if declared == "string" or declared == ["string"]:
+ schema_types = _extract_types_from_schema(param_schema)
+ normalized_types = {
+ _TYPE_ALIASES.get(key, key)
+ for t in schema_types
+ if isinstance(t, str)
+ for key in [t.strip().lower()]
+ }
+
+ for candidate in _TYPE_PRIORITY:
+ if candidate not in normalized_types:
+ continue
+ if candidate == "null":
+ if text.lower() == "null":
+ return None, False
+ continue
+ if candidate == "string":
return text, False
- for branch in param_schema.get("anyOf") or param_schema.get("oneOf") or []:
- if isinstance(branch, dict) and branch.get("type") == "string":
- string_is_allowed = True
- break
+ if candidate == "integer":
+ try:
+ return int(text), False
+ except (ValueError, TypeError):
+ continue
+ if candidate == "number":
+ # ``int(val)`` is inside the try block so ``nan`` (raises
+ # ``ValueError``) and ``inf`` (raises ``OverflowError``) skip
+ # the number branch instead of aborting the whole parse.
+ # vLLM's reference catches ``ValueError`` / ``TypeError`` only
+ # — we widen to ``OverflowError`` so ``"inf"`` doesn't crash.
+ try:
+ val = float(text)
+ return (val if val != int(val) else int(val)), False
+ except (ValueError, TypeError, OverflowError):
+ continue
+ if candidate == "boolean":
+ lowered = text.lower().strip()
+ if lowered in ("true", "1"):
+ return True, False
+ if lowered in ("false", "0"):
+ return False, False
+ continue
+ if candidate in ("object", "array"):
+ try:
+ return json.loads(text), False
+ except (json.JSONDecodeError, ValueError, TypeError):
+ continue
+
try:
return json.loads(text), False
except (json.JSONDecodeError, ValueError):
- return text, not string_is_allowed
+ return text, True
def _find(ids: list[int], target: int, start: int = 0) -> int:
@@ -259,20 +443,49 @@ def parse_qwen35(
content="", reasoning_content=reasoning or None, tool_calls=[]
)
+ full_text = _decode(tokenizer, ids)
tc_start = _find(ids, tool_call_id)
- tool_calls: list[ParsedToolCall] = []
- if tc_start != -1:
- content_text = _decode(tokenizer, ids[:tc_start]).strip()
- tool_calls = _parse_xml_tool_calls(
- tokenizer,
- ids[tc_start:],
- tool_call_id,
- tool_call_end_id,
- section_offset=parse_offset + tc_start,
- param_index=_build_param_type_index(tools),
- )
- else:
- content_text = _decode(tokenizer, ids).strip()
+ param_index = _build_param_type_index(tools)
+ try:
+ if tc_start != -1:
+ # vLLM ``qwen3coder_tool_parser.py:316-319``: content text
+ # is the raw slice up to the first ```` (or
+ # first ```` is present).
+ # No whitespace stripping — the model's prefix prose is
+ # surfaced verbatim.
+ content_text = _decode(tokenizer, ids[:tc_start])
+ tool_calls = _parse_xml_tool_calls(
+ tokenizer,
+ ids[tc_start:],
+ tool_call_id,
+ tool_call_end_id,
+ section_offset=parse_offset + tc_start,
+ param_index=param_index,
+ )
+ else:
+ # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: no
+ # ```` markers — scan whole output for raw
+ # ```` blocks. Content text is whatever sits
+ # before the first ``= 0 else full_text
+ else:
+ content_text = full_text
+ except Exception:
+ # vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all: on any
+ # extraction error, drop every recovered call and surface the
+ # raw text as content so downstream consumers never see a
+ # half-parsed call.
+ content_text = full_text
+ tool_calls = []
return ParsedResponse(
content=content_text,
@@ -290,71 +503,149 @@ def _parse_xml_tool_calls(
section_offset: int,
param_index: dict[str, dict[str, dict[str, Any]]],
) -> list[ParsedToolCall]:
- """Parse Qwen3.5-style XML tool calls from token IDs."""
- import re
-
+ """Parse Qwen3.5-style XML tool calls from token IDs.
+
+ Uses token IDs to demarcate ```` / ````
+ boundaries (so ``token_span`` stays precise for trainer-side
+ masking) and vLLM-parity regex on the decoded block text to
+ extract the function/parameter content tolerantly. Mirrors
+ ``Qwen3CoderToolParser._parse_xml_function_call`` plus the
+ tag-tolerance branches of the three module-level patterns.
+ """
tool_calls: list[ParsedToolCall] = []
i = 0
while i < len(ids):
- if ids[i] == tc_id:
- end = _find(ids, tc_end_id, i + 1)
- if end == -1:
- raw = _decode(tokenizer, ids[i + 1 :])
- tool_calls.append(
- ParsedToolCall(
- raw=raw,
- token_span=(section_offset + i, section_offset + len(ids)),
- status=ToolCallParseStatus.UNCLOSED_BLOCK,
- )
- )
- break
+ if ids[i] != tc_id:
+ i += 1
+ continue
+
+ end = _find(ids, tc_end_id, i + 1)
+ if end == -1:
+ block_ids = ids[i + 1 :]
+ block_text = _decode(tokenizer, block_ids)
+ span = (section_offset + i, section_offset + len(ids))
+ wrapper_unclosed = True
+ else:
block_text = _decode(tokenizer, ids[i + 1 : end])
span = (section_offset + i, section_offset + end + 1)
- name_match = re.search(r"]+)>", block_text)
- if not name_match:
- tool_calls.append(
- ParsedToolCall(
- raw=block_text,
- token_span=span,
- status=ToolCallParseStatus.MALFORMED_STRUCTURE,
- )
- )
- i = end + 1
- continue
+ wrapper_unclosed = False
- name = name_match.group(1)
- params = param_index.get(name, {})
- arguments: dict = {}
- any_json_fallback = False
- for pm in re.finditer(
- r"]+)>\n?(.*?)\n?", block_text, re.DOTALL
- ):
- arg_name = pm.group(1)
- arg_value = pm.group(2).strip()
- value, used_fallback = _coerce_arg_value(
- arg_value, params.get(arg_name)
+ block_calls = _parse_xml_function_blocks(
+ block_text,
+ param_index=param_index,
+ token_span=span,
+ wrapper_unclosed=wrapper_unclosed,
+ )
+ if block_calls:
+ tool_calls.extend(block_calls)
+ elif wrapper_unclosed:
+ # vLLM would silently drop a content-less unclosed
+ # ````; we keep the diagnostic so verifier /
+ # RL-loss code can mask the malformed span.
+ tool_calls.append(
+ ParsedToolCall(
+ raw=block_text,
+ token_span=span,
+ status=ToolCallParseStatus.UNCLOSED_BLOCK,
)
- arguments[arg_name] = value
- any_json_fallback = any_json_fallback or used_fallback
+ )
+ else:
tool_calls.append(
ParsedToolCall(
raw=block_text,
- name=name,
- arguments=arguments,
token_span=span,
- status=(
- ToolCallParseStatus.INVALID_JSON
- if any_json_fallback
- else ToolCallParseStatus.OK
- ),
+ status=ToolCallParseStatus.MALFORMED_STRUCTURE,
)
)
- i = end + 1
+
+ if wrapper_unclosed:
+ break
+ i = end + 1
+ return tool_calls
+
+
+def _parse_xml_function_blocks(
+ text: str,
+ *,
+ param_index: dict[str, dict[str, dict[str, Any]]],
+ token_span: tuple[int, int] | None,
+ wrapper_unclosed: bool,
+) -> list[ParsedToolCall]:
+ """Apply vLLM's ```` regex over *text*.
+
+ ``token_span`` is shared by every call recovered from this block;
+ the granularity is the surrounding ```` region (or the
+ whole completion in the no-marker back-off path), matching how
+ vLLM treats multiple ```` siblings — it does not try to
+ attribute character offsets back to token positions.
+ """
+ tool_calls: list[ParsedToolCall] = []
+ for match in _FUNCTION_BLOCK_RE.finditer(text):
+ closed = match.group(1)
+ body = closed if closed is not None else (match.group(2) or "")
+ function_unclosed = closed is None
+ end_index = body.find(">")
+ if end_index == -1:
+ tool_calls.append(
+ ParsedToolCall(
+ raw=body,
+ token_span=token_span,
+ status=ToolCallParseStatus.MALFORMED_STRUCTURE,
+ )
+ )
+ continue
+
+ name = body[:end_index]
+ params_text = body[end_index + 1 :]
+ params = param_index.get(name, {})
+ arguments: dict = {}
+ any_fallback = False
+ for capture in _PARAMETER_BLOCK_RE.findall(params_text):
+ idx = capture.find(">")
+ if idx == -1:
+ continue
+ arg_name = capture[:idx]
+ arg_value = _strip_one_newline(capture[idx + 1 :])
+ value, used_fallback = _coerce_arg_value(arg_value, params.get(arg_name))
+ arguments[arg_name] = value
+ any_fallback = any_fallback or used_fallback
+
+ # Precedence: structural failure dominates coercion failure.
+ # When a block is both unclosed AND has args that couldn't be
+ # coerced, surface ``UNCLOSED_BLOCK`` only — the truncation is
+ # the root cause and the verifier already discounts unclosed
+ # blocks. ``INVALID_JSON`` is reserved for well-formed blocks
+ # whose recovered args couldn't satisfy the schema.
+ if wrapper_unclosed or function_unclosed:
+ status = ToolCallParseStatus.UNCLOSED_BLOCK
+ elif any_fallback:
+ status = ToolCallParseStatus.INVALID_JSON
else:
- i += 1
+ status = ToolCallParseStatus.OK
+ tool_calls.append(
+ ParsedToolCall(
+ raw=body,
+ name=name,
+ arguments=arguments,
+ id=_make_tool_call_id(),
+ token_span=token_span,
+ status=status,
+ )
+ )
return tool_calls
+def _strip_one_newline(s: str) -> str:
+ """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one
+ leading and one trailing newline from a parameter value — no
+ further whitespace trimming."""
+ if s.startswith("\n"):
+ s = s[1:]
+ if s.endswith("\n"):
+ s = s[:-1]
+ return s
+
+
# ── GLM-5/4.7/4.5: name k v
diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py
new file mode 100644
index 0000000..b966305
--- /dev/null
+++ b/tests/test_arg_coercion.py
@@ -0,0 +1,503 @@
+"""Schema-aware argument coercion parity with vLLM / SGLang.
+
+The XML-style tool-call wire format (Qwen3.5, GLM, MiniMax, Laguna)
+renders parameter values verbatim with no quoting, so the parser must
+consult the tool schema to know whether ``True`` is a bool or a string.
+Renderers' previous coercion was strict ``json.loads``, which flagged
+``True`` / ``False`` (capitalised Python literals) as ``INVALID_JSON``
+for boolean params — but both vLLM's ``Qwen3CoderToolParser`` and
+SGLang's ``Qwen3CoderDetector`` accept them via case-folded comparison.
+
+These tests pin the new ladder to the vLLM / SGLang reference shape and
+guard the bug from issue #47.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from renderers.base import ToolCallParseStatus, load_tokenizer
+from renderers.parsing import _coerce_arg_value, parse_qwen35
+
+
+# ── Direct coercion ─────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+ "raw,expected",
+ [
+ ("true", True),
+ ("True", True),
+ ("TRUE", True),
+ ("false", False),
+ ("False", False),
+ ("FALSE", False),
+ ],
+)
+def test_boolean_accepts_case_insensitive(raw, expected):
+ value, used_fallback = _coerce_arg_value(raw, {"type": "boolean"})
+ assert value is expected
+ assert used_fallback is False
+
+
+@pytest.mark.parametrize("declared", ["boolean", "bool"])
+def test_boolean_type_aliases_all_accepted(declared):
+ value, used_fallback = _coerce_arg_value("True", {"type": declared})
+ assert value is True
+ assert used_fallback is False
+
+
+def test_boolean_garbage_keeps_raw_text_with_invalid_flag():
+ """vLLM ``coerce_to_schema_type``: when the boolean branch declines
+ (value not in ``{true, false, 1, 0}``) and no other type in the
+ schema accepts the value, the terminal ``json.loads`` is attempted
+ and on failure the raw text is returned. We surface that path
+ via ``used_fallback=True`` for the verifier / RL-loss signal."""
+ value, used_fallback = _coerce_arg_value("yes", {"type": "boolean"})
+ assert value == "yes"
+ assert used_fallback is True
+
+
+@pytest.mark.parametrize(
+ "declared",
+ [["boolean", "null"], ["integer", "null"], ["number", "null"], ["object", "null"]],
+)
+def test_null_coerces_when_null_is_in_type_set(declared):
+ """vLLM null-coerces only when ``"null"`` is one of the declared
+ schema types — typically via JSON-Schema array-form ``["X", "null"]``
+ (the ``Optional[X]`` shape) or via ``anyOf: [..., {"type": "null"}]``.
+ Case-insensitive: matches ``null`` / ``Null`` / ``NULL``."""
+ for raw in ("null", "Null", "NULL"):
+ value, used_fallback = _coerce_arg_value(raw, {"type": declared})
+ assert value is None
+ assert used_fallback is False
+
+
+def test_string_typed_null_returns_verbatim():
+ """Pure string-typed param: ``"null"`` on the wire is just the
+ string ``"null"`` — no null coercion because ``"null"`` is not in
+ the schema's type set. Byte parity with vLLM
+ ``coerce_to_schema_type`` (string branch always succeeds)."""
+ for raw in ("null", "Null", "NULL"):
+ value, used_fallback = _coerce_arg_value(raw, {"type": "string"})
+ assert value == raw
+ assert used_fallback is False
+
+
+@pytest.mark.parametrize(
+ "declared,raw,expected",
+ [
+ ("integer", "42", 42),
+ ("int", "-7", -7),
+ ("uint", "0", 0),
+ ("long", "9999999999", 9999999999),
+ ("short", "12", 12),
+ ("unsigned", "5", 5),
+ ],
+)
+def test_int_family_coerces(declared, raw, expected):
+ value, used_fallback = _coerce_arg_value(raw, {"type": declared})
+ assert value == expected
+ assert isinstance(value, int)
+ assert used_fallback is False
+
+
+def test_int_failure_keeps_raw_and_flags_fallback():
+ value, used_fallback = _coerce_arg_value("abc", {"type": "integer"})
+ assert value == "abc"
+ assert used_fallback is True
+
+
+@pytest.mark.parametrize(
+ "raw,expected,exact_type",
+ [
+ ("3.14", 3.14, float),
+ ("-2.5", -2.5, float),
+ # vLLM ``coerce_to_schema_type`` int-demotes whenever the parsed
+ # float is a whole number, regardless of source-string shape —
+ # so ``1e3``, ``1.0``, and ``7`` all become int.
+ ("1e3", 1000, int),
+ ("1.0", 1, int),
+ ("7", 7, int),
+ ],
+)
+def test_float_family_coerces(raw, expected, exact_type):
+ value, used_fallback = _coerce_arg_value(raw, {"type": "number"})
+ assert value == expected
+ assert type(value) is exact_type
+ assert used_fallback is False
+
+
+def test_float_failure_keeps_raw_and_flags_fallback():
+ value, used_fallback = _coerce_arg_value("not-a-number", {"type": "float"})
+ assert value == "not-a-number"
+ assert used_fallback is True
+
+
+def test_object_via_json_loads():
+ value, used_fallback = _coerce_arg_value('{"k": 1}', {"type": "object"})
+ assert value == {"k": 1}
+ assert used_fallback is False
+
+
+def test_object_python_literal_does_not_parse():
+ """vLLM ``coerce_to_schema_type`` only uses ``json.loads`` for
+ object/array types — no ``ast.literal_eval`` fallback. Python-style
+ single-quoted dicts therefore land on the terminal raw-text path
+ and get flagged. (Renderers' previous ``ast.literal_eval`` fallback
+ was a vLLM deviation; we dropped it for parity.)"""
+ value, used_fallback = _coerce_arg_value("{'k': 1}", {"type": "object"})
+ assert value == "{'k': 1}"
+ assert used_fallback is True
+
+
+def test_array_via_json_loads():
+ value, used_fallback = _coerce_arg_value("[1, 2, 3]", {"type": "array"})
+ assert value == [1, 2, 3]
+ assert used_fallback is False
+
+
+def test_object_total_failure_flags_invalid():
+ value, used_fallback = _coerce_arg_value(
+ "this is not a json object", {"type": "object"}
+ )
+ assert value == "this is not a json object"
+ assert used_fallback is True
+
+
+def test_anyof_recursively_flattens_types():
+ """vLLM ``extract_types_from_schema`` recurses through ``anyOf`` /
+ ``oneOf`` / ``allOf`` to build the type set. ``Union[int, str]`` ⇒
+ ``{integer, string}``: integer parses first, falling back to string
+ for non-numeric values — and a bare string never gets flagged
+ because the string branch is the always-succeeding terminal."""
+ schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
+
+ # Integer-shaped wire value rides the integer branch.
+ value, used_fallback = _coerce_arg_value("42", schema)
+ assert value == 42
+ assert used_fallback is False
+
+ # Non-numeric wire value declines integer, lands on string. The
+ # exact regression from PR #63: pre-fix, this was flagged
+ # ``INVALID_JSON`` because top-level ``type`` was absent and the
+ # parser tried ``json.loads("LIS")``. vLLM's recursive flatten +
+ # priority-ordered ladder fixes that as a side effect.
+ value, used_fallback = _coerce_arg_value("LIS", schema)
+ assert value == "LIS"
+ assert used_fallback is False
+
+
+def test_oneof_with_string_branch_recovers_bare_strings():
+ """Same recursive-flatten path as ``anyOf``: ``oneOf`` with a
+ string branch must accept bare-string wire values without flagging."""
+ schema = {"oneOf": [{"type": "integer"}, {"type": "string"}]}
+ value, used_fallback = _coerce_arg_value("hello", schema)
+ assert value == "hello"
+ assert used_fallback is False
+
+
+@pytest.mark.parametrize(
+ "declared", ["string", "str", "text", "varchar", "char", "enum"]
+)
+def test_string_family_returns_verbatim(declared):
+ value, used_fallback = _coerce_arg_value("True", {"type": declared})
+ assert value == "True"
+ assert used_fallback is False
+
+
+def test_string_with_list_form_type():
+ value, used_fallback = _coerce_arg_value("True", {"type": ["string"]})
+ assert value == "True"
+ assert used_fallback is False
+
+
+def test_no_schema_returns_verbatim_string():
+ """vLLM ``extract_types_from_schema(None)`` returns ``["string"]``,
+ which routes every value through the always-succeeding string
+ branch of the priority ladder. Untyped ``"null"`` therefore stays
+ a string — there is no top-level null short-circuit ahead of the
+ type check."""
+ value, used_fallback = _coerce_arg_value("42", None)
+ assert value == "42"
+ assert used_fallback is False
+
+ value, used_fallback = _coerce_arg_value("True", None)
+ assert value == "True"
+ assert used_fallback is False
+
+ value, used_fallback = _coerce_arg_value("hello", None)
+ assert value == "hello"
+ assert used_fallback is False
+
+ value, used_fallback = _coerce_arg_value("null", None)
+ assert value == "null"
+ assert used_fallback is False
+
+
+def test_unknown_type_falls_through_to_terminal_json_loads():
+ """A declared type the priority ladder doesn't recognise (e.g.
+ ``"date"``) skips every typed branch and lands on the terminal
+ ``json.loads``. JSON-shaped values still parse; everything else
+ keeps the raw text and gets flagged."""
+ value, used_fallback = _coerce_arg_value("2024-01-01", {"type": "date"})
+ assert value == "2024-01-01"
+ assert used_fallback is True
+
+ # JSON-shaped value rides the terminal ``json.loads``.
+ value, used_fallback = _coerce_arg_value("[1, 2]", {"type": "date"})
+ assert value == [1, 2]
+ assert used_fallback is False
+
+
+# ── Integration: parse_qwen35 end-to-end (the bug from issue #47) ───
+
+
+_TOOLS_FROM_ISSUE = [
+ {
+ "type": "function",
+ "function": {
+ "name": "filesystem_server_get_directory_tree",
+ "description": "List a directory tree.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {"type": "string"},
+ "max_depth": {"type": "integer"},
+ "include_files": {"type": "boolean"},
+ "show_size": {"type": "boolean"},
+ },
+ },
+ },
+ }
+]
+
+
+def _parse(tok, completion: str, *, tools=None):
+ ids = tok.encode(completion, add_special_tokens=False)
+ return parse_qwen35(
+ tok,
+ list(ids),
+ stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+ think_id=tok.convert_tokens_to_ids(""),
+ think_end_id=tok.convert_tokens_to_ids(""),
+ tool_call_id=tok.convert_tokens_to_ids(""),
+ tool_call_end_id=tok.convert_tokens_to_ids(""),
+ tools=tools,
+ )
+
+
+def test_qwen35_capital_true_for_boolean_is_ok_status():
+ """Regression for issue #47: ``True``
+ used to be flagged ``INVALID_JSON`` because ``json.loads("True")``
+ fails. SGLang and vLLM both accept it via case-folded comparison;
+ renderers now does too."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "/\n"
+ "2\n"
+ "True\n"
+ "True\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {
+ "path": "/",
+ "max_depth": 2,
+ "include_files": True,
+ "show_size": True,
+ }
+
+
+# ── Structural tolerance: parity with vLLM's three regex patterns ────
+
+
+def test_qwen35_tolerates_missing_closing_parameter():
+ """vLLM ``tool_call_parameter_regex`` uses positive lookaheads so a
+ missing ```` is recovered from the next ```` boundary instead of dropping the value."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "/\n" # missing
+ "2\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {"path": "/", "max_depth": 2}
+
+
+def test_qwen35_tolerates_unclosed_function():
+ """vLLM ``tool_call_function_regex`` second alternation matches an
+ unclosed ``\n"
+ "\n"
+ "/\n", # no , no
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.UNCLOSED_BLOCK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {"path": "/"}
+
+
+def test_qwen35_backoff_no_tool_call_markers():
+ """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output
+ contains no ```` markers, treat the whole completion as
+ a single tool-call region and scan for ```` directly.
+ Content text is the raw slice up to the first ``\n"
+ "/etc\n"
+ "1\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {"path": "/etc", "max_depth": 1}
+ # vLLM-parity: content = text up to first ```` and no ````; the commented-out ``.rstrip()`` confirms
+ intent. We mirror that — surrounding whitespace round-trips."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "let me check\n\n" # trailing whitespace before
+ "\n"
+ "\n"
+ "/\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert parsed.content == "let me check\n\n"
+ assert len(parsed.tool_calls) == 1
+ assert parsed.tool_calls[0].status == ToolCallParseStatus.OK
+
+
+def test_qwen35_tool_call_id_matches_vllm_format():
+ """vLLM's ``ToolCall`` default factory mints ids of the form
+ ``chatcmpl-tool-<16 hex>``. Each surfaced call gets a fresh id."""
+ import re as _re
+
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "/\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "/etc\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 2
+ pattern = _re.compile(r"^chatcmpl-tool-[0-9a-f]{16}$")
+ ids = [tc.id for tc in parsed.tool_calls]
+ for tcid in ids:
+ assert tcid is not None and pattern.match(tcid), tcid
+ # Two surfaced calls must get distinct ids.
+ assert ids[0] != ids[1]
+
+
+def test_qwen35_exception_fallback_returns_full_text():
+ """vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all returns
+ ``tools_called=False, tool_calls=[], content=model_output`` on any
+ unhandled extraction error. We mirror that by wrapping the
+ tool-call extraction; reasoning content extracted earlier is kept.
+ """
+ from unittest.mock import patch
+
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ completion = (
+ "\n"
+ "\n"
+ "/\n"
+ "\n"
+ ""
+ )
+ ids = tok.encode(completion, add_special_tokens=False)
+
+ with patch(
+ "renderers.parsing._parse_xml_tool_calls",
+ side_effect=RuntimeError("boom"),
+ ):
+ parsed = parse_qwen35(
+ tok,
+ list(ids),
+ stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+ think_id=tok.convert_tokens_to_ids(""),
+ think_end_id=tok.convert_tokens_to_ids(""),
+ tool_call_id=tok.convert_tokens_to_ids(""),
+ tool_call_end_id=tok.convert_tokens_to_ids(""),
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert parsed.tool_calls == []
+ # content is the full decoded post-think text, surfaced verbatim.
+ assert "" in parsed.content
+ assert "" in parsed.content
+
+
+def test_qwen35_parameter_value_preserves_internal_whitespace():
+ """vLLM strips exactly one leading and one trailing ``\\n`` from the
+ parameter body (``qwen3coder_tool_parser.py:246-250``); inner
+ indentation must round-trip verbatim."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "\n /etc \n\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ # One leading + one trailing \n stripped — interior spaces preserved.
+ assert tc.arguments == {"path": " /etc "}
diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py
index cf2a179..4ca4688 100644
--- a/tests/test_tool_arg_type_preservation.py
+++ b/tests/test_tool_arg_type_preservation.py
@@ -70,7 +70,12 @@ def renderer(model, renderer_name):
# Each case: a single ``x`` argument whose value is a STRING that
# happens to be valid JSON of another type. With a schema declaring
-# ``x: string``, the parser must return the string verbatim.
+# ``x: string``, the parser must return the string verbatim across all
+# renderers — including the ``string-null`` case, which now rides
+# vLLM's priority-ordered ladder where ``string`` is the always-
+# succeeding terminal and ``null`` is only coerced when ``"null"`` is
+# in the type set (``Optional[str]`` declares it; pure ``str`` does
+# not). See ``vllm/tool_parsers/utils.py:coerce_to_schema_type``.
JSON_LOOKING_STRING_ARGS = [
pytest.param({"x": "true"}, id="string-bool"),
pytest.param({"x": "42"}, id="string-int"),
@@ -119,7 +124,16 @@ def _extract_assistant_tokens(renderer, prompt, assistant_msg, *, tools=None):
@pytest.mark.parametrize("args", JSON_LOOKING_STRING_ARGS)
def test_string_arg_preserves_type(model, renderer_name, renderer, args):
"""Tool-call args of declared type ``str`` must round-trip as ``str``,
- not get re-parsed as bool/int/null/list/dict by the parser."""
+ not get re-parsed as bool/int/null/list/dict by the parser.
+
+ Under vLLM ``coerce_to_schema_type`` parity, a string-typed param
+ whose wire bytes happen to spell ``null`` stays a string — null
+ coercion only fires when ``"null"`` is declared in the schema
+ (typically via ``Optional[X]`` ⇒ ``anyOf [X, null]`` or
+ ``type: ["X", "null"]``). The string branch is the always-
+ succeeding terminal of the priority ladder, so it absorbs every
+ bare wire value without flagging.
+ """
msg = {
"role": "assistant",
"content": "",
@@ -135,6 +149,7 @@ def test_string_arg_preserves_type(model, renderer_name, renderer, args):
assert parsed.tool_calls, f"{model}: parser returned no tool_calls"
got = _normalize_args(parsed.tool_calls[0].arguments)
+
assert got == args, (
f"{model}: tool-arg type drift — sent {args!r}, parser returned {got!r}"
)