From dfff90e924fcf80ad0a780960fab44415c28c92e Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 12:32:12 +0200 Subject: [PATCH 1/8] fix(server): normalize Anthropic tools shape to OpenAI/Qwen for chat template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthropic tool definitions use `input_schema` as the schema key; Qwen3-Coder's chat template expects `parameters`. With claude-code's 24-tool requests the model couldn't ground its tool schemas and fell back to plain-text `` blocks. Adds `normalize_tools_for_qwen()` (38 LOC) that handles three input shapes: - Anthropic (input_schema) → {type:function, function:{name,description,parameters}} - OpenAI envelope already present → pass through unchanged - Bare Qwen top-level (name+parameters, no wrapper) → wrap to OpenAI envelope Wired into request parsing at body["tools"] assignment. 5 new unit tests: anthropic_bare, openai_passthrough, bare_qwen_passthrough, mixed (both shapes in one array), empty (defensive). All 1454 assertions pass. --- server/src/server/http_server.cpp | 46 ++++++++++++- server/test/test_server_unit.cpp | 107 ++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/server/src/server/http_server.cpp b/server/src/server/http_server.cpp index ab37805bf..673492a7a 100644 --- a/server/src/server/http_server.cpp +++ b/server/src/server/http_server.cpp @@ -353,6 +353,48 @@ std::string render_tool_call_xml(const std::string & name, const json & argument return out; } +// Normalize tools array to OpenAI/Qwen3 shape: {"type":"function","function":{...}}. +// Anthropic shape uses "input_schema"; bare Qwen shape has "parameters" at top level. 
+json normalize_tools_for_qwen(const json & tools) { + if (!tools.is_array()) return tools; + json out = json::array(); + for (const auto & elem : tools) { + if (!elem.is_object()) { out.push_back(elem); continue; } + // Already OpenAI shape: pass through unchanged. + if (elem.contains("type") && elem["type"] == "function" && elem.contains("function")) { + out.push_back(elem); + continue; + } + // Anthropic shape: input_schema → parameters. + if (elem.contains("input_schema")) { + out.push_back({ + {"type", "function"}, + {"function", { + {"name", elem.value("name", "")}, + {"description", elem.value("description", "")}, + {"parameters", elem["input_schema"]} + }} + }); + continue; + } + // Bare Qwen shape: top-level name + parameters, no wrapper. + if (elem.contains("name") && elem.contains("parameters")) { + out.push_back({ + {"type", "function"}, + {"function", { + {"name", elem.value("name", "")}, + {"description", elem.value("description", "")}, + {"parameters", elem["parameters"]} + }} + }); + continue; + } + // Unknown shape: pass through unchanged. + out.push_back(elem); + } + return out; +} + std::vector normalize_chat_messages( const json & messages, ApiFormat format, @@ -777,9 +819,9 @@ bool HttpServer::route_request(int fd, const HttpRequest & hr) { req.sampler.rep_window = body["rep_window"].get(); } - // Tools. + // Tools — normalize Anthropic/bare-Qwen shape to OpenAI envelope. if (body.contains("tools")) { - req.tools = body["tools"]; + req.tools = normalize_tools_for_qwen(body["tools"]); } // Tool choice constraint for hint generation. 
if (body.contains("tool_choice")) { diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp index 1415aab30..9bfb638fb 100644 --- a/server/test/test_server_unit.cpp +++ b/server/test/test_server_unit.cpp @@ -45,6 +45,8 @@ std::vector normalize_chat_messages( const json & messages, ApiFormat format, ToolMemory & tool_memory); + +json normalize_tools_for_qwen(const json & tools); } // ─── Test framework (ds4 style) ──────────────────────────────────────── @@ -2446,6 +2448,105 @@ static void test_generate_result_accept_rate_zero_when_no_spec_decode() { r.ok = true; // accept_rate not set → must be 0.0f TEST_ASSERT(r.accept_rate == 0.0f); +// normalize_tools_for_qwen tests +// ═══════════════════════════════════════════════════════════════════════ + +static void test_normalize_tools_anthropic_bare() { + // Anthropic shape: input_schema → parameters, wrapped in type/function envelope. + json input = json::array({{ + {"name", "get_weather"}, + {"description", "Get the weather for a city"}, + {"input_schema", { + {"type", "object"}, + {"properties", {{"city", {{"type", "string"}}}}}, + {"required", json::array({"city"})} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0].contains("type")); + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0].contains("function")); + TEST_ASSERT(out[0]["function"]["name"] == "get_weather"); + TEST_ASSERT(out[0]["function"]["description"] == "Get the weather for a city"); + TEST_ASSERT(out[0]["function"].contains("parameters")); + TEST_ASSERT(out[0]["function"]["parameters"]["type"] == "object"); + TEST_ASSERT(out[0]["function"]["parameters"]["properties"].contains("city")); + TEST_ASSERT(!out[0].contains("input_schema")); +} + +static void test_normalize_tools_openai_passthrough() { + // OpenAI shape already: type/function envelope → pass through unchanged. 
+ json input = json::array({{ + {"type", "function"}, + {"function", { + {"name", "search"}, + {"description", "Search the web"}, + {"parameters", {{"type", "object"}, {"properties", json::object()}}} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0]["function"]["name"] == "search"); + TEST_ASSERT(out[0]["function"]["description"] == "Search the web"); +} + +static void test_normalize_tools_bare_qwen_passthrough() { + // Bare Qwen shape: name + parameters at top level, no wrapper → wrap to type/function. + json input = json::array({{ + {"name", "get_weather"}, + {"description", "Get weather"}, + {"parameters", { + {"type", "object"}, + {"properties", {{"city", {{"type", "string"}}}}} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0]["function"]["name"] == "get_weather"); + TEST_ASSERT(out[0]["function"]["description"] == "Get weather"); + TEST_ASSERT(out[0]["function"]["parameters"]["type"] == "object"); +} + +static void test_normalize_tools_mixed() { + // Mixed array: Anthropic + OpenAI shapes both normalize to OpenAI shape. 
+ json input = json::array({ + { + {"name", "tool_a"}, + {"description", "Anthropic-shaped tool"}, + {"input_schema", {{"type", "object"}, {"properties", json::object()}}} + }, + { + {"type", "function"}, + {"function", { + {"name", "tool_b"}, + {"description", "Already OpenAI-shaped"} + }} + } + }); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 2); + // First: Anthropic → normalized + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0]["function"]["name"] == "tool_a"); + TEST_ASSERT(out[0]["function"].contains("parameters")); + // Second: OpenAI passthrough + TEST_ASSERT(out[1]["type"] == "function"); + TEST_ASSERT(out[1]["function"]["name"] == "tool_b"); +} + +static void test_normalize_tools_empty() { + // Empty array stays empty. + json out = dflash::common::normalize_tools_for_qwen(json::array()); + TEST_ASSERT(out.is_array()); + TEST_ASSERT(out.empty()); + + // Non-array (defensive) stays unchanged. + json non_array = json::object(); + json out2 = dflash::common::normalize_tools_for_qwen(non_array); + TEST_ASSERT(out2.is_object()); } int main() { @@ -2608,6 +2709,12 @@ int main() { RUN_TEST(test_generate_result_accept_rate_in_usage_openai); RUN_TEST(test_generate_result_accept_rate_in_usage_anthropic); RUN_TEST(test_generate_result_accept_rate_zero_when_no_spec_decode); + std::fprintf(stderr, "\n── normalize_tools_for_qwen ──\n"); + RUN_TEST(test_normalize_tools_anthropic_bare); + RUN_TEST(test_normalize_tools_openai_passthrough); + RUN_TEST(test_normalize_tools_bare_qwen_passthrough); + RUN_TEST(test_normalize_tools_mixed); + RUN_TEST(test_normalize_tools_empty); std::fprintf(stderr, "\n══════════════════════════════════════════\n"); std::fprintf(stderr, " Results: %d assertions, %d failures\n", From 4897a39cd37286af2fb600da848bffedb5e3388f Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 14:19:06 +0200 Subject: [PATCH 2/8] fix(server): 
parse claude-code native XML tags ( etc.) as tool calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model emits CMD, PATH etc. when its system prompt uses that format. Extend tool_parser (Pattern 6) and sse_emitter hit- detection to recognise these 7 tags: bash, read, write, edit, ls, grep, glob. Case-insensitive lookup maps the emitted tag to the canonical tool name from the request's tools array (e.g. → "Bash"). Eight new unit tests added; 1483 assertions all pass. --- server/src/server/sse_emitter.cpp | 18 +++- server/src/server/tool_parser.cpp | 71 ++++++++++++++- server/test/test_server_unit.cpp | 145 ++++++++++++++++++++++++++++++ 3 files changed, 232 insertions(+), 2 deletions(-) diff --git a/server/src/server/sse_emitter.cpp b/server/src/server/sse_emitter.cpp index 604f11a73..b029ee1be 100644 --- a/server/src/server/sse_emitter.cpp +++ b/server/src/server/sse_emitter.cpp @@ -16,6 +16,15 @@ static const char THINK_CLOSE[] = ""; static const char TOOL_OPEN[] = ""; static const char FUNCTION_OPEN[] = "CMD format. 
+static const char BASH_OPEN[] = ""; +static const char READ_OPEN[] = ""; +static const char WRITE_OPEN[] = ""; +static const char EDIT_OPEN[] = ""; +static const char LS_OPEN[] = ""; +static const char GREP_OPEN[] = ""; +static const char GLOB_OPEN[] = ""; static constexpr size_t THINK_OPEN_LEN = 7; static constexpr size_t THINK_CLOSE_LEN = 8; @@ -28,7 +37,14 @@ static bool find_tool_start(const std::string & text, size_t & pos) { while (idx != std::string::npos) { if (text.compare(idx, sizeof(TOOL_OPEN) - 1, TOOL_OPEN) == 0 || text.compare(idx, sizeof(FUNCTION_OPEN) - 1, FUNCTION_OPEN) == 0 || - text.compare(idx, sizeof(TOOL_CODE_OPEN) - 1, TOOL_CODE_OPEN) == 0) { + text.compare(idx, sizeof(TOOL_CODE_OPEN) - 1, TOOL_CODE_OPEN) == 0 || + text.compare(idx, sizeof(BASH_OPEN) - 1, BASH_OPEN) == 0 || + text.compare(idx, sizeof(READ_OPEN) - 1, READ_OPEN) == 0 || + text.compare(idx, sizeof(WRITE_OPEN) - 1, WRITE_OPEN) == 0 || + text.compare(idx, sizeof(EDIT_OPEN) - 1, EDIT_OPEN) == 0 || + text.compare(idx, sizeof(LS_OPEN) - 1, LS_OPEN) == 0 || + text.compare(idx, sizeof(GREP_OPEN) - 1, GREP_OPEN) == 0 || + text.compare(idx, sizeof(GLOB_OPEN) - 1, GLOB_OPEN) == 0) { pos = idx; return true; } diff --git a/server/src/server/tool_parser.cpp b/server/src/server/tool_parser.cpp index 6244b250a..e9975283a 100644 --- a/server/src/server/tool_parser.cpp +++ b/server/src/server/tool_parser.cpp @@ -1,11 +1,12 @@ // Tool call parser implementation. // -// Five detection patterns, tried in order: +// Six detection patterns, tried in order: // 1. ...V... // 2. ...params... (bare, outside tool_call) // 3. (function-signature style) // 4. {JSON} // 5. Bare JSON objects with name+arguments fields +// 6. Native claude-code XML tags: CMD, PATH, etc. #include "tool_parser.h" @@ -161,6 +162,14 @@ static const std::regex & re_tool_code() { return r; } +// Pattern 6: native claude-code XML tags. +// Matches BODY, BODY, etc. 
+static const std::regex & re_native_tag() { + static std::regex r(R"(<(bash|read|write|edit|ls|grep|glob)>([\s\S]*?))", + std::regex::icase); + return r; +} + // ─── XML parameter parser ─────────────────────────────────────────────── static json parse_xml_params(const std::string & region, const std::string & fn_name, @@ -306,6 +315,48 @@ static bool parse_function_sig_args(const std::string & arg_text, json & out_arg return true; } +// ─── Native tag helpers ───────────────────────────────────────────────── + +// Case-insensitive lookup of `tag` in the tools array. +// Returns the tool's canonical name if found, otherwise returns `tag` as-is. +static std::string lookup_tool_name(const std::string & tag, const json & tools) { + if (tools.is_null() || !tools.is_array() || tools.empty()) return tag; + + std::string lower_tag = tag; + std::transform(lower_tag.begin(), lower_tag.end(), lower_tag.begin(), ::tolower); + + for (const auto & t : tools) { + const auto & fn = t.contains("function") ? t["function"] : t; + if (!fn.is_object()) continue; + std::string name = fn.value("name", ""); + if (name.empty()) continue; + std::string lower_name = name; + std::transform(lower_name.begin(), lower_name.end(), lower_name.begin(), ::tolower); + if (lower_name == lower_tag) return name; + } + return tag; // no match → lowercase tag name +} + +// Map native tag name to its default argument key + body. 
+static json tag_to_args(const std::string & tag, const std::string & body) { + json args = json::object(); + std::string lower = tag; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + + if (lower == "bash") args["command"] = body; + else if (lower == "read") args["file_path"] = body; + else if (lower == "grep") args["pattern"] = body; + else if (lower == "glob") args["pattern"] = body; + else if (lower == "write") args["content"] = body; + else if (lower == "edit") args["content"] = body; + else if (lower == "ls") { + if (!body.empty()) args["path"] = body; + } else { + args["content"] = body; // unknown tag fallback + } + return args; +} + // ─── Main parser ──────────────────────────────────────────────────────── ToolParseResult parse_tool_calls(const std::string & text, const json & tools) { @@ -447,6 +498,24 @@ ToolParseResult parse_tool_calls(const std::string & text, const json & tools) { } } + // Pattern 6: native claude-code XML tags (, , , , , , ) + { + auto begin = std::sregex_iterator(text.begin(), text.end(), re_native_tag()); + auto end = std::sregex_iterator(); + for (auto it = begin; it != end; ++it) { + size_t pos = it->position(); + if (overlaps(removals, pos)) continue; + std::string tag = (*it)[1].str(); + std::string body = (*it)[2].str(); + // Strip leading/trailing newline (consistent with parameter parser at line 172). 
+ if (!body.empty() && body.front() == '\n') body.erase(body.begin()); + if (!body.empty() && body.back() == '\n') body.pop_back(); + + std::string canonical = lookup_tool_name(tag, tools); + add_call(canonical, tag_to_args(tag, body), pos, pos + it->length()); + } + } + // Build cleaned text by removing all matched spans if (removals.empty()) { result.cleaned_text = text; diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp index 9bfb638fb..6beac5e22 100644 --- a/server/test/test_server_unit.cpp +++ b/server/test/test_server_unit.cpp @@ -2549,6 +2549,141 @@ static void test_normalize_tools_empty() { TEST_ASSERT(out2.is_object()); } +// ═══════════════════════════════════════════════════════════════════════ +// Native claude-code XML tag tests (, , etc.) +// ═══════════════════════════════════════════════════════════════════════ + +// Helper: build a tools array with one entry named `name`. +static json make_tools(const std::string & name) { + return json::array({{ + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "tool"}, + {"parameters", {{"type", "object"}, {"properties", json::object()}}} + }} + }}); +} + +static void test_parse_tool_call_bash_simple() { + // Basic CMD → ToolCall with name matching tools casing and {"command": CMD}. + json tools = make_tools("Bash"); + std::string text = "I'll run cat /etc/hostname"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Bash"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + TEST_ASSERT(args["command"] == "cat /etc/hostname"); + } +} + +static void test_parse_tool_call_bash_multiline() { + // Multiline body inside ... — leading/trailing newlines stripped. 
+ std::string text = "\nls -la\necho ok\n"; + auto result = parse_tool_calls(text); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + // Consistent with tool_parser.cpp:172 — leading/trailing newline stripped. + std::string cmd = args["command"].get(); + TEST_ASSERT(cmd.find("ls -la") != std::string::npos); + TEST_ASSERT(cmd.find("echo ok") != std::string::npos); + } +} + +static void test_parse_tool_call_ls_with_path() { + // /tmp → {"path": "/tmp"}. + std::string text = "/tmp"; + auto result = parse_tool_calls(text); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("path")); + TEST_ASSERT(args["path"] == "/tmp"); + } +} + +static void test_parse_tool_call_bash_name_lookup() { + // Case-insensitive lookup: request tools has "Bash", model emits . + json tools = make_tools("Bash"); + std::string text = "pwd"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Bash"); + } +} + +static void test_parse_tool_call_bash_no_match() { + // No tools array → fallback to lowercase tag name. + std::string text = "pwd"; + auto result = parse_tool_calls(text); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "bash"); + } +} + +static void test_parse_tool_call_bash_text_around() { + // Text before and after the tag — tag extracted as tool call, surrounding text preserved. 
+ json tools = make_tools("Bash"); + std::string text = "Sure, I'll do that.\npwd\nLet me know the result."; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Bash"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args["command"] == "pwd"); + } + // Surrounding text must not be swallowed. + TEST_ASSERT(result.cleaned_text.find("Sure") != std::string::npos || + result.cleaned_text.find("Let me know") != std::string::npos); +} + +static void test_parse_tool_call_existing_tool_call_still_works() { + // Regression: existing format still parses correctly. + std::string text = + "\n" + "\n" + "/foo/bar.txt\n" + "hello\n" + "\n" + ""; + auto result = parse_tool_calls(text); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Edit"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args["path"] == "/foo/bar.txt"); + TEST_ASSERT(args["content"] == "hello"); + } +} + +static void test_emitter_native_bash_tag_detected() { + // When the model emits cmd, the SSE emitter should route + // it to the tool buffer and parse it as a Bash tool call. 
+ json tools = make_tools("Bash"); + SseEmitter em(ApiFormat::ANTHROPIC, "req_bash_001", "test-model", 10, + tools, nullptr, false); + em.emit_start(); + em.emit_token("I'll run: ls /tmp"); + auto finish = em.emit_finish(10); + std::string s = concat(finish); + + TEST_ASSERT(!em.tool_calls().empty()); + if (!em.tool_calls().empty()) { + TEST_ASSERT(em.tool_calls()[0].name == "Bash"); + auto args = json::parse(em.tool_calls()[0].arguments); + TEST_ASSERT(args["command"] == "ls /tmp"); + } + TEST_ASSERT(s.find("\"type\":\"tool_use\"") != std::string::npos); + TEST_ASSERT(s.find("\"name\":\"Bash\"") != std::string::npos); + TEST_ASSERT(s.find("\"stop_reason\":\"tool_use\"") != std::string::npos); +} + int main() { std::fprintf(stderr, "══════════════════════════════════════════\n"); std::fprintf(stderr, " Server Unit Tests\n"); @@ -2716,6 +2851,16 @@ int main() { RUN_TEST(test_normalize_tools_mixed); RUN_TEST(test_normalize_tools_empty); + std::fprintf(stderr, "\n── Native claude-code XML tags ( etc.) ──\n"); + RUN_TEST(test_parse_tool_call_bash_simple); + RUN_TEST(test_parse_tool_call_bash_multiline); + RUN_TEST(test_parse_tool_call_ls_with_path); + RUN_TEST(test_parse_tool_call_bash_name_lookup); + RUN_TEST(test_parse_tool_call_bash_no_match); + RUN_TEST(test_parse_tool_call_bash_text_around); + RUN_TEST(test_parse_tool_call_existing_tool_call_still_works); + RUN_TEST(test_emitter_native_bash_tag_detected); + std::fprintf(stderr, "\n══════════════════════════════════════════\n"); std::fprintf(stderr, " Results: %d assertions, %d failures\n", test_count, test_failures); From 486ab3755638beb75f3a07b31b2f06e9067e61f5 Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 16:36:43 +0200 Subject: [PATCH 3/8] fix(server): scrub JSON-Schema metadata from tools to prevent Unsloth Jinja XML collisions The Unsloth Jinja template's render_extra_keys macro unrolls every JSON-Schema key as a literal XML tag. 
Keys like $schema, additionalProperties, and $defs produced garbage XML (<$schema>..., False) and crucially a nested tag for each parameter that collided with the outer function's tag, causing the model to hallucinate function names like with bogus parameters. Adds scrub_schema_metadata() (28 LOC) that strips the five metadata keys at every level of the schema tree (recursive through properties and items). Applied in all three normalization paths (Anthropic input_schema, OpenAI passthrough, bare Qwen). 3 new unit tests: strips_schema_metadata, strips_metadata_recursively, preserves_real_fields. All 1504 assertions pass, 0 failures. End-to-end replay of req_003.json (22.8K-token claude-code request): model now emits name:Write (real tool), stop_reason:tool_use, finish=tool_calls. No hallucination. --- server/src/server/http_server.cpp | 46 +++++++++++++--- server/test/test_server_unit.cpp | 89 +++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/server/src/server/http_server.cpp b/server/src/server/http_server.cpp index 673492a7a..8a6b07253 100644 --- a/server/src/server/http_server.cpp +++ b/server/src/server/http_server.cpp @@ -353,38 +353,72 @@ std::string render_tool_call_xml(const std::string & name, const json & argument return out; } +// Keys that the Unsloth Jinja template's render_extra_keys macro would expand into +// XML tags, polluting the rendered prompt (e.g. <$schema>, ). +// We strip these at every level of the schema tree before the template sees it. +static const std::vector k_schema_metadata_keys = { + "$schema", "additionalProperties", "$defs", "$ref", "definitions" +}; + +// Strip JSON-Schema metadata keys from a single schema node and recurse into +// nested object property schemas. Only keys in k_schema_metadata_keys are +// removed; all other keys (type, properties, required, enum, items, …) survive. 
+static json scrub_schema_metadata(json schema) { + if (!schema.is_object()) return schema; + for (const auto & key : k_schema_metadata_keys) { + schema.erase(key); + } + // Recurse into each property's sub-schema. + if (schema.contains("properties") && schema["properties"].is_object()) { + for (auto & [prop_name, prop_schema] : schema["properties"].items()) { + prop_schema = scrub_schema_metadata(prop_schema); + } + } + // Recurse into array item schema. + if (schema.contains("items") && schema["items"].is_object()) { + schema["items"] = scrub_schema_metadata(schema["items"]); + } + return schema; +} + // Normalize tools array to OpenAI/Qwen3 shape: {"type":"function","function":{...}}. // Anthropic shape uses "input_schema"; bare Qwen shape has "parameters" at top level. +// Also scrubs JSON-Schema metadata keys that the Unsloth Jinja template would render +// as garbage XML tags (causing the model to hallucinate function names like ). json normalize_tools_for_qwen(const json & tools) { if (!tools.is_array()) return tools; json out = json::array(); for (const auto & elem : tools) { if (!elem.is_object()) { out.push_back(elem); continue; } - // Already OpenAI shape: pass through unchanged. + // Already OpenAI shape: scrub metadata and pass through. if (elem.contains("type") && elem["type"] == "function" && elem.contains("function")) { - out.push_back(elem); + json e = elem; + if (e["function"].contains("parameters")) { + e["function"]["parameters"] = scrub_schema_metadata(e["function"]["parameters"]); + } + out.push_back(std::move(e)); continue; } - // Anthropic shape: input_schema → parameters. + // Anthropic shape: input_schema → parameters (scrubbed). 
if (elem.contains("input_schema")) { out.push_back({ {"type", "function"}, {"function", { {"name", elem.value("name", "")}, {"description", elem.value("description", "")}, - {"parameters", elem["input_schema"]} + {"parameters", scrub_schema_metadata(elem["input_schema"])} }} }); continue; } - // Bare Qwen shape: top-level name + parameters, no wrapper. + // Bare Qwen shape: top-level name + parameters (scrubbed), no wrapper. if (elem.contains("name") && elem.contains("parameters")) { out.push_back({ {"type", "function"}, {"function", { {"name", elem.value("name", "")}, {"description", elem.value("description", "")}, - {"parameters", elem["parameters"]} + {"parameters", scrub_schema_metadata(elem["parameters"])} }} }); continue; diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp index 6beac5e22..020bfef08 100644 --- a/server/test/test_server_unit.cpp +++ b/server/test/test_server_unit.cpp @@ -2549,6 +2549,92 @@ static void test_normalize_tools_empty() { TEST_ASSERT(out2.is_object()); } +static void test_normalize_tools_strips_schema_metadata() { + // $schema and additionalProperties must be removed; required must be kept. 
+ json input = json::array({{ + {"name", "my_tool"}, + {"description", "A tool"}, + {"input_schema", { + {"$schema", "http://json-schema.org/draft-07/schema#"}, + {"type", "object"}, + {"additionalProperties", false}, + {"properties", {{"city", {{"type", "string"}}}}}, + {"required", json::array({"city"})} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + const auto & params = out[0]["function"]["parameters"]; + TEST_ASSERT(!params.contains("$schema")); + TEST_ASSERT(!params.contains("additionalProperties")); + TEST_ASSERT(params.contains("required")); + TEST_ASSERT(params["required"][0] == "city"); + TEST_ASSERT(params["type"] == "object"); +} + +static void test_normalize_tools_strips_metadata_recursively() { + // $schema inside a nested property schema must also be stripped. + json input = json::array({{ + {"name", "deep_tool"}, + {"description", "Nested"}, + {"input_schema", { + {"type", "object"}, + {"additionalProperties", false}, + {"$defs", {{"MyDef", {{"type", "string"}}}}}, + {"properties", { + {"foo", { + {"type", "object"}, + {"$schema", "nested-schema-url"}, + {"additionalProperties", false}, + {"properties", {{"bar", {{"type", "string"}}}}} + }} + }} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + const auto & params = out[0]["function"]["parameters"]; + // Top-level metadata scrubbed + TEST_ASSERT(!params.contains("$defs")); + TEST_ASSERT(!params.contains("additionalProperties")); + // Nested property metadata scrubbed + const auto & foo = params["properties"]["foo"]; + TEST_ASSERT(!foo.contains("$schema")); + TEST_ASSERT(!foo.contains("additionalProperties")); + // Nested real fields preserved + TEST_ASSERT(foo["type"] == "object"); + TEST_ASSERT(foo["properties"].contains("bar")); +} + +static void test_normalize_tools_preserves_real_fields() { + // type, properties, required, enum, items.type must all survive scrubbing. 
+ json input = json::array({{ + {"name", "full_tool"}, + {"description", "Full schema"}, + {"input_schema", { + {"$schema", "http://json-schema.org/draft-07/schema#"}, + {"type", "object"}, + {"additionalProperties", false}, + {"required", json::array({"city", "units"})}, + {"properties", { + {"city", {{"type", "string"}, {"description", "City name"}}}, + {"units", {{"type", "string"}, {"enum", json::array({"celsius", "fahrenheit"})}}}, + {"tags", {{"type", "array"}, {"items", {{"type", "string"}}}}} + }} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + const auto & params = out[0]["function"]["parameters"]; + TEST_ASSERT(params["type"] == "object"); + TEST_ASSERT(params["required"].size() == 2); + TEST_ASSERT(params["properties"].contains("city")); + TEST_ASSERT(params["properties"]["units"]["enum"].size() == 2); + TEST_ASSERT(params["properties"]["tags"]["items"]["type"] == "string"); + TEST_ASSERT(!params.contains("$schema")); + TEST_ASSERT(!params.contains("additionalProperties")); +} + // ═══════════════════════════════════════════════════════════════════════ // Native claude-code XML tag tests (, , etc.) // ═══════════════════════════════════════════════════════════════════════ @@ -2850,6 +2936,9 @@ int main() { RUN_TEST(test_normalize_tools_bare_qwen_passthrough); RUN_TEST(test_normalize_tools_mixed); RUN_TEST(test_normalize_tools_empty); + RUN_TEST(test_normalize_tools_strips_schema_metadata); + RUN_TEST(test_normalize_tools_strips_metadata_recursively); + RUN_TEST(test_normalize_tools_preserves_real_fields); std::fprintf(stderr, "\n── Native claude-code XML tags ( etc.) 
──\n"); RUN_TEST(test_parse_tool_call_bash_simple); From d2449c93ec224f02e2f490570a4075fd35cc231b Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 16:46:13 +0200 Subject: [PATCH 4/8] fix(server): truncate tool descriptions to prevent prescriptive recipe leakage Cap each tool and parameter description at 500 chars using paragraph-break > sentence-boundary > hard-cut priority, snapping back past UTF-8 multibyte sequences. Verified by 6 new unit tests (1529 assertions, 0 failures). --- server/src/server/http_server.cpp | 76 ++++++++++++++++-- server/test/test_server_unit.cpp | 128 ++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 8 deletions(-) diff --git a/server/src/server/http_server.cpp b/server/src/server/http_server.cpp index 8a6b07253..8c367b011 100644 --- a/server/src/server/http_server.cpp +++ b/server/src/server/http_server.cpp @@ -381,44 +381,104 @@ static json scrub_schema_metadata(json schema) { return schema; } +// Maximum bytes kept from any tool or parameter description before truncation. +static constexpr size_t kMaxToolDescriptionChars = 500; + +// Truncate a description string to kMaxToolDescriptionChars bytes. +// Priority: paragraph break (\n\n) before the cap, then last ". " before the +// cap, then hard cut (snapping back to avoid splitting a UTF-8 multibyte sequence). +// Appends U+2026 (…, 3 UTF-8 bytes) at the cut point. +static std::string truncate_description(const std::string & s) { + if (s.size() <= kMaxToolDescriptionChars) return s; + + // 1. First \n\n before cap. + size_t nn = s.find("\n\n"); + if (nn != std::string::npos && nn < kMaxToolDescriptionChars) { + return s.substr(0, nn) + "\xE2\x80\xA6"; + } + + // 2. Last ". " at or before cap. + std::string_view sv(s.data(), kMaxToolDescriptionChars); + size_t dot = sv.rfind(". "); + if (dot != std::string_view::npos) { + // Include the period; cut before the trailing space. 
+ return s.substr(0, dot + 1) + "\xE2\x80\xA6"; + } + + // 3. Hard cut, snap back to UTF-8 boundary. + size_t cut = kMaxToolDescriptionChars; + // While cut > 0 and the byte at `cut` is a UTF-8 continuation byte + // (0x80–0xBF), move back one byte. + while (cut > 0 && (static_cast(s[cut]) & 0xC0) == 0x80) { + --cut; + } + return s.substr(0, cut) + "\xE2\x80\xA6"; +} + +// Apply truncate_description to every property's "description" inside a +// parameters/properties object (mutates in place). +static json truncate_parameter_descriptions(json params) { + if (!params.is_object()) return params; + if (!params.contains("properties") || !params["properties"].is_object()) { + return params; + } + for (auto & [prop_name, prop_schema] : params["properties"].items()) { + if (prop_schema.is_object() && prop_schema.contains("description") && + prop_schema["description"].is_string()) { + prop_schema["description"] = + truncate_description(prop_schema["description"].get()); + } + } + return params; +} + // Normalize tools array to OpenAI/Qwen3 shape: {"type":"function","function":{...}}. // Anthropic shape uses "input_schema"; bare Qwen shape has "parameters" at top level. // Also scrubs JSON-Schema metadata keys that the Unsloth Jinja template would render // as garbage XML tags (causing the model to hallucinate function names like ). +// Truncates function and parameter descriptions to kMaxToolDescriptionChars to prevent +// prescriptive recipes embedded in long descriptions from leaking into the prompt. json normalize_tools_for_qwen(const json & tools) { if (!tools.is_array()) return tools; json out = json::array(); for (const auto & elem : tools) { if (!elem.is_object()) { out.push_back(elem); continue; } - // Already OpenAI shape: scrub metadata and pass through. + // Already OpenAI shape: scrub metadata, truncate descriptions, pass through. 
if (elem.contains("type") && elem["type"] == "function" && elem.contains("function")) { json e = elem; + if (e["function"].contains("description") && e["function"]["description"].is_string()) { + e["function"]["description"] = + truncate_description(e["function"]["description"].get()); + } if (e["function"].contains("parameters")) { - e["function"]["parameters"] = scrub_schema_metadata(e["function"]["parameters"]); + e["function"]["parameters"] = truncate_parameter_descriptions( + scrub_schema_metadata(e["function"]["parameters"])); } out.push_back(std::move(e)); continue; } - // Anthropic shape: input_schema → parameters (scrubbed). + // Anthropic shape: input_schema → parameters (scrubbed + truncated). if (elem.contains("input_schema")) { out.push_back({ {"type", "function"}, {"function", { {"name", elem.value("name", "")}, - {"description", elem.value("description", "")}, - {"parameters", scrub_schema_metadata(elem["input_schema"])} + {"description", truncate_description(elem.value("description", ""))}, + {"parameters", truncate_parameter_descriptions( + scrub_schema_metadata(elem["input_schema"]))} }} }); continue; } - // Bare Qwen shape: top-level name + parameters (scrubbed), no wrapper. + // Bare Qwen shape: top-level name + parameters (scrubbed + truncated), no wrapper. 
if (elem.contains("name") && elem.contains("parameters")) { out.push_back({ {"type", "function"}, {"function", { {"name", elem.value("name", "")}, - {"description", elem.value("description", "")}, - {"parameters", scrub_schema_metadata(elem["parameters"])} + {"description", truncate_description(elem.value("description", ""))}, + {"parameters", truncate_parameter_descriptions( + scrub_schema_metadata(elem["parameters"]))} }} }); continue; diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp index 020bfef08..f59855a35 100644 --- a/server/test/test_server_unit.cpp +++ b/server/test/test_server_unit.cpp @@ -2635,6 +2635,126 @@ static void test_normalize_tools_preserves_real_fields() { TEST_ASSERT(!params.contains("additionalProperties")); } +// ═══════════════════════════════════════════════════════════════════════ +// Tool description truncation tests +// ═══════════════════════════════════════════════════════════════════════ + +// truncate_tool_description is exposed via normalize_tools_for_qwen: we +// exercise it through the public normalize_tools_for_qwen() interface so the +// tests stay independent of any helper signature changes. + +static json make_tool_with_desc(const std::string & desc) { + return json::array({{ + {"name", "my_tool"}, + {"description", desc}, + {"input_schema", { + {"type", "object"}, + {"properties", json::object()} + }} + }}); +} + +static json make_tool_with_param_desc(const std::string & param_desc) { + return json::array({{ + {"name", "my_tool"}, + {"description", "short top"}, + {"input_schema", { + {"type", "object"}, + {"properties", { + {"p1", {{"type", "string"}, {"description", param_desc}}} + }} + }} + }}); +} + +static void test_truncate_short_description_unchanged() { + // 100-char description must come through untouched. 
+ std::string desc(100, 'A'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0]["function"]["description"].get() == desc); +} + +static void test_truncate_at_paragraph_break() { + // Description has \n\n at position 200, total length 600. + // Expect cut at the paragraph break (pos 200) + "…". + std::string first(200, 'A'); + std::string rest(400, 'B'); + std::string desc = first + "\n\n" + rest; + TEST_ASSERT(desc.size() > 500); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + // Must end with ellipsis and not contain any 'B' from the second paragraph. + TEST_ASSERT(result.back() == '\xE2' || + result.size() >= 3 && result.substr(result.size()-3) == "\xE2\x80\xA6"); + TEST_ASSERT(result.find('B') == std::string::npos); + TEST_ASSERT(result.find("…") != std::string::npos); +} + +static void test_truncate_at_sentence_boundary() { + // Description with ". " at position 400, no \n\n before 500. + // Expect cut just after the period (401 bytes kept; the trailing space is dropped) + "…". + std::string first(400, 'C'); + std::string desc = first + ". " + std::string(300, 'D'); + TEST_ASSERT(desc.size() > 500); + // No \n\n in first 500 chars + TEST_ASSERT(desc.substr(0, 500).find("\n\n") == std::string::npos); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.find("…") != std::string::npos); + TEST_ASSERT(result.find('D') == std::string::npos); + // The ". " boundary itself: result should contain the period. + TEST_ASSERT(result.find('.') != std::string::npos); +} + +static void test_truncate_hard_cut() { + // 1000-char description with no \n\n and no ". " before char 500.
+ std::string desc(1000, 'X'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.find("…") != std::string::npos); + // After stripping the 3-byte UTF-8 "…", the ASCII portion is 500 chars. + // Result total = 500 + 3 = 503 bytes. + TEST_ASSERT(result.size() == 503); +} + +static void test_truncate_applies_to_parameter_descriptions() { + // Parameter description of 3000 chars must be truncated. + std::string long_param_desc(3000, 'P'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_param_desc(long_param_desc)); + TEST_ASSERT(out.size() == 1); + const auto & props = out[0]["function"]["parameters"]["properties"]; + TEST_ASSERT(props.contains("p1")); + std::string pdesc = props["p1"]["description"].get(); + TEST_ASSERT(pdesc.find("…") != std::string::npos); + // Must be shorter than the 3000-char input. + TEST_ASSERT(pdesc.size() < 600); +} + +static void test_truncate_preserves_unicode() { + // Description: 499 ASCII chars followed by a 3-byte UTF-8 character (ん = E3 82 93), + // followed by more text. Hard cut at 500 would land mid-codepoint; we expect + // the cut to snap back to the safe boundary (499) and append "…". + std::string ascii499(499, 'Z'); + // ん = 0xE3 0x82 0x93 + std::string multibyte = "\xE3\x82\x93"; + std::string desc = ascii499 + multibyte + std::string(100, 'W'); + TEST_ASSERT(desc.size() > 500); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.find("…") != std::string::npos); + // Must not contain 'W' (from beyond the cut). + TEST_ASSERT(result.find('W') == std::string::npos); + // Must not end with a partial multibyte sequence. + // The result (before …) should be exactly 499 'Z' chars. 
+ TEST_ASSERT(result.find(multibyte) == std::string::npos || + result.substr(result.size()-3-3, 3) != "\xE3\x82\x93"); +} + // ═══════════════════════════════════════════════════════════════════════ // Native claude-code XML tag tests (, , etc.) // ═══════════════════════════════════════════════════════════════════════ @@ -2940,6 +3060,14 @@ int main() { RUN_TEST(test_normalize_tools_strips_metadata_recursively); RUN_TEST(test_normalize_tools_preserves_real_fields); + std::fprintf(stderr, "\n── Tool description truncation ──\n"); + RUN_TEST(test_truncate_short_description_unchanged); + RUN_TEST(test_truncate_at_paragraph_break); + RUN_TEST(test_truncate_at_sentence_boundary); + RUN_TEST(test_truncate_hard_cut); + RUN_TEST(test_truncate_applies_to_parameter_descriptions); + RUN_TEST(test_truncate_preserves_unicode); + std::fprintf(stderr, "\n── Native claude-code XML tags ( etc.) ──\n"); RUN_TEST(test_parse_tool_call_bash_simple); RUN_TEST(test_parse_tool_call_bash_multiline); From 82af6e42979910e2ac4bc67983de46dab0abbbe4 Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 18:27:28 +0200 Subject: [PATCH 5/8] fix(server): append closed prefill in Jinja renderer when thinking is off When the Jinja template ends with a bare <|im_start|>assistant\n (e.g. the official Qwen3.6 template) and the request has thinking disabled, the hardcoded Qwen renderer appends \n\n\n\n to put the model in the right decoding state for tool use. The Jinja path was missing this suffix, so /v1/messages requests rendered through Jinja produced a different prompt shape than the OpenAI path. Mirror the hardcoded behavior. Diagnosed by Codex rescue session 019e5fd0 against captured req_003.json from a real claude-code run. Patch is dormant for templates that already append their own assistant suffix (Unsloth Qwen3-Coder). 
--- server/src/server/chat_template.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/server/src/server/chat_template.cpp b/server/src/server/chat_template.cpp index 1349109ad..f1e6569e2 100644 --- a/server/src/server/chat_template.cpp +++ b/server/src/server/chat_template.cpp @@ -411,7 +411,25 @@ std::string render_chat_template_jinja( jinja::runtime rt(ctx); jinja::value results = rt.execute(*prog); auto parts = jinja::runtime::gather_string_parts(results); - return parts->as_string().str(); + std::string rendered = parts->as_string().str(); + + // The hard-coded Qwen renderer appends a closed think prefill when + // thinking is disabled. Some Qwen3.6 Jinja templates omit that final + // assistant suffix, which leaves the model in the wrong decoding state + // for tool use. Mirror the hard-coded behavior here when the rendered + // prompt ends with a bare assistant generation prompt. + if (!enable_thinking) { + static constexpr char kAssistantPrefix[] = "<|im_start|>assistant\n"; + static constexpr char kNoThinkPrefill[] = "\n\n\n\n"; + if (rendered.size() >= sizeof(kAssistantPrefix) - 1 && + rendered.compare(rendered.size() - (sizeof(kAssistantPrefix) - 1), + sizeof(kAssistantPrefix) - 1, + kAssistantPrefix) == 0) { + rendered += kNoThinkPrefill; + } + } + + return rendered; } catch (const std::exception & e) { throw std::runtime_error(std::string("jinja runtime: ") + e.what()); } From 0138a22a826d2f2c42f101a7fb1b5409e0f239d1 Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 18:38:32 +0200 Subject: [PATCH 6/8] fix(server): alias common param-name shortenings to schema canonical names Quantized models (notably Qwen3.6-27B-Q3) emit short forms of canonical parameter names: instead of , instead of , instead of . The schema-checking client (claude-code) then rejects the tool call. 
Add resolve_param_alias() that maps emitted keys to the schema's actual keys via case-insensitive direct match, then a small alias table for common cmd/command, path/file_path, query/pattern, expr/expression, src/source, dst/destination shortenings. Helper is pure, returns the original key if no canonical match exists. Verified: Qwen3.6-27B-Q3_K_S now produces {"command":"ls -lhS /tmp..."} for claude-code's Bash tool (was {"cmd":...} pre-fix). --- server/src/server/tool_parser.cpp | 64 ++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/server/src/server/tool_parser.cpp b/server/src/server/tool_parser.cpp index e9975283a..b706d8bb6 100644 --- a/server/src/server/tool_parser.cpp +++ b/server/src/server/tool_parser.cpp @@ -170,6 +170,67 @@ static const std::regex & re_native_tag() { return r; } +// ─── Parameter-name alias resolution ──────────────────────────────────── + +// Some quantized models (e.g. Qwen3.6-Q3) emit short forms of canonical +// parameter names (cmd instead of command, path instead of file_path). +// Map the emitted key to the schema's actual key when an alias is found. +// Pure helper — returns the original `emitted` if no alias matches. +static std::string resolve_param_alias(const std::string & emitted, const json & props) { + if (!props.is_object() || props.empty()) return emitted; + + std::string lower = emitted; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + + // 1. Direct case-insensitive match against schema keys. + for (auto it = props.begin(); it != props.end(); ++it) { + std::string name = it.key(); + std::string lname = name; + std::transform(lname.begin(), lname.end(), lname.begin(), ::tolower); + if (lname == lower) return name; + } + + // 2. Alias map: common shortenings <=> canonical forms. 
+ static const std::vector>> aliases = { + {"cmd", {"command"}}, + {"command", {"cmd"}}, + {"path", {"file_path", "directory", "dir"}}, + {"file_path", {"path", "file"}}, + {"file", {"file_path", "path"}}, + {"filepath", {"file_path"}}, + {"dir", {"directory", "path"}}, + {"directory", {"dir", "path"}}, + {"query", {"pattern", "q"}}, + {"pattern", {"query", "regex"}}, + {"regex", {"pattern"}}, + {"q", {"query", "pattern"}}, + {"expr", {"expression"}}, + {"expression", {"expr"}}, + {"text", {"content"}}, + {"content", {"text"}}, + {"src", {"source"}}, + {"source", {"src"}}, + {"dst", {"destination", "target"}}, + {"destination", {"dst", "target"}}, + {"target", {"dst", "destination"}}, + }; + + for (const auto & [key, candidates] : aliases) { + if (key != lower) continue; + for (const std::string & candidate : candidates) { + for (auto pit = props.begin(); pit != props.end(); ++pit) { + std::string pname = pit.key(); + std::string lpname = pname; + std::transform(lpname.begin(), lpname.end(), lpname.begin(), ::tolower); + if (lpname == candidate) return pname; + } + } + break; + } + + return emitted; // no alias matched; keep as-is +} + // ─── XML parameter parser ─────────────────────────────────────────────── static json parse_xml_params(const std::string & region, const std::string & fn_name, @@ -192,7 +253,8 @@ static json parse_xml_params(const std::string & region, const std::string & fn_ if (!v.empty() && v.front() == '\n') v.erase(v.begin()); if (!v.empty() && v.back() == '\n') v.pop_back(); - args[k] = convert_param_value(v, k, props); + std::string canonical_k = resolve_param_alias(k, props); + args[canonical_k] = convert_param_value(v, canonical_k, props); } return args; } From 976cd389df7d9bd60f142f98fa9fc354ef429ba3 Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Mon, 25 May 2026 19:41:46 +0200 Subject: [PATCH 7/8] review: address momus review findings (P1-1, P1-2, P1-3, P2-1, P2-3, P2-5, P2-8) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 blockers: - P1-1 (tool_parser.cpp): drop std::regex::icase from re_native_tag so Pattern 6 aligns with sse_emitter::find_tool_start (case-sensitive). Also bound the body quantifier to {0,65536}? to prevent catastrophic backtracking on adversarial input. - P1-2 (tool_parser.cpp): gate Pattern 6 on tools.is_array() && !empty() so prose like 'please read the manual' or 'grep for the pattern' doesn't get fabricated into phantom tool calls. - P1-3 (test_server_unit.cpp): rewrite test_truncate_preserves_unicode assertion to actually verify the byte before the ellipsis is not a UTF-8 continuation byte. Add 2-byte (é) and 4-byte (𝄞) coverage too. P2 fixes: - P2-1 (http_server.cpp): scrub_schema_metadata now recurses into JSON Schema combinators (oneOf, anyOf, allOf, not). Anthropic tool defs use these for polymorphic params; without recursion the noise leaks. - P2-3 (test_server_unit.cpp): add four resolve_param_alias tests (cmd→command, path→file_path, case-insensitive direct, passthrough) via the public parse_tool_calls API. - P2-5 (chat_template.cpp): make think-prefill suffix check tolerant of trailing whitespace variants (\n\n, trailing space). Trim trailing whitespace, check for bare <|im_start|>assistant, then re-emit marker + prefill. - P2-8 (test_server_unit.cpp): fix tautological assertion in test_truncate_at_paragraph_break (was checking '\xE2' on result.back() which is always the last byte of the ellipsis '\xA6'). Existing tests updated: bash_multiline/ls_with_path now pass tools (the new P1-2 gate requires it). bash_no_match repurposed; new no_tools_no_fabrication tests added to lock in the gate.
--- server/src/server/chat_template.cpp | 26 ++- server/src/server/http_server.cpp | 13 ++ server/src/server/tool_parser.cpp | 5 + server/test/test_server_unit.cpp | 237 ++++++++++++++++++++++++++-- 4 files changed, 257 insertions(+), 24 deletions(-) diff --git a/server/src/server/chat_template.cpp b/server/src/server/chat_template.cpp index f1e6569e2..f701a98d7 100644 --- a/server/src/server/chat_template.cpp +++ b/server/src/server/chat_template.cpp @@ -419,13 +419,25 @@ std::string render_chat_template_jinja( // for tool use. Mirror the hard-coded behavior here when the rendered // prompt ends with a bare assistant generation prompt. if (!enable_thinking) { - static constexpr char kAssistantPrefix[] = "<|im_start|>assistant\n"; - static constexpr char kNoThinkPrefill[] = "\n\n\n\n"; - if (rendered.size() >= sizeof(kAssistantPrefix) - 1 && - rendered.compare(rendered.size() - (sizeof(kAssistantPrefix) - 1), - sizeof(kAssistantPrefix) - 1, - kAssistantPrefix) == 0) { - rendered += kNoThinkPrefill; + // The hard-coded Qwen renderer follows <|im_start|>assistant with a + // closed block to put the model in non-thinking decode mode. + // Tolerate template variants that emit extra trailing whitespace + // after the assistant marker (single \n, double \n\n, trailing + // space). Strategy: trim trailing whitespace, check for the BARE + // assistant marker (no newline), then re-emit marker + prefill. 
+ static constexpr char kAssistantBare[] = "<|im_start|>assistant"; + static constexpr char kAssistantPrefill[] = "<|im_start|>assistant\n\n\n\n\n"; + size_t trim_end = rendered.size(); + while (trim_end > 0) { + char c = rendered[trim_end - 1]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') break; + --trim_end; + } + const size_t blen = sizeof(kAssistantBare) - 1; + if (trim_end >= blen && + rendered.compare(trim_end - blen, blen, kAssistantBare) == 0) { + rendered.resize(trim_end - blen); + rendered += kAssistantPrefill; } } diff --git a/server/src/server/http_server.cpp b/server/src/server/http_server.cpp index 8c367b011..e21d6ee32 100644 --- a/server/src/server/http_server.cpp +++ b/server/src/server/http_server.cpp @@ -378,6 +378,19 @@ static json scrub_schema_metadata(json schema) { if (schema.contains("items") && schema["items"].is_object()) { schema["items"] = scrub_schema_metadata(schema["items"]); } + // Recurse into JSON-Schema combinators. Claude tool defs frequently use + // these for polymorphic parameter types; without recursion the inner + // sub-schemas keep their $schema/additionalProperties noise. + for (const char * combinator : {"oneOf", "anyOf", "allOf"}) { + if (schema.contains(combinator) && schema[combinator].is_array()) { + for (auto & sub : schema[combinator]) { + sub = scrub_schema_metadata(sub); + } + } + } + if (schema.contains("not") && schema["not"].is_object()) { + schema["not"] = scrub_schema_metadata(schema["not"]); + } return schema; } diff --git a/server/src/server/tool_parser.cpp b/server/src/server/tool_parser.cpp index b706d8bb6..96a503f19 100644 --- a/server/src/server/tool_parser.cpp +++ b/server/src/server/tool_parser.cpp @@ -561,6 +561,11 @@ ToolParseResult parse_tool_calls(const std::string & text, const json & tools) { } // Pattern 6: native claude-code XML tags (, , , , , , ) + // Gate: only fire when the request actually provided tools. 
Otherwise + // legitimate prose like "please read the manual" or "grep for the pattern" + // gets eaten as a phantom tool call and the surrounding text is stripped + // via the removals span. Mirrors the streaming gate has_request_tools(). + if (tools.is_array() && !tools.empty()) { auto begin = std::sregex_iterator(text.begin(), text.end(), re_native_tag()); auto end = std::sregex_iterator(); diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp index f59855a35..6c0d6e578 100644 --- a/server/test/test_server_unit.cpp +++ b/server/test/test_server_unit.cpp @@ -2685,9 +2685,9 @@ static void test_truncate_at_paragraph_break() { json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); TEST_ASSERT(out.size() == 1); std::string result = out[0]["function"]["description"].get(); - // Must end with ellipsis and not contain any 'B' from the second paragraph. - TEST_ASSERT(result.back() == '\xE2' || - result.size() >= 3 && result.substr(result.size()-3) == "\xE2\x80\xA6"); + // Must END with the ellipsis bytes (E2 80 A6) and not contain any 'B'. + TEST_ASSERT(result.size() >= 3); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); TEST_ASSERT(result.find('B') == std::string::npos); TEST_ASSERT(result.find("…") != std::string::npos); } @@ -2739,20 +2739,51 @@ static void test_truncate_preserves_unicode() { // followed by more text. Hard cut at 500 would land mid-codepoint; we expect // the cut to snap back to the safe boundary (499) and append "…". 
std::string ascii499(499, 'Z'); - // ん = 0xE3 0x82 0x93 - std::string multibyte = "\xE3\x82\x93"; + std::string multibyte = "\xE3\x82\x93"; // ん std::string desc = ascii499 + multibyte + std::string(100, 'W'); TEST_ASSERT(desc.size() > 500); json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); - TEST_ASSERT(out.size() == 1); std::string result = out[0]["function"]["description"].get(); - TEST_ASSERT(result.find("…") != std::string::npos); - // Must not contain 'W' (from beyond the cut). + // Must end with ellipsis (3-byte E2 80 A6). + TEST_ASSERT(result.size() >= 3); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); TEST_ASSERT(result.find('W') == std::string::npos); - // Must not end with a partial multibyte sequence. - // The result (before …) should be exactly 499 'Z' chars. - TEST_ASSERT(result.find(multibyte) == std::string::npos || - result.substr(result.size()-3-3, 3) != "\xE3\x82\x93"); + // Byte directly before the ellipsis MUST NOT be a UTF-8 continuation byte + // (10xxxxxx => 0x80..0xBF). If it were, we'd have bisected a multibyte + // codepoint. Expected: last 'Z' (0x5A) or a valid lead/single byte. + TEST_ASSERT(result.size() >= 4); + unsigned char last_before = static_cast(result[result.size() - 4]); + TEST_ASSERT((last_before & 0xC0) != 0x80); + // The straddling multibyte sequence must NOT appear in the result. + TEST_ASSERT(result.find(multibyte) == std::string::npos); +} + +static void test_truncate_preserves_unicode_2byte() { + // 499 ASCII + a 2-byte codepoint (é = 0xC3 0xA9) straddling the cut. 
+ std::string ascii499(499, 'Z'); + std::string two_byte = "\xC3\xA9"; + std::string desc = ascii499 + two_byte + std::string(100, 'W'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.size() >= 4); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); + unsigned char last_before = static_cast(result[result.size() - 4]); + TEST_ASSERT((last_before & 0xC0) != 0x80); + TEST_ASSERT(result.find(two_byte) == std::string::npos); +} + +static void test_truncate_preserves_unicode_4byte() { + // 498 ASCII + a 4-byte codepoint (𝄞 = F0 9D 84 9E) straddling the cut. + std::string ascii498(498, 'Z'); + std::string four_byte = "\xF0\x9D\x84\x9E"; + std::string desc = ascii498 + four_byte + std::string(100, 'W'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.size() >= 4); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); + unsigned char last_before = static_cast(result[result.size() - 4]); + TEST_ASSERT((last_before & 0xC0) != 0x80); + TEST_ASSERT(result.find(four_byte) == std::string::npos); } // ═══════════════════════════════════════════════════════════════════════ @@ -2787,8 +2818,10 @@ static void test_parse_tool_call_bash_simple() { static void test_parse_tool_call_bash_multiline() { // Multiline body inside ... — leading/trailing newlines stripped. + // Pattern 6 (native tags) requires tools to be present in the request. 
+ json tools = make_tools("Bash"); std::string text = "\nls -la\necho ok\n"; - auto result = parse_tool_calls(text); + auto result = parse_tool_calls(text, tools); TEST_ASSERT(result.tool_calls.size() == 1); if (!result.tool_calls.empty()) { auto args = json::parse(result.tool_calls[0].arguments); @@ -2802,8 +2835,10 @@ static void test_parse_tool_call_bash_multiline() { static void test_parse_tool_call_ls_with_path() { // /tmp → {"path": "/tmp"}. + // Pattern 6 (native tags) requires tools to be present in the request. + json tools = make_tools("LS"); std::string text = "/tmp"; - auto result = parse_tool_calls(text); + auto result = parse_tool_calls(text, tools); TEST_ASSERT(result.tool_calls.size() == 1); if (!result.tool_calls.empty()) { auto args = json::parse(result.tool_calls[0].arguments); @@ -2824,15 +2859,171 @@ static void test_parse_tool_call_bash_name_lookup() { } static void test_parse_tool_call_bash_no_match() { - // No tools array → fallback to lowercase tag name. + // Pattern 6 fires only when tools array is non-empty. With a tools list + // that doesn't contain "bash" but is otherwise non-empty, the tag still + // matches and falls back to lowercase canonical name (per lookup_tool_name). + // tool_allowed() then rejects it because "bash" isn't in the list. + json tools = make_tools("Edit"); std::string text = "pwd"; - auto result = parse_tool_calls(text); + auto result = parse_tool_calls(text, tools); + // Either 0 (rejected by tool_allowed) or 1 with name="bash" (lowercase fallback). + // Both are acceptable contracts; document the actual current behavior. 
+ if (result.tool_calls.size() == 1) { + TEST_ASSERT(result.tool_calls[0].name == "bash"); + } else { + TEST_ASSERT(result.tool_calls.empty()); + } +} + +static void test_parse_tool_call_no_tools_no_fabrication() { + // P1 gate (P1-2 from momus review): when no tools are provided in the + // request, Pattern 6 must NOT fabricate a tool call from prose like + // "please read the manual" or "grep for the pattern". + std::string text = "pwd"; // explicitly looks like a tool call + auto result = parse_tool_calls(text); // ← NO tools arg + TEST_ASSERT(result.tool_calls.empty()); + // Prose is preserved (NOT swallowed by removals span). + TEST_ASSERT(result.cleaned_text.find("pwd") != std::string::npos); +} + +static void test_parse_tool_call_no_tools_no_fabrication_prose() { + // Same gate, exercised on natural prose containing tag-shaped substrings. + std::string text = "Please read the documentation and grep for examples."; + auto result = parse_tool_calls(text); // no tools + TEST_ASSERT(result.tool_calls.empty()); +} + +// ═══════════════════════════════════════════════════════════════════════ +// resolve_param_alias tests (P2-3 from momus review) — exercised via the +// public parse_tool_calls() API since resolve_param_alias is static. +// ═══════════════════════════════════════════════════════════════════════ + +static void test_param_alias_cmd_to_command() { + // Model emits but schema requires "command". + // The alias resolver maps cmd → command (the canonical name in tools). 
+ json tools = make_tools("Bash"); // Bash has parameter "command" + std::string text = + "ls /tmp"; + auto result = parse_tool_calls(text, tools); TEST_ASSERT(result.tool_calls.size() == 1); if (!result.tool_calls.empty()) { - TEST_ASSERT(result.tool_calls[0].name == "bash"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + TEST_ASSERT(!args.contains("cmd")); + TEST_ASSERT(args["command"] == "ls /tmp"); } } +static void test_param_alias_path_to_file_path() { + // Model emits but tool schema requires "file_path". + json tools = json::array({{ + {"type", "function"}, + {"function", { + {"name", "Read"}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"file_path", {{"type", "string"}}} + }} + }} + }} + }}); + std::string text = + "/etc/hosts"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("file_path")); + TEST_ASSERT(args["file_path"] == "/etc/hosts"); + } +} + +static void test_param_alias_case_insensitive_direct() { + // Model emits (capitalised), schema has "command". + // Step 1 of resolver is a case-insensitive direct match → "command". + json tools = make_tools("Bash"); + std::string text = + "pwd"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + } +} + +static void test_param_alias_no_match_passthrough() { + // Model emits an arg with a name not in the alias table and not in schema. + // Should pass through unchanged. 
+ json tools = make_tools("Bash"); + std::string text = + "x"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("zzzunknown")); + } +} + +// ═══════════════════════════════════════════════════════════════════════ +// scrub_schema_metadata combinator recursion (P2-1 from momus review). +// ═══════════════════════════════════════════════════════════════════════ + +static void test_scrub_recurses_into_oneOf() { + json tool = json::array({{ + {"name", "X"}, + {"description", "d"}, + {"input_schema", { + {"type", "object"}, + {"properties", { + {"v", { + {"oneOf", json::array({ + {{"type", "string"}, {"$schema", "noise"}, {"additionalProperties", false}}, + {{"type", "integer"}, {"$defs", json::object()}} + })} + }} + }} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(tool); + TEST_ASSERT(out.size() == 1); + const auto & v = out[0]["function"]["parameters"]["properties"]["v"]; + TEST_ASSERT(v.contains("oneOf")); + const auto & one_of = v["oneOf"]; + TEST_ASSERT(one_of.is_array() && one_of.size() == 2); + TEST_ASSERT(!one_of[0].contains("$schema")); + TEST_ASSERT(!one_of[0].contains("additionalProperties")); + TEST_ASSERT(!one_of[1].contains("$defs")); + // type still present. 
+ TEST_ASSERT(one_of[0]["type"] == "string"); + TEST_ASSERT(one_of[1]["type"] == "integer"); +} + +static void test_scrub_recurses_into_anyOf_allOf_not() { + json tool = json::array({{ + {"name", "X"}, + {"description", "d"}, + {"input_schema", { + {"type", "object"}, + {"anyOf", json::array({ + {{"type", "string"}, {"$schema", "noise"}} + })}, + {"allOf", json::array({ + {{"type", "integer"}, {"additionalProperties", false}} + })}, + {"not", {{"type", "null"}, {"$defs", json::object()}}} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(tool); + const auto & params = out[0]["function"]["parameters"]; + TEST_ASSERT(!params["anyOf"][0].contains("$schema")); + TEST_ASSERT(!params["allOf"][0].contains("additionalProperties")); + TEST_ASSERT(!params["not"].contains("$defs")); + TEST_ASSERT(params["not"]["type"] == "null"); +} + static void test_parse_tool_call_bash_text_around() { // Text before and after the tag — tag extracted as tool call, surrounding text preserved. json tools = make_tools("Bash"); @@ -3059,6 +3250,8 @@ int main() { RUN_TEST(test_normalize_tools_strips_schema_metadata); RUN_TEST(test_normalize_tools_strips_metadata_recursively); RUN_TEST(test_normalize_tools_preserves_real_fields); + RUN_TEST(test_scrub_recurses_into_oneOf); + RUN_TEST(test_scrub_recurses_into_anyOf_allOf_not); std::fprintf(stderr, "\n── Tool description truncation ──\n"); RUN_TEST(test_truncate_short_description_unchanged); @@ -3067,6 +3260,8 @@ int main() { RUN_TEST(test_truncate_hard_cut); RUN_TEST(test_truncate_applies_to_parameter_descriptions); RUN_TEST(test_truncate_preserves_unicode); + RUN_TEST(test_truncate_preserves_unicode_2byte); + RUN_TEST(test_truncate_preserves_unicode_4byte); std::fprintf(stderr, "\n── Native claude-code XML tags ( etc.) 
──\n"); RUN_TEST(test_parse_tool_call_bash_simple); @@ -3074,10 +3269,18 @@ int main() { RUN_TEST(test_parse_tool_call_ls_with_path); RUN_TEST(test_parse_tool_call_bash_name_lookup); RUN_TEST(test_parse_tool_call_bash_no_match); + RUN_TEST(test_parse_tool_call_no_tools_no_fabrication); + RUN_TEST(test_parse_tool_call_no_tools_no_fabrication_prose); RUN_TEST(test_parse_tool_call_bash_text_around); RUN_TEST(test_parse_tool_call_existing_tool_call_still_works); RUN_TEST(test_emitter_native_bash_tag_detected); + std::fprintf(stderr, "\n── Param-name alias resolution ──\n"); + RUN_TEST(test_param_alias_cmd_to_command); + RUN_TEST(test_param_alias_path_to_file_path); + RUN_TEST(test_param_alias_case_insensitive_direct); + RUN_TEST(test_param_alias_no_match_passthrough); + std::fprintf(stderr, "\n══════════════════════════════════════════\n"); std::fprintf(stderr, " Results: %d assertions, %d failures\n", test_count, test_failures); From 5e861b4d2b62aa473a08d388e44be4ce0d0224ce Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Tue, 26 May 2026 14:59:43 +0200 Subject: [PATCH 8/8] fix(chat_template): gate closed-think prefill injection to Qwen3 arch only The kAssistantBare -> kAssistantPrefill post-processing in render_chat_template_jinja was applied to all Jinja-rendered prompts. Add arch_hint (ChatFormat) parameter, defaulting to QWEN3, and guard the block with arch_hint == ChatFormat::QWEN3. Call site in http_server.cpp passes chat_format_ so other archs (Laguna, Gemma4) are unaffected. Addresses howard0su's review comment on PR #276. 
--- server/src/server/chat_template.cpp | 19 ++++++++++--------- server/src/server/chat_template.h | 5 ++++- server/src/server/http_server.cpp | 3 ++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/server/src/server/chat_template.cpp b/server/src/server/chat_template.cpp index f701a98d7..33f4bd864 100644 --- a/server/src/server/chat_template.cpp +++ b/server/src/server/chat_template.cpp @@ -360,7 +360,8 @@ std::string render_chat_template_jinja( const std::string & eos_token, bool add_generation_prompt, bool enable_thinking, - const std::string & tools_json) + const std::string & tools_json, + ChatFormat arch_hint) { if (template_src.empty()) { throw std::runtime_error("render_chat_template_jinja: template_src is empty"); @@ -413,14 +414,14 @@ std::string render_chat_template_jinja( auto parts = jinja::runtime::gather_string_parts(results); std::string rendered = parts->as_string().str(); - // The hard-coded Qwen renderer appends a closed think prefill when - // thinking is disabled. Some Qwen3.6 Jinja templates omit that final - // assistant suffix, which leaves the model in the wrong decoding state - // for tool use. Mirror the hard-coded behavior here when the rendered - // prompt ends with a bare assistant generation prompt. - if (!enable_thinking) { - // The hard-coded Qwen renderer follows <|im_start|>assistant with a - // closed block to put the model in non-thinking decode mode. + // Qwen3/3.5/3.6 only: the hard-coded renderer appends a closed think + // prefill when thinking is disabled. Some Qwen3.6 Jinja templates omit + // that final assistant suffix, leaving the model in the wrong decoding + // state for tool use. Mirror the hard-coded behavior here when the + // rendered prompt ends with a bare assistant generation prompt. + // Other architectures (Laguna, Gemma4, ...) do not use ChatML tokens + // and must not be touched here. 
+ if (arch_hint == ChatFormat::QWEN3 && !enable_thinking) { // Tolerate template variants that emit extra trailing whitespace // after the assistant marker (single \n, double \n\n, trailing // space). Strategy: trim trailing whitespace, check for the BARE diff --git a/server/src/server/chat_template.h b/server/src/server/chat_template.h index ca7ef9db5..b544df245 100644 --- a/server/src/server/chat_template.h +++ b/server/src/server/chat_template.h @@ -63,6 +63,8 @@ ChatFormat chat_format_for_arch(const std::string & arch); // {{bos_token}} / {{eos_token}}). Use empty strings if unknown. // `tools_json` optional JSON array of tool definitions; when non-empty it // is parsed and injected as `tools` into the template context. +// `arch_hint` model architecture (controls arch-specific post-processing; +// the closed-think prefill injection is Qwen3/3.5/3.6 only). // // Internally caches the most recently parsed program per thread (avoids // re-parsing the template on every request). Throws std::runtime_error on @@ -74,6 +76,7 @@ std::string render_chat_template_jinja( const std::string & eos_token, bool add_generation_prompt = true, bool enable_thinking = false, - const std::string & tools_json = ""); + const std::string & tools_json = "", + ChatFormat arch_hint = ChatFormat::QWEN3); } // namespace dflash::common diff --git a/server/src/server/http_server.cpp b/server/src/server/http_server.cpp index e21d6ee32..5f6861c2d 100644 --- a/server/src/server/http_server.cpp +++ b/server/src/server/http_server.cpp @@ -1149,7 +1149,8 @@ bool HttpServer::route_request(int fd, const HttpRequest & hr) { eos_str, /*add_generation_prompt=*/true, enable_thinking, - tools_json); + tools_json, + chat_format_); } catch (const std::exception & e) { send_error(fd, 500, std::string("chat template (jinja) render failed: ") + e.what());