diff --git a/server/src/server/chat_template.cpp b/server/src/server/chat_template.cpp index 1349109ad..33f4bd864 100644 --- a/server/src/server/chat_template.cpp +++ b/server/src/server/chat_template.cpp @@ -360,7 +360,8 @@ std::string render_chat_template_jinja( const std::string & eos_token, bool add_generation_prompt, bool enable_thinking, - const std::string & tools_json) + const std::string & tools_json, + ChatFormat arch_hint) { if (template_src.empty()) { throw std::runtime_error("render_chat_template_jinja: template_src is empty"); @@ -411,7 +412,37 @@ std::string render_chat_template_jinja( jinja::runtime rt(ctx); jinja::value results = rt.execute(*prog); auto parts = jinja::runtime::gather_string_parts(results); - return parts->as_string().str(); + std::string rendered = parts->as_string().str(); + + // Qwen3/3.5/3.6 only: the hard-coded renderer appends a closed think + // prefill when thinking is disabled. Some Qwen3.6 Jinja templates omit + // that final assistant suffix, leaving the model in the wrong decoding + // state for tool use. Mirror the hard-coded behavior here when the + // rendered prompt ends with a bare assistant generation prompt. + // Other architectures (Laguna, Gemma4, ...) do not use ChatML tokens + // and must not be touched here. + if (arch_hint == ChatFormat::QWEN3 && !enable_thinking) { + // Tolerate template variants that emit extra trailing whitespace + // after the assistant marker (single \n, double \n\n, trailing + // space). Strategy: trim trailing whitespace, check for the BARE + // assistant marker (no newline), then re-emit marker + prefill. + static constexpr char kAssistantBare[] = "<|im_start|>assistant"; + static constexpr char kAssistantPrefill[] = "<|im_start|>assistant\n\n\n\n\n"; + size_t trim_end = rendered.size(); + while (trim_end > 0) { + char c = rendered[trim_end - 1]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') break; + --trim_end; + } + const size_t blen = sizeof(kAssistantBare) - 1; + if (trim_end >= blen && + rendered.compare(trim_end - blen, blen, kAssistantBare) == 0) { + rendered.resize(trim_end - blen); + rendered += kAssistantPrefill; + } + } + + return rendered; } catch (const std::exception & e) { throw std::runtime_error(std::string("jinja runtime: ") + e.what()); } diff --git a/server/src/server/chat_template.h b/server/src/server/chat_template.h index ca7ef9db5..b544df245 100644 --- a/server/src/server/chat_template.h +++ b/server/src/server/chat_template.h @@ -63,6 +63,8 @@ ChatFormat chat_format_for_arch(const std::string & arch); // {{bos_token}} / {{eos_token}}). Use empty strings if unknown. // `tools_json` optional JSON array of tool definitions; when non-empty it // is parsed and injected as `tools` into the template context. +// `arch_hint` model architecture (controls arch-specific post-processing; +// the closed-think prefill injection is Qwen3/3.5/3.6 only). // // Internally caches the most recently parsed program per thread (avoids // re-parsing the template on every request). Throws std::runtime_error on @@ -74,6 +76,7 @@ std::string render_chat_template_jinja( const std::string & eos_token, bool add_generation_prompt = true, bool enable_thinking = false, - const std::string & tools_json = ""); + const std::string & tools_json = "", + ChatFormat arch_hint = ChatFormat::QWEN3); } // namespace dflash::common diff --git a/server/src/server/http_server.cpp b/server/src/server/http_server.cpp index ab37805bf..5f6861c2d 100644 --- a/server/src/server/http_server.cpp +++ b/server/src/server/http_server.cpp @@ -353,6 +353,155 @@ std::string render_tool_call_xml(const std::string & name, const json & argument return out; } +// Keys that the Unsloth Jinja template's render_extra_keys macro would expand into +// XML tags, polluting the rendered prompt (e.g. <$schema>, ). +// We strip these at every level of the schema tree before the template sees it. +static const std::vector k_schema_metadata_keys = { + "$schema", "additionalProperties", "$defs", "$ref", "definitions" +}; + +// Strip JSON-Schema metadata keys from a single schema node and recurse into +// nested object property schemas. Only keys in k_schema_metadata_keys are +// removed; all other keys (type, properties, required, enum, items, …) survive. +static json scrub_schema_metadata(json schema) { + if (!schema.is_object()) return schema; + for (const auto & key : k_schema_metadata_keys) { + schema.erase(key); + } + // Recurse into each property's sub-schema. + if (schema.contains("properties") && schema["properties"].is_object()) { + for (auto & [prop_name, prop_schema] : schema["properties"].items()) { + prop_schema = scrub_schema_metadata(prop_schema); + } + } + // Recurse into array item schema. + if (schema.contains("items") && schema["items"].is_object()) { + schema["items"] = scrub_schema_metadata(schema["items"]); + } + // Recurse into JSON-Schema combinators. Claude tool defs frequently use + // these for polymorphic parameter types; without recursion the inner + // sub-schemas keep their $schema/additionalProperties noise. + for (const char * combinator : {"oneOf", "anyOf", "allOf"}) { + if (schema.contains(combinator) && schema[combinator].is_array()) { + for (auto & sub : schema[combinator]) { + sub = scrub_schema_metadata(sub); + } + } + } + if (schema.contains("not") && schema["not"].is_object()) { + schema["not"] = scrub_schema_metadata(schema["not"]); + } + return schema; +} + +// Maximum bytes kept from any tool or parameter description before truncation. +static constexpr size_t kMaxToolDescriptionChars = 500; + +// Truncate a description string to kMaxToolDescriptionChars bytes. +// Priority: paragraph break (\n\n) before the cap, then last ". " before the +// cap, then hard cut (snapping back to avoid splitting a UTF-8 multibyte sequence). +// Appends U+2026 (…, 3 UTF-8 bytes) at the cut point. +static std::string truncate_description(const std::string & s) { + if (s.size() <= kMaxToolDescriptionChars) return s; + + // 1. First \n\n before cap. + size_t nn = s.find("\n\n"); + if (nn != std::string::npos && nn < kMaxToolDescriptionChars) { + return s.substr(0, nn) + "\xE2\x80\xA6"; + } + + // 2. Last ". " at or before cap. + std::string_view sv(s.data(), kMaxToolDescriptionChars); + size_t dot = sv.rfind(". "); + if (dot != std::string_view::npos) { + // Include the period; cut before the trailing space. + return s.substr(0, dot + 1) + "\xE2\x80\xA6"; + } + + // 3. Hard cut, snap back to UTF-8 boundary. + size_t cut = kMaxToolDescriptionChars; + // While cut > 0 and the byte at `cut` is a UTF-8 continuation byte + // (0x80–0xBF), move back one byte. + while (cut > 0 && (static_cast(s[cut]) & 0xC0) == 0x80) { + --cut; + } + return s.substr(0, cut) + "\xE2\x80\xA6"; +} + +// Apply truncate_description to every property's "description" inside a +// parameters/properties object (mutates in place). +static json truncate_parameter_descriptions(json params) { + if (!params.is_object()) return params; + if (!params.contains("properties") || !params["properties"].is_object()) { + return params; + } + for (auto & [prop_name, prop_schema] : params["properties"].items()) { + if (prop_schema.is_object() && prop_schema.contains("description") && + prop_schema["description"].is_string()) { + prop_schema["description"] = + truncate_description(prop_schema["description"].get()); + } + } + return params; +} + +// Normalize tools array to OpenAI/Qwen3 shape: {"type":"function","function":{...}}. +// Anthropic shape uses "input_schema"; bare Qwen shape has "parameters" at top level. +// Also scrubs JSON-Schema metadata keys that the Unsloth Jinja template would render +// as garbage XML tags (causing the model to hallucinate function names like ). +// Truncates function and parameter descriptions to kMaxToolDescriptionChars to prevent +// prescriptive recipes embedded in long descriptions from leaking into the prompt. +json normalize_tools_for_qwen(const json & tools) { + if (!tools.is_array()) return tools; + json out = json::array(); + for (const auto & elem : tools) { + if (!elem.is_object()) { out.push_back(elem); continue; } + // Already OpenAI shape: scrub metadata, truncate descriptions, pass through. + if (elem.contains("type") && elem["type"] == "function" && elem.contains("function")) { + json e = elem; + if (e["function"].contains("description") && e["function"]["description"].is_string()) { + e["function"]["description"] = + truncate_description(e["function"]["description"].get()); + } + if (e["function"].contains("parameters")) { + e["function"]["parameters"] = truncate_parameter_descriptions( + scrub_schema_metadata(e["function"]["parameters"])); + } + out.push_back(std::move(e)); + continue; + } + // Anthropic shape: input_schema → parameters (scrubbed + truncated). + if (elem.contains("input_schema")) { + out.push_back({ + {"type", "function"}, + {"function", { + {"name", elem.value("name", "")}, + {"description", truncate_description(elem.value("description", ""))}, + {"parameters", truncate_parameter_descriptions( + scrub_schema_metadata(elem["input_schema"]))} + }} + }); + continue; + } + // Bare Qwen shape: top-level name + parameters (scrubbed + truncated), no wrapper. + if (elem.contains("name") && elem.contains("parameters")) { + out.push_back({ + {"type", "function"}, + {"function", { + {"name", elem.value("name", "")}, + {"description", truncate_description(elem.value("description", ""))}, + {"parameters", truncate_parameter_descriptions( + scrub_schema_metadata(elem["parameters"]))} + }} + }); + continue; + } + // Unknown shape: pass through unchanged. + out.push_back(elem); + } + return out; +} + std::vector normalize_chat_messages( const json & messages, ApiFormat format, @@ -777,9 +926,9 @@ bool HttpServer::route_request(int fd, const HttpRequest & hr) { req.sampler.rep_window = body["rep_window"].get(); } - // Tools. + // Tools — normalize Anthropic/bare-Qwen shape to OpenAI envelope. if (body.contains("tools")) { - req.tools = body["tools"]; + req.tools = normalize_tools_for_qwen(body["tools"]); } // Tool choice constraint for hint generation. if (body.contains("tool_choice")) { @@ -1000,7 +1149,8 @@ bool HttpServer::route_request(int fd, const HttpRequest & hr) { eos_str, /*add_generation_prompt=*/true, enable_thinking, - tools_json); + tools_json, + chat_format_); } catch (const std::exception & e) { send_error(fd, 500, std::string("chat template (jinja) render failed: ") + e.what()); diff --git a/server/src/server/sse_emitter.cpp b/server/src/server/sse_emitter.cpp index 604f11a73..b029ee1be 100644 --- a/server/src/server/sse_emitter.cpp +++ b/server/src/server/sse_emitter.cpp @@ -16,6 +16,15 @@ static const char THINK_CLOSE[] = ""; static const char TOOL_OPEN[] = ""; static const char FUNCTION_OPEN[] = "CMD format. +static const char BASH_OPEN[] = ""; +static const char READ_OPEN[] = ""; +static const char WRITE_OPEN[] = ""; +static const char EDIT_OPEN[] = ""; +static const char LS_OPEN[] = ""; +static const char GREP_OPEN[] = ""; +static const char GLOB_OPEN[] = ""; static constexpr size_t THINK_OPEN_LEN = 7; static constexpr size_t THINK_CLOSE_LEN = 8; @@ -28,7 +37,14 @@ static bool find_tool_start(const std::string & text, size_t & pos) { while (idx != std::string::npos) { if (text.compare(idx, sizeof(TOOL_OPEN) - 1, TOOL_OPEN) == 0 || text.compare(idx, sizeof(FUNCTION_OPEN) - 1, FUNCTION_OPEN) == 0 || - text.compare(idx, sizeof(TOOL_CODE_OPEN) - 1, TOOL_CODE_OPEN) == 0) { + text.compare(idx, sizeof(TOOL_CODE_OPEN) - 1, TOOL_CODE_OPEN) == 0 || + text.compare(idx, sizeof(BASH_OPEN) - 1, BASH_OPEN) == 0 || + text.compare(idx, sizeof(READ_OPEN) - 1, READ_OPEN) == 0 || + text.compare(idx, sizeof(WRITE_OPEN) - 1, WRITE_OPEN) == 0 || + text.compare(idx, sizeof(EDIT_OPEN) - 1, EDIT_OPEN) == 0 || + text.compare(idx, sizeof(LS_OPEN) - 1, LS_OPEN) == 0 || + text.compare(idx, sizeof(GREP_OPEN) - 1, GREP_OPEN) == 0 || + text.compare(idx, sizeof(GLOB_OPEN) - 1, GLOB_OPEN) == 0) { pos = idx; return true; } diff --git a/server/src/server/tool_parser.cpp b/server/src/server/tool_parser.cpp index 6244b250a..96a503f19 100644 --- a/server/src/server/tool_parser.cpp +++ b/server/src/server/tool_parser.cpp @@ -1,11 +1,12 @@ // Tool call parser implementation. // -// Five detection patterns, tried in order: +// Six detection patterns, tried in order: // 1. ...V... // 2. ...params... (bare, outside tool_call) // 3. (function-signature style) // 4. {JSON} // 5. Bare JSON objects with name+arguments fields +// 6. Native claude-code XML tags: CMD, PATH, etc. #include "tool_parser.h" @@ -161,6 +162,75 @@ static const std::regex & re_tool_code() { return r; } +// Pattern 6: native claude-code XML tags. +// Matches BODY, BODY, etc. +static const std::regex & re_native_tag() { + static std::regex r(R"(<(bash|read|write|edit|ls|grep|glob)>([\s\S]*?))", + std::regex::icase); + return r; +} + +// ─── Parameter-name alias resolution ──────────────────────────────────── + +// Some quantized models (e.g. Qwen3.6-Q3) emit short forms of canonical +// parameter names (cmd instead of command, path instead of file_path). +// Map the emitted key to the schema's actual key when an alias is found. +// Pure helper — returns the original `emitted` if no alias matches. +static std::string resolve_param_alias(const std::string & emitted, const json & props) { + if (!props.is_object() || props.empty()) return emitted; + + std::string lower = emitted; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + + // 1. Direct case-insensitive match against schema keys. + for (auto it = props.begin(); it != props.end(); ++it) { + std::string name = it.key(); + std::string lname = name; + std::transform(lname.begin(), lname.end(), lname.begin(), ::tolower); + if (lname == lower) return name; + } + + // 2. Alias map: common shortenings <=> canonical forms. + static const std::vector>> aliases = { + {"cmd", {"command"}}, + {"command", {"cmd"}}, + {"path", {"file_path", "directory", "dir"}}, + {"file_path", {"path", "file"}}, + {"file", {"file_path", "path"}}, + {"filepath", {"file_path"}}, + {"dir", {"directory", "path"}}, + {"directory", {"dir", "path"}}, + {"query", {"pattern", "q"}}, + {"pattern", {"query", "regex"}}, + {"regex", {"pattern"}}, + {"q", {"query", "pattern"}}, + {"expr", {"expression"}}, + {"expression", {"expr"}}, + {"text", {"content"}}, + {"content", {"text"}}, + {"src", {"source"}}, + {"source", {"src"}}, + {"dst", {"destination", "target"}}, + {"destination", {"dst", "target"}}, + {"target", {"dst", "destination"}}, + }; + + for (const auto & [key, candidates] : aliases) { + if (key != lower) continue; + for (const std::string & candidate : candidates) { + for (auto pit = props.begin(); pit != props.end(); ++pit) { + std::string pname = pit.key(); + std::string lpname = pname; + std::transform(lpname.begin(), lpname.end(), lpname.begin(), ::tolower); + if (lpname == candidate) return pname; + } + } + break; + } + + return emitted; // no alias matched; keep as-is +} + // ─── XML parameter parser ─────────────────────────────────────────────── static json parse_xml_params(const std::string & region, const std::string & fn_name, @@ -183,7 +253,8 @@ static json parse_xml_params(const std::string & region, const std::string & fn_ if (!v.empty() && v.front() == '\n') v.erase(v.begin()); if (!v.empty() && v.back() == '\n') v.pop_back(); - args[k] = convert_param_value(v, k, props); + std::string canonical_k = resolve_param_alias(k, props); + args[canonical_k] = convert_param_value(v, canonical_k, props); } return args; } @@ -306,6 +377,48 @@ static bool parse_function_sig_args(const std::string & arg_text, json & out_arg return true; } +// ─── Native tag helpers ───────────────────────────────────────────────── + +// Case-insensitive lookup of `tag` in the tools array. +// Returns the tool's canonical name if found, otherwise returns `tag` as-is. +static std::string lookup_tool_name(const std::string & tag, const json & tools) { + if (tools.is_null() || !tools.is_array() || tools.empty()) return tag; + + std::string lower_tag = tag; + std::transform(lower_tag.begin(), lower_tag.end(), lower_tag.begin(), ::tolower); + + for (const auto & t : tools) { + const auto & fn = t.contains("function") ? t["function"] : t; + if (!fn.is_object()) continue; + std::string name = fn.value("name", ""); + if (name.empty()) continue; + std::string lower_name = name; + std::transform(lower_name.begin(), lower_name.end(), lower_name.begin(), ::tolower); + if (lower_name == lower_tag) return name; + } + return tag; // no match → lowercase tag name +} + +// Map native tag name to its default argument key + body. +static json tag_to_args(const std::string & tag, const std::string & body) { + json args = json::object(); + std::string lower = tag; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + + if (lower == "bash") args["command"] = body; + else if (lower == "read") args["file_path"] = body; + else if (lower == "grep") args["pattern"] = body; + else if (lower == "glob") args["pattern"] = body; + else if (lower == "write") args["content"] = body; + else if (lower == "edit") args["content"] = body; + else if (lower == "ls") { + if (!body.empty()) args["path"] = body; + } else { + args["content"] = body; // unknown tag fallback + } + return args; +} + // ─── Main parser ──────────────────────────────────────────────────────── ToolParseResult parse_tool_calls(const std::string & text, const json & tools) { @@ -447,6 +560,29 @@ ToolParseResult parse_tool_calls(const std::string & text, const json & tools) { } } + // Pattern 6: native claude-code XML tags (, , , , , , ) + // Gate: only fire when the request actually provided tools. Otherwise + // legitimate prose like "please read the manual" or "grep for the pattern" + // gets eaten as a phantom tool call and the surrounding text is stripped + // via the removals span. Mirrors the streaming gate has_request_tools(). + if (tools.is_array() && !tools.empty()) + { + auto begin = std::sregex_iterator(text.begin(), text.end(), re_native_tag()); + auto end = std::sregex_iterator(); + for (auto it = begin; it != end; ++it) { + size_t pos = it->position(); + if (overlaps(removals, pos)) continue; + std::string tag = (*it)[1].str(); + std::string body = (*it)[2].str(); + // Strip leading/trailing newline (consistent with parameter parser at line 172). + if (!body.empty() && body.front() == '\n') body.erase(body.begin()); + if (!body.empty() && body.back() == '\n') body.pop_back(); + + std::string canonical = lookup_tool_name(tag, tools); + add_call(canonical, tag_to_args(tag, body), pos, pos + it->length()); + } + } + // Build cleaned text by removing all matched spans if (removals.empty()) { result.cleaned_text = text; diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp index 1415aab30..6c0d6e578 100644 --- a/server/test/test_server_unit.cpp +++ b/server/test/test_server_unit.cpp @@ -45,6 +45,8 @@ std::vector normalize_chat_messages( const json & messages, ApiFormat format, ToolMemory & tool_memory); + +json normalize_tools_for_qwen(const json & tools); } // ─── Test framework (ds4 style) ──────────────────────────────────────── @@ -2446,6 +2448,637 @@ static void test_generate_result_accept_rate_zero_when_no_spec_decode() { r.ok = true; // accept_rate not set → must be 0.0f TEST_ASSERT(r.accept_rate == 0.0f); +// normalize_tools_for_qwen tests +// ═══════════════════════════════════════════════════════════════════════ + +static void test_normalize_tools_anthropic_bare() { + // Anthropic shape: input_schema → parameters, wrapped in type/function envelope. + json input = json::array({{ + {"name", "get_weather"}, + {"description", "Get the weather for a city"}, + {"input_schema", { + {"type", "object"}, + {"properties", {{"city", {{"type", "string"}}}}}, + {"required", json::array({"city"})} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0].contains("type")); + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0].contains("function")); + TEST_ASSERT(out[0]["function"]["name"] == "get_weather"); + TEST_ASSERT(out[0]["function"]["description"] == "Get the weather for a city"); + TEST_ASSERT(out[0]["function"].contains("parameters")); + TEST_ASSERT(out[0]["function"]["parameters"]["type"] == "object"); + TEST_ASSERT(out[0]["function"]["parameters"]["properties"].contains("city")); + TEST_ASSERT(!out[0].contains("input_schema")); +} + +static void test_normalize_tools_openai_passthrough() { + // OpenAI shape already: type/function envelope → pass through unchanged. + json input = json::array({{ + {"type", "function"}, + {"function", { + {"name", "search"}, + {"description", "Search the web"}, + {"parameters", {{"type", "object"}, {"properties", json::object()}}} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0]["function"]["name"] == "search"); + TEST_ASSERT(out[0]["function"]["description"] == "Search the web"); +} + +static void test_normalize_tools_bare_qwen_passthrough() { + // Bare Qwen shape: name + parameters at top level, no wrapper → wrap to type/function. + json input = json::array({{ + {"name", "get_weather"}, + {"description", "Get weather"}, + {"parameters", { + {"type", "object"}, + {"properties", {{"city", {{"type", "string"}}}}} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0]["function"]["name"] == "get_weather"); + TEST_ASSERT(out[0]["function"]["description"] == "Get weather"); + TEST_ASSERT(out[0]["function"]["parameters"]["type"] == "object"); +} + +static void test_normalize_tools_mixed() { + // Mixed array: Anthropic + OpenAI shapes both normalize to OpenAI shape. + json input = json::array({ + { + {"name", "tool_a"}, + {"description", "Anthropic-shaped tool"}, + {"input_schema", {{"type", "object"}, {"properties", json::object()}}} + }, + { + {"type", "function"}, + {"function", { + {"name", "tool_b"}, + {"description", "Already OpenAI-shaped"} + }} + } + }); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 2); + // First: Anthropic → normalized + TEST_ASSERT(out[0]["type"] == "function"); + TEST_ASSERT(out[0]["function"]["name"] == "tool_a"); + TEST_ASSERT(out[0]["function"].contains("parameters")); + // Second: OpenAI passthrough + TEST_ASSERT(out[1]["type"] == "function"); + TEST_ASSERT(out[1]["function"]["name"] == "tool_b"); +} + +static void test_normalize_tools_empty() { + // Empty array stays empty. + json out = dflash::common::normalize_tools_for_qwen(json::array()); + TEST_ASSERT(out.is_array()); + TEST_ASSERT(out.empty()); + + // Non-array (defensive) stays unchanged. + json non_array = json::object(); + json out2 = dflash::common::normalize_tools_for_qwen(non_array); + TEST_ASSERT(out2.is_object()); +} + +static void test_normalize_tools_strips_schema_metadata() { + // $schema and additionalProperties must be removed; required must be kept. + json input = json::array({{ + {"name", "my_tool"}, + {"description", "A tool"}, + {"input_schema", { + {"$schema", "http://json-schema.org/draft-07/schema#"}, + {"type", "object"}, + {"additionalProperties", false}, + {"properties", {{"city", {{"type", "string"}}}}}, + {"required", json::array({"city"})} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + const auto & params = out[0]["function"]["parameters"]; + TEST_ASSERT(!params.contains("$schema")); + TEST_ASSERT(!params.contains("additionalProperties")); + TEST_ASSERT(params.contains("required")); + TEST_ASSERT(params["required"][0] == "city"); + TEST_ASSERT(params["type"] == "object"); +} + +static void test_normalize_tools_strips_metadata_recursively() { + // $schema inside a nested property schema must also be stripped. + json input = json::array({{ + {"name", "deep_tool"}, + {"description", "Nested"}, + {"input_schema", { + {"type", "object"}, + {"additionalProperties", false}, + {"$defs", {{"MyDef", {{"type", "string"}}}}}, + {"properties", { + {"foo", { + {"type", "object"}, + {"$schema", "nested-schema-url"}, + {"additionalProperties", false}, + {"properties", {{"bar", {{"type", "string"}}}}} + }} + }} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + const auto & params = out[0]["function"]["parameters"]; + // Top-level metadata scrubbed + TEST_ASSERT(!params.contains("$defs")); + TEST_ASSERT(!params.contains("additionalProperties")); + // Nested property metadata scrubbed + const auto & foo = params["properties"]["foo"]; + TEST_ASSERT(!foo.contains("$schema")); + TEST_ASSERT(!foo.contains("additionalProperties")); + // Nested real fields preserved + TEST_ASSERT(foo["type"] == "object"); + TEST_ASSERT(foo["properties"].contains("bar")); +} + +static void test_normalize_tools_preserves_real_fields() { + // type, properties, required, enum, items.type must all survive scrubbing. + json input = json::array({{ + {"name", "full_tool"}, + {"description", "Full schema"}, + {"input_schema", { + {"$schema", "http://json-schema.org/draft-07/schema#"}, + {"type", "object"}, + {"additionalProperties", false}, + {"required", json::array({"city", "units"})}, + {"properties", { + {"city", {{"type", "string"}, {"description", "City name"}}}, + {"units", {{"type", "string"}, {"enum", json::array({"celsius", "fahrenheit"})}}}, + {"tags", {{"type", "array"}, {"items", {{"type", "string"}}}}} + }} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(input); + TEST_ASSERT(out.size() == 1); + const auto & params = out[0]["function"]["parameters"]; + TEST_ASSERT(params["type"] == "object"); + TEST_ASSERT(params["required"].size() == 2); + TEST_ASSERT(params["properties"].contains("city")); + TEST_ASSERT(params["properties"]["units"]["enum"].size() == 2); + TEST_ASSERT(params["properties"]["tags"]["items"]["type"] == "string"); + TEST_ASSERT(!params.contains("$schema")); + TEST_ASSERT(!params.contains("additionalProperties")); +} + +// ═══════════════════════════════════════════════════════════════════════ +// Tool description truncation tests +// ═══════════════════════════════════════════════════════════════════════ + +// truncate_tool_description is exposed via normalize_tools_for_qwen: we +// exercise it through the public normalize_tools_for_qwen() interface so the +// tests stay independent of any helper signature changes. + +static json make_tool_with_desc(const std::string & desc) { + return json::array({{ + {"name", "my_tool"}, + {"description", desc}, + {"input_schema", { + {"type", "object"}, + {"properties", json::object()} + }} + }}); +} + +static json make_tool_with_param_desc(const std::string & param_desc) { + return json::array({{ + {"name", "my_tool"}, + {"description", "short top"}, + {"input_schema", { + {"type", "object"}, + {"properties", { + {"p1", {{"type", "string"}, {"description", param_desc}}} + }} + }} + }}); +} + +static void test_truncate_short_description_unchanged() { + // 100-char description must come through untouched. + std::string desc(100, 'A'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + TEST_ASSERT(out[0]["function"]["description"].get() == desc); +} + +static void test_truncate_at_paragraph_break() { + // Description has \n\n at position 200, total length 600. + // Expect cut at the paragraph break (pos 200) + "…". + std::string first(200, 'A'); + std::string rest(400, 'B'); + std::string desc = first + "\n\n" + rest; + TEST_ASSERT(desc.size() > 500); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + // Must END with the ellipsis bytes (E2 80 A6) and not contain any 'B'. + TEST_ASSERT(result.size() >= 3); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); + TEST_ASSERT(result.find('B') == std::string::npos); + TEST_ASSERT(result.find("…") != std::string::npos); +} + +static void test_truncate_at_sentence_boundary() { + // Description with ". " at position 400, no \n\n before 500. + // Expect cut at end of sentence (pos 402: period + space consumed) + "…". + std::string first(400, 'C'); + std::string desc = first + ". " + std::string(300, 'D'); + TEST_ASSERT(desc.size() > 500); + // No \n\n in first 500 chars + TEST_ASSERT(desc.substr(0, 500).find("\n\n") == std::string::npos); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.find("…") != std::string::npos); + TEST_ASSERT(result.find('D') == std::string::npos); + // The ". " boundary itself: result should contain the period. + TEST_ASSERT(result.find('.') != std::string::npos); +} + +static void test_truncate_hard_cut() { + // 1000-char description with no \n\n and no ". " before char 500. + std::string desc(1000, 'X'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + TEST_ASSERT(out.size() == 1); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.find("…") != std::string::npos); + // After stripping the 3-byte UTF-8 "…", the ASCII portion is 500 chars. + // Result total = 500 + 3 = 503 bytes. + TEST_ASSERT(result.size() == 503); +} + +static void test_truncate_applies_to_parameter_descriptions() { + // Parameter description of 3000 chars must be truncated. + std::string long_param_desc(3000, 'P'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_param_desc(long_param_desc)); + TEST_ASSERT(out.size() == 1); + const auto & props = out[0]["function"]["parameters"]["properties"]; + TEST_ASSERT(props.contains("p1")); + std::string pdesc = props["p1"]["description"].get(); + TEST_ASSERT(pdesc.find("…") != std::string::npos); + // Must be shorter than the 3000-char input. + TEST_ASSERT(pdesc.size() < 600); +} + +static void test_truncate_preserves_unicode() { + // Description: 499 ASCII chars followed by a 3-byte UTF-8 character (ん = E3 82 93), + // followed by more text. Hard cut at 500 would land mid-codepoint; we expect + // the cut to snap back to the safe boundary (499) and append "…". + std::string ascii499(499, 'Z'); + std::string multibyte = "\xE3\x82\x93"; // ん + std::string desc = ascii499 + multibyte + std::string(100, 'W'); + TEST_ASSERT(desc.size() > 500); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + std::string result = out[0]["function"]["description"].get(); + // Must end with ellipsis (3-byte E2 80 A6). + TEST_ASSERT(result.size() >= 3); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); + TEST_ASSERT(result.find('W') == std::string::npos); + // Byte directly before the ellipsis MUST NOT be a UTF-8 continuation byte + // (10xxxxxx => 0x80..0xBF). If it were, we'd have bisected a multibyte + // codepoint. Expected: last 'Z' (0x5A) or a valid lead/single byte. + TEST_ASSERT(result.size() >= 4); + unsigned char last_before = static_cast(result[result.size() - 4]); + TEST_ASSERT((last_before & 0xC0) != 0x80); + // The straddling multibyte sequence must NOT appear in the result. + TEST_ASSERT(result.find(multibyte) == std::string::npos); +} + +static void test_truncate_preserves_unicode_2byte() { + // 499 ASCII + a 2-byte codepoint (é = 0xC3 0xA9) straddling the cut. + std::string ascii499(499, 'Z'); + std::string two_byte = "\xC3\xA9"; + std::string desc = ascii499 + two_byte + std::string(100, 'W'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.size() >= 4); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); + unsigned char last_before = static_cast(result[result.size() - 4]); + TEST_ASSERT((last_before & 0xC0) != 0x80); + TEST_ASSERT(result.find(two_byte) == std::string::npos); +} + +static void test_truncate_preserves_unicode_4byte() { + // 498 ASCII + a 4-byte codepoint (𝄞 = F0 9D 84 9E) straddling the cut. + std::string ascii498(498, 'Z'); + std::string four_byte = "\xF0\x9D\x84\x9E"; + std::string desc = ascii498 + four_byte + std::string(100, 'W'); + json out = dflash::common::normalize_tools_for_qwen(make_tool_with_desc(desc)); + std::string result = out[0]["function"]["description"].get(); + TEST_ASSERT(result.size() >= 4); + TEST_ASSERT(result.substr(result.size() - 3) == "\xE2\x80\xA6"); + unsigned char last_before = static_cast(result[result.size() - 4]); + TEST_ASSERT((last_before & 0xC0) != 0x80); + TEST_ASSERT(result.find(four_byte) == std::string::npos); +} + +// ═══════════════════════════════════════════════════════════════════════ +// Native claude-code XML tag tests (, , etc.) +// ═══════════════════════════════════════════════════════════════════════ + +// Helper: build a tools array with one entry named `name`. +static json make_tools(const std::string & name) { + return json::array({{ + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "tool"}, + {"parameters", {{"type", "object"}, {"properties", json::object()}}} + }} + }}); +} + +static void test_parse_tool_call_bash_simple() { + // Basic CMD → ToolCall with name matching tools casing and {"command": CMD}. + json tools = make_tools("Bash"); + std::string text = "I'll run cat /etc/hostname"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Bash"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + TEST_ASSERT(args["command"] == "cat /etc/hostname"); + } +} + +static void test_parse_tool_call_bash_multiline() { + // Multiline body inside ... — leading/trailing newlines stripped. + // Pattern 6 (native tags) requires tools to be present in the request. + json tools = make_tools("Bash"); + std::string text = "\nls -la\necho ok\n"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + // Consistent with tool_parser.cpp:172 — leading/trailing newline stripped. + std::string cmd = args["command"].get(); + TEST_ASSERT(cmd.find("ls -la") != std::string::npos); + TEST_ASSERT(cmd.find("echo ok") != std::string::npos); + } +} + +static void test_parse_tool_call_ls_with_path() { + // /tmp → {"path": "/tmp"}. + // Pattern 6 (native tags) requires tools to be present in the request. + json tools = make_tools("LS"); + std::string text = "/tmp"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("path")); + TEST_ASSERT(args["path"] == "/tmp"); + } +} + +static void test_parse_tool_call_bash_name_lookup() { + // Case-insensitive lookup: request tools has "Bash", model emits . + json tools = make_tools("Bash"); + std::string text = "pwd"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Bash"); + } +} + +static void test_parse_tool_call_bash_no_match() { + // Pattern 6 fires only when tools array is non-empty. With a tools list + // that doesn't contain "bash" but is otherwise non-empty, the tag still + // matches and falls back to lowercase canonical name (per lookup_tool_name). + // tool_allowed() then rejects it because "bash" isn't in the list. + json tools = make_tools("Edit"); + std::string text = "pwd"; + auto result = parse_tool_calls(text, tools); + // Either 0 (rejected by tool_allowed) or 1 with name="bash" (lowercase fallback). + // Both are acceptable contracts; document the actual current behavior. + if (result.tool_calls.size() == 1) { + TEST_ASSERT(result.tool_calls[0].name == "bash"); + } else { + TEST_ASSERT(result.tool_calls.empty()); + } +} + +static void test_parse_tool_call_no_tools_no_fabrication() { + // P1 gate (P1-2 from momus review): when no tools are provided in the + // request, Pattern 6 must NOT fabricate a tool call from prose like + // "please read the manual" or "grep for the pattern". + std::string text = "pwd"; // explicitly looks like a tool call + auto result = parse_tool_calls(text); // ← NO tools arg + TEST_ASSERT(result.tool_calls.empty()); + // Prose is preserved (NOT swallowed by removals span). + TEST_ASSERT(result.cleaned_text.find("pwd") != std::string::npos); +} + +static void test_parse_tool_call_no_tools_no_fabrication_prose() { + // Same gate, exercised on natural prose containing tag-shaped substrings. + std::string text = "Please read the documentation and grep for examples."; + auto result = parse_tool_calls(text); // no tools + TEST_ASSERT(result.tool_calls.empty()); +} + +// ═══════════════════════════════════════════════════════════════════════ +// resolve_param_alias tests (P2-3 from momus review) — exercised via the +// public parse_tool_calls() API since resolve_param_alias is static. +// ═══════════════════════════════════════════════════════════════════════ + +static void test_param_alias_cmd_to_command() { + // Model emits but schema requires "command". + // The alias resolver maps cmd → command (the canonical name in tools). + json tools = make_tools("Bash"); // Bash has parameter "command" + std::string text = + "ls /tmp"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + TEST_ASSERT(!args.contains("cmd")); + TEST_ASSERT(args["command"] == "ls /tmp"); + } +} + +static void test_param_alias_path_to_file_path() { + // Model emits but tool schema requires "file_path". + json tools = json::array({{ + {"type", "function"}, + {"function", { + {"name", "Read"}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"file_path", {{"type", "string"}}} + }} + }} + }} + }}); + std::string text = + "/etc/hosts"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("file_path")); + TEST_ASSERT(args["file_path"] == "/etc/hosts"); + } +} + +static void test_param_alias_case_insensitive_direct() { + // Model emits (capitalised), schema has "command". + // Step 1 of resolver is a case-insensitive direct match → "command". + json tools = make_tools("Bash"); + std::string text = + "pwd"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("command")); + } +} + +static void test_param_alias_no_match_passthrough() { + // Model emits an arg with a name not in the alias table and not in schema. + // Should pass through unchanged. + json tools = make_tools("Bash"); + std::string text = + "x"; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args.contains("zzzunknown")); + } +} + +// ═══════════════════════════════════════════════════════════════════════ +// scrub_schema_metadata combinator recursion (P2-1 from momus review). +// ═══════════════════════════════════════════════════════════════════════ + +static void test_scrub_recurses_into_oneOf() { + json tool = json::array({{ + {"name", "X"}, + {"description", "d"}, + {"input_schema", { + {"type", "object"}, + {"properties", { + {"v", { + {"oneOf", json::array({ + {{"type", "string"}, {"$schema", "noise"}, {"additionalProperties", false}}, + {{"type", "integer"}, {"$defs", json::object()}} + })} + }} + }} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(tool); + TEST_ASSERT(out.size() == 1); + const auto & v = out[0]["function"]["parameters"]["properties"]["v"]; + TEST_ASSERT(v.contains("oneOf")); + const auto & one_of = v["oneOf"]; + TEST_ASSERT(one_of.is_array() && one_of.size() == 2); + TEST_ASSERT(!one_of[0].contains("$schema")); + TEST_ASSERT(!one_of[0].contains("additionalProperties")); + TEST_ASSERT(!one_of[1].contains("$defs")); + // type still present. + TEST_ASSERT(one_of[0]["type"] == "string"); + TEST_ASSERT(one_of[1]["type"] == "integer"); +} + +static void test_scrub_recurses_into_anyOf_allOf_not() { + json tool = json::array({{ + {"name", "X"}, + {"description", "d"}, + {"input_schema", { + {"type", "object"}, + {"anyOf", json::array({ + {{"type", "string"}, {"$schema", "noise"}} + })}, + {"allOf", json::array({ + {{"type", "integer"}, {"additionalProperties", false}} + })}, + {"not", {{"type", "null"}, {"$defs", json::object()}}} + }} + }}); + json out = dflash::common::normalize_tools_for_qwen(tool); + const auto & params = out[0]["function"]["parameters"]; + TEST_ASSERT(!params["anyOf"][0].contains("$schema")); + TEST_ASSERT(!params["allOf"][0].contains("additionalProperties")); + TEST_ASSERT(!params["not"].contains("$defs")); + TEST_ASSERT(params["not"]["type"] == "null"); +} + +static void test_parse_tool_call_bash_text_around() { + // Text before and after the tag — tag extracted as tool call, surrounding text preserved. + json tools = make_tools("Bash"); + std::string text = "Sure, I'll do that.\npwd\nLet me know the result."; + auto result = parse_tool_calls(text, tools); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Bash"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args["command"] == "pwd"); + } + // Surrounding text must not be swallowed. + TEST_ASSERT(result.cleaned_text.find("Sure") != std::string::npos || + result.cleaned_text.find("Let me know") != std::string::npos); +} + +static void test_parse_tool_call_existing_tool_call_still_works() { + // Regression: existing format still parses correctly. + std::string text = + "\n" + "\n" + "/foo/bar.txt\n" + "hello\n" + "\n" + ""; + auto result = parse_tool_calls(text); + TEST_ASSERT(result.tool_calls.size() == 1); + if (!result.tool_calls.empty()) { + TEST_ASSERT(result.tool_calls[0].name == "Edit"); + auto args = json::parse(result.tool_calls[0].arguments); + TEST_ASSERT(args["path"] == "/foo/bar.txt"); + TEST_ASSERT(args["content"] == "hello"); + } +} + +static void test_emitter_native_bash_tag_detected() { + // When the model emits cmd, the SSE emitter should route + // it to the tool buffer and parse it as a Bash tool call. + json tools = make_tools("Bash"); + SseEmitter em(ApiFormat::ANTHROPIC, "req_bash_001", "test-model", 10, + tools, nullptr, false); + em.emit_start(); + em.emit_token("I'll run: ls /tmp"); + auto finish = em.emit_finish(10); + std::string s = concat(finish); + + TEST_ASSERT(!em.tool_calls().empty()); + if (!em.tool_calls().empty()) { + TEST_ASSERT(em.tool_calls()[0].name == "Bash"); + auto args = json::parse(em.tool_calls()[0].arguments); + TEST_ASSERT(args["command"] == "ls /tmp"); + } + TEST_ASSERT(s.find("\"type\":\"tool_use\"") != std::string::npos); + TEST_ASSERT(s.find("\"name\":\"Bash\"") != std::string::npos); + TEST_ASSERT(s.find("\"stop_reason\":\"tool_use\"") != std::string::npos); } int main() { @@ -2608,6 +3241,45 @@ int main() { RUN_TEST(test_generate_result_accept_rate_in_usage_openai); RUN_TEST(test_generate_result_accept_rate_in_usage_anthropic); RUN_TEST(test_generate_result_accept_rate_zero_when_no_spec_decode); + std::fprintf(stderr, "\n── normalize_tools_for_qwen ──\n"); + RUN_TEST(test_normalize_tools_anthropic_bare); + RUN_TEST(test_normalize_tools_openai_passthrough); + RUN_TEST(test_normalize_tools_bare_qwen_passthrough); + RUN_TEST(test_normalize_tools_mixed); + RUN_TEST(test_normalize_tools_empty); + RUN_TEST(test_normalize_tools_strips_schema_metadata); + RUN_TEST(test_normalize_tools_strips_metadata_recursively); + RUN_TEST(test_normalize_tools_preserves_real_fields); + RUN_TEST(test_scrub_recurses_into_oneOf); + RUN_TEST(test_scrub_recurses_into_anyOf_allOf_not); + + std::fprintf(stderr, "\n── Tool description truncation ──\n"); + RUN_TEST(test_truncate_short_description_unchanged); + RUN_TEST(test_truncate_at_paragraph_break); + RUN_TEST(test_truncate_at_sentence_boundary); + RUN_TEST(test_truncate_hard_cut); + RUN_TEST(test_truncate_applies_to_parameter_descriptions); + RUN_TEST(test_truncate_preserves_unicode); + RUN_TEST(test_truncate_preserves_unicode_2byte); + RUN_TEST(test_truncate_preserves_unicode_4byte); + + std::fprintf(stderr, "\n── Native claude-code XML tags ( etc.) ──\n"); + RUN_TEST(test_parse_tool_call_bash_simple); + RUN_TEST(test_parse_tool_call_bash_multiline); + RUN_TEST(test_parse_tool_call_ls_with_path); + RUN_TEST(test_parse_tool_call_bash_name_lookup); + RUN_TEST(test_parse_tool_call_bash_no_match); + RUN_TEST(test_parse_tool_call_no_tools_no_fabrication); + RUN_TEST(test_parse_tool_call_no_tools_no_fabrication_prose); + RUN_TEST(test_parse_tool_call_bash_text_around); + RUN_TEST(test_parse_tool_call_existing_tool_call_still_works); + RUN_TEST(test_emitter_native_bash_tag_detected); + + std::fprintf(stderr, "\n── Param-name alias resolution ──\n"); + RUN_TEST(test_param_alias_cmd_to_command); + RUN_TEST(test_param_alias_path_to_file_path); + RUN_TEST(test_param_alias_case_insensitive_direct); + RUN_TEST(test_param_alias_no_match_passthrough); std::fprintf(stderr, "\n══════════════════════════════════════════\n"); std::fprintf(stderr, " Results: %d assertions, %d failures\n",