From 4472aa9446072a660fa995e6afae79afaf87eb3d Mon Sep 17 00:00:00 2001
From: Erik LaBianca
Date: Wed, 3 Jun 2026 17:29:20 -0400
Subject: [PATCH] feat(server): plain-text call:{} tool parsing for Gemma4

## What

Extends server/src/server/tool_parser.{cpp,h} to parse Gemma's plain-text
call:{} emissions (also accepts the ``_call:`` tokenizer-artifact prefix)
and render them as Anthropic tool_use + tool_result blocks. Isolated to
tool_parser; the streaming detection hook in sse_emitter ships with #341.

Adds 364 lines of C++ unit coverage in test_server_unit.cpp plus the
call-verb parser plan and Gemma4-26B parser-fix writeup.

## Why

Gemma4 emits tool calls as plain-text call:{...} rather than structured
JSON, which breaks the existing Anthropic tool_use pipeline on agentic
workloads. This parser closes that gap so Gemma4 can drive coding-agent
loops end-to-end.

## Dependencies

None - this PR is independent.
---
 server/src/server/tool_parser.cpp | 192 +++++++++++++++-
 server/src/server/tool_parser.h   |   5 +-
 server/test/test_server_unit.cpp  | 364 ++++++++++++++++++++++++++++++
 3 files changed, 556 insertions(+), 5 deletions(-)

diff --git a/server/src/server/tool_parser.cpp b/server/src/server/tool_parser.cpp
index 6244b250a..76c44f799 100644
--- a/server/src/server/tool_parser.cpp
+++ b/server/src/server/tool_parser.cpp
@@ -1,11 +1,18 @@
 // Tool call parser implementation.
 //
-// Five detection patterns, tried in order:
+// Six detection patterns, tried in order:
 //   1. ...V...
 //   2. ...params... (bare, outside tool_call)
 //   3. (function-signature style)
 //   4. {JSON}
-//   5. Bare JSON objects with name+arguments fields
+//   5. call:<ns:>?<verb>{relaxed-JSON args} (gemma plain-text emissions)
+//   6. Bare JSON objects with name+arguments fields
+//
+// Pattern 5 runs *before* pattern 6 so that args like
+//   call:outer{"name": "inner", "arguments": {}}
+// don't get hijacked by the bare-JSON sweep into a spurious `inner` tool
+// call.  The brace-balanced span pattern 5 records in `removals` shadows
+// the inner JSON from pattern 6's view via `overlaps()`.
 
 #include "tool_parser.h"
 
@@ -161,6 +168,147 @@ static const std::regex & re_tool_code() {
     return r;
 }
 
+// Pattern 5: `call:<ns:>?<verb>{` opener.  The sentinel alternation in front
+// rejects narrative usages like "I'll call:foo{x:1}" where `call:` is glued
+// to a preceding word — whitespace, common punctuation, and open/close
+// brackets are the realistic boundaries seen in the snapshot data.  `\s`
+// covers `\n` so a `call:` at the start of any line is matched without
+// relying on std::regex multiline support (which is non-portable).
+//
+// Note that `}` is in the sentinel list — gemma frequently emits multiple
+// invocations back-to-back: `call:a{x:1}call:b{y:2}`.  Without `}` as a
+// sentinel the second match would be missed.
+//
+// `_` is also in the sentinel list to handle a SentencePiece / chat-template
+// artifact: post-bragi-channel-routing (commit 4b757d1) the gemma server
+// occasionally emits raw tokens like `_call:get_country_info{...}` where
+// the leading `_` is residual tokenizer serialization.  Without `_` here
+// the parser misses every such invocation — empirically confirmed against
+// gemma-4-26b 2026-05-31 smoke test.  Tradeoff: `my_call:foo{}` mid-
+// identifier could match, but real model output doesn't emit `my_call:`
+// strings (tool names come from the request's tool definitions).
+static const std::regex & re_call_verb_open() {
+    static std::regex r(R"((^|[\s,;:\(\[\{\}\)\]\>_])call:([A-Za-z0-9_.:\-]+)\s*\{)");
+    return r;
+}
+
+// Find the index one past the `}` that matches `text[open] == '{'`.
+// Respects nested {}/[] depth and skips over "..." / '...' / `...`
+// string literals (with backslash escapes).  Returns std::string::npos if
+// no matching close is found.
+static size_t balanced_braces_end(const std::string & text, size_t open) {
+    int depth = 0;
+    char in_str = 0; // 0, or one of '"', '\'', '`'
+    for (size_t i = open; i < text.size(); i++) {
+        char c = text[i];
+        if (in_str) {
+            if (c == '\\' && i + 1 < text.size()) { i++; continue; }
+            if (c == in_str) in_str = 0;
+            continue;
+        }
+        if (c == '"' || c == '\'' || c == '`') { in_str = c; continue; }
+        if (c == '{' || c == '[') {
+            depth++;
+        } else if (c == '}' || c == ']') {
+            depth--;
+            if (depth == 0 && c == '}') return i + 1;
+            if (depth < 0) return std::string::npos;
+        }
+    }
+    return std::string::npos;
+}
+
+// Try strict json::parse first; on failure rewrite single- and
+// backtick-quoted strings to double-quoted, wrap bare identifier keys
+// in double quotes, and retry.  Returns true and populates `out` on
+// success; returns false on irrecoverable failure (and `out` is unset).
+//
+// The rewrite walks the buffer char-by-char tracking string state so it
+// doesn't mangle identifiers that live inside string values.
+static bool coerce_relaxed_json(const std::string & payload, json & out) {
+    {
+        json parsed = json::parse(payload, nullptr, false);
+        if (!parsed.is_discarded()) {
+            out = std::move(parsed);
+            return true;
+        }
+    }
+
+    // Permissive pass.
+    static const std::regex re_bare_key(R"(([A-Za-z_][A-Za-z0-9_]*)(\s*:))");
+
+    std::string rewritten;
+    rewritten.reserve(payload.size() + 16);
+    char in_str = 0; // 0, or the *opening* quote we saw
+    for (size_t i = 0; i < payload.size(); ) {
+        char c = payload[i];
+        if (in_str) {
+            // Inside a string we already opened.  Mirror escapes verbatim.
+            if (c == '\\' && i + 1 < payload.size()) {
+                rewritten += c;
+                rewritten += payload[i + 1];
+                i += 2;
+                continue;
+            }
+            if (c == in_str) {
+                // Close — always emit a double-quote regardless of which
+                // quote style opened the string.  The opening side already
+                // emitted a `"`.
+                rewritten += '"';
+                in_str = 0;
+                i++;
+                continue;
+            }
+            // Escape inner `"` when we opened the string with a non-`"`
+            // quote (single or backtick).  Without this, content like
+            // `'he said "hi"'` rewrites to `"he said "hi""` which is
+            // invalid JSON and silently drops the whole tool call.
+            // When in_str == '"', a `"` inside should have arrived via
+            // the `\\` escape branch above; a bare `"` here is malformed
+            // input we pass through unchanged.
+            if (in_str != '"' && c == '"') {
+                rewritten += "\\\"";
+                i++;
+                continue;
+            }
+            rewritten += c;
+            i++;
+            continue;
+        }
+        if (c == '"' || c == '\'' || c == '`') {
+            rewritten += '"';
+            in_str = c;
+            i++;
+            continue;
+        }
+        // Try to match a bare-key identifier here, searching `payload` in
+        // place (a per-char substr copy would make this pass quadratic).
+        // Don't fire if the previous emitted char is `"` — we'd be right
+        // after a JSON string boundary and the "identifier" is probably a
+        // value continuation (`"k": foo: 1` is malformed anyway; leave it).
+        std::smatch m;
+        const auto tail_begin = payload.cbegin() + static_cast<std::string::difference_type>(i);
+        if (std::regex_search(tail_begin, payload.cend(), m, re_bare_key,
+                              std::regex_constants::match_continuous) &&
+            (rewritten.empty() || rewritten.back() != '"')) {
+            rewritten += '"';
+            rewritten += m[1].str();
+            rewritten += '"';
+            rewritten += m[2].str();
+            i += m.length();
+            continue;
+        }
+        rewritten += c;
+        i++;
+    }
+
+    json parsed = json::parse(rewritten, nullptr, false);
+    if (parsed.is_discarded()) return false;
+    out = std::move(parsed);
+    return true;
+}
+
+
 // ─── XML parameter parser ───────────────────────────────────────────────
 static json parse_xml_params(const std::string & region,
                              const std::string & fn_name,
@@ -397,7 +545,45 @@ ToolParseResult parse_tool_calls(const std::string & text, const json & tools) {
         }
     }
 
-    // Pattern 5: Bare JSON objects
+    // Pattern 5: call:<ns:>?<verb>{relaxed-JSON args}
+    //
+    // Runs before the bare-JSON sweep so that inner JSON of the form
+    //   call:outer{"name": "inner", "arguments": {}}
+    // doesn't get hijacked into a spurious `inner` ToolCall.
+    {
+        auto begin = std::sregex_iterator(text.begin(), text.end(), re_call_verb_open());
+        auto end = std::sregex_iterator();
+        for (auto it = begin; it != end; ++it) {
+            // Group 1: sentinel char (may be empty if matched at `^`).
+            // Group 2: full verb including any embedded namespaces.
+            size_t prefix_len = (*it)[1].matched ? (*it)[1].length() : 0;
+            size_t call_start = it->position() + prefix_len;
+            if (overlaps(removals, call_start)) continue;
+
+            // The matched substring runs from call_start through the `{`
+            // (consuming the opener and any whitespace between verb and
+            // brace).  Compute the brace index from the match end.
+            size_t brace_open = it->position() + it->length() - 1;
+            if (brace_open >= text.size() || text[brace_open] != '{') continue;
+
+            size_t brace_close = balanced_braces_end(text, brace_open);
+            if (brace_close == std::string::npos) continue;
+
+            std::string raw_args = text.substr(brace_open, brace_close - brace_open);
+            json args;
+            if (!coerce_relaxed_json(raw_args, args)) continue;
+            if (!args.is_object()) continue;
+
+            std::string verb = (*it)[2].str();
+            size_t colon = verb.find_last_of(':');
+            if (colon != std::string::npos) verb = verb.substr(colon + 1);
+            if (verb.empty()) continue;
+
+            add_call(verb, args, call_start, brace_close);
+        }
+    }
+
+    // Pattern 6: Bare JSON objects
     {
         size_t cursor = 0;
         while (cursor < text.size()) {
diff --git a/server/src/server/tool_parser.h b/server/src/server/tool_parser.h
index 1ff9890a6..b3bd4a796 100644
--- a/server/src/server/tool_parser.h
+++ b/server/src/server/tool_parser.h
@@ -1,11 +1,12 @@
 // Tool call parser — extracts structured tool calls from generated text.
 //
-// Supports 5 detection patterns:
+// Supports 6 detection patterns:
 //   1. ... (Qwen XML)
 //   2. ... (bare function XML)
 //   3. (function signature)
 //   4. {...JSON...} (tool_code wrapper)
-//   5. Bare JSON objects {"name":..., "arguments":...} (raw JSON)
+//   5. call:<ns:>?<verb>{relaxed-JSON-args} (gemma plain-text)
+//   6. Bare JSON objects {"name":..., "arguments":...} (raw JSON)
 
 #pragma once
 
diff --git a/server/test/test_server_unit.cpp b/server/test/test_server_unit.cpp
index 363c9e9e6..20b7c1ad5 100644
--- a/server/test/test_server_unit.cpp
+++ b/server/test/test_server_unit.cpp
@@ -317,6 +317,343 @@ static void test_parse_tool_allowed_filter() {
     TEST_ASSERT(result.tool_calls.empty());
 }
 
+// ─── Pattern 5: call:{...} plain-text tool calls ─────────────────
+//
+// Covers the gemma plain-text emission path added in
+// server/src/server/tool_parser.cpp (PR #340).  The opener regex requires
+// a sentinel character before `call:` (start-of-string or one of
+// [\s,;:\(\[\{\}\)\]\>_]); the body is brace-balanced and string-aware;
+// and the args go through coerce_relaxed_json before becoming the
+// argument object.
+
+static void test_parse_call_verb_empty_args() {
+    // Bareword `call:get_weather{}` at start-of-string — sentinel
+    // matches the leading `^` anchor; body is the empty object `{}`.
+    auto result = parse_tool_calls("call:get_weather{}");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "get_weather");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args.is_object());
+        TEST_ASSERT(args.empty());
+    }
+    // The matched span should be removed from cleaned_text.
+    TEST_ASSERT(result.cleaned_text.find("call:get_weather") == std::string::npos);
+}
+
+static void test_parse_call_verb_strict_json_args() {
+    // Strict JSON args go through json::parse directly in
+    // coerce_relaxed_json's fast path.
+    auto result = parse_tool_calls("call:get_weather{\"city\": \"NYC\"}");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "get_weather");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["city"] == "NYC");
+    }
+}
+
+static void test_parse_call_verb_namespaced_verb() {
+    // `ns:foo` namespaced verbs — the colon-strip logic in pattern 5
+    // strips everything up to the last `:` so the registered tool name
+    // is just `foo`.
+    auto result = parse_tool_calls("call:ns:foo{\"k\": 1}");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "foo");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["k"] == 1);
+    }
+}
+
+static void test_parse_call_verb_whitespace_before_key() {
+    // Leading whitespace inside the brace body must not break parsing.
+    // (Whitespace tolerance is provided by json::parse / the relaxed
+    // fallback rewriter.)
+    auto result = parse_tool_calls("call:get_weather{ \"city\": \"NYC\" }");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "get_weather");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["city"] == "NYC");
+    }
+}
+
+static void test_parse_call_verb_missing_close_brace_rejected() {
+    // Unbalanced opener — balanced_braces_end returns npos so pattern 5
+    // bails out and produces no tool call.  The text leaks through.
+    auto result = parse_tool_calls("call:get_weather{\"city\": \"NYC\"");
+    TEST_ASSERT(result.tool_calls.empty());
+}
+
+static void test_parse_call_verb_narrative_without_body_rejected() {
+    // Narrative usage with a non-balanced body — sentinel matches the
+    // space before `call:`, but the `{` has no matching `}` so the
+    // call is discarded.
+    auto result = parse_tool_calls("I will call:foo{");
+    TEST_ASSERT(result.tool_calls.empty());
+}
+
+static void test_parse_call_verb_underscore_prefix() {
+    // SentencePiece artifact: `_call:` (the `_` is the literal
+    // underscore character; sentinel char-class includes `_` for
+    // exactly this case).
+    auto result = parse_tool_calls("_call:get_weather{\"city\": \"NYC\"}");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "get_weather");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["city"] == "NYC");
+    }
+}
+
+static void test_parse_call_verb_nested_object_args() {
+    // Nested `{}` inside the args — balanced_braces_end tracks depth so
+    // the outer close isn't consumed by the inner object.
+    auto result = parse_tool_calls(
+        "call:get_weather{\"loc\": {\"city\": \"NYC\", \"zip\": \"10001\"}}");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "get_weather");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["loc"].is_object());
+        TEST_ASSERT(args["loc"]["city"] == "NYC");
+        TEST_ASSERT(args["loc"]["zip"] == "10001");
+    }
+}
+
+static void test_parse_call_verb_back_to_back() {
+    // Gemma frequently emits multiple invocations back-to-back.  The
+    // sentinel char-class includes `}` so the second `call:` is found
+    // after the first closes.
+    auto result = parse_tool_calls(
+        "call:a{\"x\": 1}call:b{\"y\": 2}");
+    TEST_ASSERT(result.tool_calls.size() == 2);
+    if (result.tool_calls.size() == 2) {
+        TEST_ASSERT(result.tool_calls[0].name == "a");
+        auto args0 = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args0["x"] == 1);
+        TEST_ASSERT(result.tool_calls[1].name == "b");
+        auto args1 = json::parse(result.tool_calls[1].arguments);
+        TEST_ASSERT(args1["y"] == 2);
+    }
+}
+
+static void test_parse_call_verb_relaxed_single_quotes() {
+    // Relaxed-JSON fallback: single-quoted strings + bare identifier
+    // keys are rewritten to strict JSON before parse.
+    auto result = parse_tool_calls("call:foo{city: 'NYC'}");
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "foo");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["city"] == "NYC");
+    }
+}
+
+static void test_parse_call_verb_glued_to_word_rejected() {
+    // No sentinel char before `call:` (glued to identifier) — pattern 5
+    // must NOT match.  `_` is a deliberate exception covered by its
+    // own test; here we use a regular letter.
+    auto result = parse_tool_calls("xcall:foo{\"a\": 1}");
+    // Pattern 5 should NOT fire.  Pattern 6 (bare-JSON sweep) sees
+    // `{"a": 1}` but it has no `name`/`function` field, so it produces
+    // no tool call either.
+    TEST_ASSERT(result.tool_calls.empty());
+}
+
+static void test_parse_call_verb_does_not_hijack_inner_name() {
+    // Regression: pattern 5 must run before pattern 6 so that an inner
+    // {"name": "...", "arguments": {}} in the call's args doesn't get
+    // hijacked into a spurious bare-JSON tool call.
+    auto result = parse_tool_calls(
+        "call:outer{\"name\": \"inner\", \"arguments\": {}}");
+    // Should match exactly one tool: the outer call.  The inner
+    // {"name":..., "arguments":...} JSON is shadowed by the recorded
+    // removal span.
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "outer");
+    }
+}
+
+// ─── Pattern 5 (cont.): PR #341 imports — narrative & quoting edge cases ─
+//
+// These tests originated in PR #341 alongside sse_emitter Pattern-B work
+// and were relocated here when #341 was split.  They focus on edge cases
+// that complement the core call:{} suite above.
+
+static void test_parse_call_verb_single() {
+    std::string text = "call:get_country_info{country: \"France\"}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "get_country_info");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["country"] == "France");
+    }
+    TEST_ASSERT(result.cleaned_text.find("call:") == std::string::npos);
+}
+
+static void test_parse_call_verb_namespaced() {
+    std::string text = "call:execute-bead:read-file{path: \"crates/foo/src/lib.rs\"}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        // Verb only — namespace stripped.
+        TEST_ASSERT(result.tool_calls[0].name == "read-file");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["path"] == "crates/foo/src/lib.rs");
+    }
+}
+
+static void test_parse_call_verb_snake_and_hyphen() {
+    std::string text =
+        "call:execute-bead:list-files{path: \"src/\"}\n\n"
+        "call:execute-bead:read_file{path: \"a/b.rs\"}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 2);
+    if (result.tool_calls.size() == 2) {
+        TEST_ASSERT(result.tool_calls[0].name == "list-files");
+        TEST_ASSERT(result.tool_calls[1].name == "read_file");
+    }
+}
+
+static void test_parse_call_verb_tool_allowed_filter() {
+    std::string text = "call:disallowed_verb{x: 1}call:allowed_verb{y: 2}";
+    json tools = json::array({
+        {{"type", "function"}, {"function", {{"name", "allowed_verb"}}}}
+    });
+    auto result = parse_tool_calls(text, tools);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "allowed_verb");
+    }
+}
+
+static void test_parse_call_verb_inline_prose_rejected() {
+    // No sentinel char before `call:` — must NOT match.
+    std::string text = "narrative.call:foo{x:1}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.empty());
+}
+
+static void test_parse_call_verb_inline_prose_after_space() {
+    // Whitespace IS a valid sentinel — this should match.
+    std::string text = "Sure, I'll call:foo{x: 1}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "foo");
+    }
+}
+
+static void test_parse_call_verb_malformed_args() {
+    // Unterminated brace — drop the call, don't crash.
+    std::string text = "call:foo{country: \"France\"";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.empty());
+}
+
+static void test_parse_call_verb_inner_brace_in_string() {
+    // The `{` and `}` inside the string value must not confuse the
+    // balanced-brace scanner.
+    std::string text = "call:foo{cmd: \"echo {not_a_brace} ok\"}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["cmd"] == "echo {not_a_brace} ok");
+    }
+}
+
+static void test_parse_call_verb_unquoted_keys() {
+    // Relaxed-JSON path: bare keys get quoted.
+    std::string text = "call:foo{path: \"x\", count: 3}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["path"] == "x");
+        TEST_ASSERT(args["count"] == 3);
+    }
+}
+
+static void test_parse_call_verb_cleaned_text() {
+    // The matched span should be stripped from cleaned_text.
+    std::string text = "Hello call:foo{x: 1} world.";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    TEST_ASSERT(result.cleaned_text.find("call:") == std::string::npos);
+    TEST_ASSERT(result.cleaned_text.find("Hello") != std::string::npos);
+    TEST_ASSERT(result.cleaned_text.find("world.") != std::string::npos);
+}
+
+static void test_parse_call_verb_intercept_inner_json() {
+    // Codex-requested: inner args of the form {"name": ..., "arguments": ...}
+    // must NOT be picked up by pattern 6 (bare-JSON sweep) as a spurious
+    // `inner` ToolCall.  Exactly one ToolCall, named `outer`, with the
+    // inner JSON intact in its arguments.
+    std::string text = "call:outer{\"name\": \"inner\", \"arguments\": {}}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "outer");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["name"] == "inner");
+        TEST_ASSERT(args["arguments"].is_object());
+    }
+}
+
+static void test_parse_call_verb_multiline_args() {
+    // Snapshot rows have multi-line nested args; the balanced-brace
+    // scanner is line-agnostic, so this must Just Work.
+    std::string text =
+        "call:default_api:analyze_data{\n"
+        " data: [{\"date\": \"2024-10-05\", \"qty\": 50}, {\"date\": \"2024-10-06\", \"qty\": 60}],\n"
+        " metric: \"qty\"\n"
+        "}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "analyze_data");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["metric"] == "qty");
+        TEST_ASSERT(args["data"].is_array());
+        TEST_ASSERT(args["data"].size() == 2);
+    }
+}
+
+
+static void test_parse_call_verb_singlequote_with_inner_doublequote() {
+    // Cubic PR #329 review: when the relaxed-JSON rewrite converts
+    // single-quoted strings to double-quoted, inner `"` chars must be
+    // escaped to `\"` — otherwise `'he said "hi"'` rewrites to
+    // `"he said "hi""` which is invalid JSON and the whole tool call
+    // is silently dropped.
+    std::string text = "call:say{quote: 'he said \"hi\" loudly'}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        TEST_ASSERT(result.tool_calls[0].name == "say");
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["quote"] == "he said \"hi\" loudly");
+    }
+}
+
+static void test_parse_call_verb_backtick_with_inner_doublequote() {
+    // Same escape concern as the single-quote case, but with the
+    // backtick string flavor.
+    std::string text = "call:say{quote: `he said \"hi\" loudly`}";
+    auto result = parse_tool_calls(text);
+    TEST_ASSERT(result.tool_calls.size() == 1);
+    if (!result.tool_calls.empty()) {
+        auto args = json::parse(result.tool_calls[0].arguments);
+        TEST_ASSERT(args["quote"] == "he said \"hi\" loudly");
+    }
+}
+
+
 // ═══════════════════════════════════════════════════════════════════════
 // SSE Emitter tests
 // ═══════════════════════════════════════════════════════════════════════
@@ -2726,6 +3063,33 @@ int main() {
     RUN_TEST(test_parse_no_tools);
     RUN_TEST(test_parse_tool_code_wrapper);
     RUN_TEST(test_parse_tool_allowed_filter);
+    RUN_TEST(test_parse_call_verb_empty_args);
+    RUN_TEST(test_parse_call_verb_strict_json_args);
+    RUN_TEST(test_parse_call_verb_namespaced_verb);
+    RUN_TEST(test_parse_call_verb_whitespace_before_key);
+    RUN_TEST(test_parse_call_verb_missing_close_brace_rejected);
+    RUN_TEST(test_parse_call_verb_narrative_without_body_rejected);
+    RUN_TEST(test_parse_call_verb_underscore_prefix);
+    RUN_TEST(test_parse_call_verb_nested_object_args);
+    RUN_TEST(test_parse_call_verb_back_to_back);
+    RUN_TEST(test_parse_call_verb_relaxed_single_quotes);
+    RUN_TEST(test_parse_call_verb_glued_to_word_rejected);
+    RUN_TEST(test_parse_call_verb_does_not_hijack_inner_name);
+    // PR #341 imports (relocated alongside the test bodies above)
+    RUN_TEST(test_parse_call_verb_single);
+    RUN_TEST(test_parse_call_verb_namespaced);
+    RUN_TEST(test_parse_call_verb_snake_and_hyphen);
+    RUN_TEST(test_parse_call_verb_tool_allowed_filter);
+    RUN_TEST(test_parse_call_verb_inline_prose_rejected);
+    RUN_TEST(test_parse_call_verb_inline_prose_after_space);
+    RUN_TEST(test_parse_call_verb_malformed_args);
+    RUN_TEST(test_parse_call_verb_inner_brace_in_string);
+    RUN_TEST(test_parse_call_verb_unquoted_keys);
+    RUN_TEST(test_parse_call_verb_cleaned_text);
+    RUN_TEST(test_parse_call_verb_intercept_inner_json);
+    RUN_TEST(test_parse_call_verb_multiline_args);
+    RUN_TEST(test_parse_call_verb_singlequote_with_inner_doublequote);
+    RUN_TEST(test_parse_call_verb_backtick_with_inner_doublequote);
 
     std::fprintf(stderr, "\n── SSE Emitter ──\n");
     RUN_TEST(test_emitter_reasoning_split_openai);