From 870e5d526b29acc54d2ebc78946a3cfb8be06469 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 03:03:15 -0500 Subject: [PATCH 1/8] common : simplify autoparser tagged parser rules --- common/chat-auto-parser-generator.cpp | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 3f036bb5b2..3a0d7bf6d8 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -279,36 +279,32 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte const auto & inputs = ctx.inputs; bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix)); + common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { const auto & func = tool.at("function"); std::string name = func.at("name"); - const auto & params = func.contains("parameters") ? func.at("parameters") : json::object(); + auto params = func.contains("parameters") ? func.at("parameters") : json::object(); const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); std::set required; + auto schema_info = common_schema_info(); + schema_info.resolve_refs(params); + // Build parser for each argument, separating required and optional std::vector required_parsers; std::vector optional_parsers; for (const auto & [param_name, param_schema] : properties.items()) { - bool is_required = required.find(param_name) != required.end(); - std::string type = "object"; - auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object(); - if (type_obj.is_string()) { - type_obj.get_to(type); - } else if (type_obj.is_object()) { - if (type_obj.contains("type") && type_obj.at("type").is_string()) { - type_obj.at("type").get_to(type); - } - } + bool is_required = required.find(param_name) != required.end(); auto arg = p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + - (type == "string" ? - p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), + (schema_info.resolves_to_string(param_schema) ? + p.tool_arg_string_value(p.schema(until_suffix, "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : p.tool_arg_json_value(p.schema( @@ -405,7 +401,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte if (!format.section_start.empty()) { tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + tool_calls + p.space() + - (format.section_end.empty() ? p.end() : p.literal(format.section_end))); + (format.section_end.empty() ? p.eps() : p.literal(format.section_end))); } } else { std::string separator = ", "; // Default @@ -426,8 +422,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls + - p.end(); + return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls; } } // namespace autoparser From d417cd6a00c7a6f6781d9675629e3152bbb0baed Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 22:27:26 -0500 Subject: [PATCH 2/8] cont : remove upper limit on optional args --- common/chat-auto-parser-generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 3a0d7bf6d8..6266861a10 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -335,7 +335,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte for (const auto & opt : optional_parsers) { any_opt |= opt; } - args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size()); + args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1); } // Build call_id parser based on position (if supported) From 508af9098763a88365c367a8e73f4b878c57b70d Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 22:31:05 -0500 Subject: [PATCH 3/8] cont : revert changes to parsing at the end --- common/chat-auto-parser-generator.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 6266861a10..3461566677 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -401,7 +401,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte if (!format.section_start.empty()) { tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + tool_calls + p.space() + - (format.section_end.empty() ? p.eps() : p.literal(format.section_end))); + (format.section_end.empty() ? p.end() : p.literal(format.section_end))); } } else { std::string separator = ", "; // Default @@ -422,7 +422,8 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls; + return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls + + p.end(); } } // namespace autoparser From fd78b137cba24df08865a2e675b48c330386374f Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 1 Apr 2026 00:04:57 -0500 Subject: [PATCH 4/8] cont : undo arbitrary ordering of optional args --- common/chat-auto-parser-generator.cpp | 32 +++++-------------- tests/test-chat.cpp | 44 ++------------------------- 2 files changed, 10 insertions(+), 66 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 3461566677..4eed6f27b6 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -293,9 +293,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte auto schema_info = common_schema_info(); schema_info.resolve_refs(params); - // Build parser for each argument, separating required and optional - std::vector required_parsers; - std::vector optional_parsers; + std::vector args; for (const auto & [param_name, param_schema] : properties.items()) { bool is_required = required.find(param_name) != required.end(); @@ -312,31 +310,15 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte p.space()) + p.tool_arg_close(p.literal(arguments.value_suffix))); - auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); - if (is_required) { - required_parsers.push_back(named_arg); - } else { - optional_parsers.push_back(named_arg); - } - } + auto named_arg = p.repeat(p.rule("tool-" + name + "-arg-" + param_name, arg), is_required ? 1 : 0, 1); - // Build required arg sequence in definition order - common_peg_parser args_seq = p.eps(); - for (size_t i = 0; i < required_parsers.size(); i++) { - if (i > 0) { - args_seq = args_seq + p.space(); + if (!args.empty()) { + args.push_back(p.space()); } - args_seq = args_seq + required_parsers[i]; + args.push_back(named_arg); } - // Build optional args with flexible ordering - if (!optional_parsers.empty()) { - common_peg_parser any_opt = p.choice(); - for (const auto & opt : optional_parsers) { - any_opt |= opt; - } - args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1); - } + common_peg_parser args_seq = p.sequence(args); // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); @@ -357,7 +339,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + call_id_section) + p.space() + args_seq; matched_atomic = true; - } else if (!arguments.name_prefix.empty() && !required_parsers.empty()) { + } else if (!arguments.name_prefix.empty() && !required.empty()) { // Only peek for an arg tag when there are required args that must follow. // When all args are optional, the model may emit no arg tags at all (#20650). func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 1c4da68195..563a4a6605 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -2144,57 +2144,19 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .expect_reconstruction() .run(); - // Test flexible optional argument ordering (2 required + 4 optional, reversed optional order) + // Test optional argument ordering (2 required + 4 optional) tst.test( "\n" "\n" "\nhello\n\n" "\n42\n\n" - "\n100\n\n" "\n200\n\n" + "\n100\n\n" "\n" "") .tools({ tool_2req_4opt }) .expect_tool_calls({ - { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt4": 100, "opt2": 200})", {} }, - }) - .expect_reconstruction() - .run(); - - // Test flexible optional argument ordering (2 required + 5 optional, reversed optional order) - tst.test( - "\n" - "\n" - "\nworld\n\n" - "\n7\n\n" - "\nlast\n\n" - "\nmiddle\n\n" - "\nfirst\n\n" - "\n" - "") - .tools({ tool_2req_5opt }) - .expect_tool_calls({ - { "tool_2req_5opt", R"({"req1": "world", "req2": 7, "opt5": "last", "opt3": "middle", "opt1": "first"})", {} }, - }) - .expect_reconstruction() - .run(); - - // Test flexible optional argument ordering (2 required + 5 optional, all 5 in shuffled order) - tst.test( - "\n" - "\n" - "\ntest\n\n" - "\n99\n\n" - "\nc\n\n" - "\na\n\n" - "\ne\n\n" - "\n4\n\n" - "\n2\n\n" - "\n" - "") - .tools({ tool_2req_5opt }) - .expect_tool_calls({ - { "tool_2req_5opt", R"({"req1": "test", "req2": 99, "opt3": "c", "opt1": "a", "opt5": "e", "opt4": 4, "opt2": 2})", {} }, + { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt2": 200, "opt4": 100})", {} }, }) .expect_reconstruction() .run(); From e6871d2389e42f13c01661baf85cadd759c67402 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 1 Apr 2026 00:42:59 -0500 Subject: [PATCH 5/8] cont : fix uninitialized required parameters --- common/chat-auto-parser-generator.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 4eed6f27b6..d83079bba0 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -288,7 +288,11 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string name = func.at("name"); auto params = func.contains("parameters") ? func.at("parameters") : json::object(); const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); + std::set required; + if (params.contains("required")) { + params.at("required").get_to(required); + } auto schema_info = common_schema_info(); schema_info.resolve_refs(params); From f285719ae1fb7220774a0dfe3dd9860fb51f410f Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Sun, 5 Apr 2026 19:00:18 -0500 Subject: [PATCH 6/8] revert to simplify merge --- common/chat-auto-parser-generator.cpp | 58 ++++++++++++++++++--------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index d83079bba0..25b30f86cc 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -279,34 +279,36 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte const auto & inputs = ctx.inputs; bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix)); - common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { const auto & func = tool.at("function"); std::string name = func.at("name"); - auto params = func.contains("parameters") ? func.at("parameters") : json::object(); + const auto & params = func.contains("parameters") ? func.at("parameters") : json::object(); const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); - std::set required; - if (params.contains("required")) { - params.at("required").get_to(required); - } - auto schema_info = common_schema_info(); - schema_info.resolve_refs(params); - - std::vector args; + // Build parser for each argument, separating required and optional + std::vector required_parsers; + std::vector optional_parsers; for (const auto & [param_name, param_schema] : properties.items()) { - bool is_required = required.find(param_name) != required.end(); + bool is_required = required.find(param_name) != required.end(); + std::string type = "object"; + auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object(); + if (type_obj.is_string()) { + type_obj.get_to(type); + } else if (type_obj.is_object()) { + if (type_obj.contains("type") && type_obj.at("type").is_string()) { + type_obj.at("type").get_to(type); + } + } auto arg = p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + - (schema_info.resolves_to_string(param_schema) ? - p.tool_arg_string_value(p.schema(until_suffix, + (type == "string" ? + p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : p.tool_arg_json_value(p.schema( @@ -314,15 +316,31 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte p.space()) + p.tool_arg_close(p.literal(arguments.value_suffix))); - auto named_arg = p.repeat(p.rule("tool-" + name + "-arg-" + param_name, arg), is_required ? 1 : 0, 1); + auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); + if (is_required) { + required_parsers.push_back(named_arg); + } else { + optional_parsers.push_back(named_arg); + } + } - if (!args.empty()) { - args.push_back(p.space()); + // Build required arg sequence in definition order + common_peg_parser args_seq = p.eps(); + for (size_t i = 0; i < required_parsers.size(); i++) { + if (i > 0) { + args_seq = args_seq + p.space(); } - args.push_back(named_arg); + args_seq = args_seq + required_parsers[i]; } - common_peg_parser args_seq = p.sequence(args); + // Build optional args with flexible ordering + if (!optional_parsers.empty()) { + common_peg_parser any_opt = p.choice(); + for (const auto & opt : optional_parsers) { + any_opt |= opt; + } + args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1); + } // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); @@ -343,7 +361,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + call_id_section) + p.space() + args_seq; matched_atomic = true; - } else if (!arguments.name_prefix.empty() && !required.empty()) { + } else if (!arguments.name_prefix.empty() && !required_parsers.empty()) { // Only peek for an arg tag when there are required args that must follow. // When all args are optional, the model may emit no arg tags at all (#20650). func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + From a8e55236384daae020eaab46373cdd861c3a2f96 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Sun, 5 Apr 2026 20:16:32 -0500 Subject: [PATCH 7/8] re-apply patches --- common/chat-auto-parser-generator.cpp | 48 ++++++++------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index af923fe766..d3086725d4 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -332,58 +332,36 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte const auto & inputs = ctx.inputs; bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix)); + common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { const auto & func = tool.at("function"); std::string name = func.at("name"); - const auto & params = func.contains("parameters") ? func.at("parameters") : json::object(); + auto params = func.contains("parameters") ? func.at("parameters") : json::object(); const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); + std::set required; + if (params.contains("required")) { + params.at("required").get_to(required); + } + + auto schema_info = common_schema_info(); + schema_info.resolve_refs(params); // Build parser for each argument, separating required and optional std::vector required_parsers; std::vector optional_parsers; for (const auto & [param_name, param_schema] : properties.items()) { - bool is_required = required.find(param_name) != required.end(); - std::string type = "object"; - if (param_schema.contains("type")) { - const auto & type_obj = param_schema.at("type"); - if (type_obj.is_string()) { - type_obj.get_to(type); - } else if (type_obj.is_array()) { - // Handle nullable types like ["string", "null"] - for (const auto & t : type_obj) { - if (t.is_string() && t.get() != "null") { - type = t.get(); - break; - } - } - } else if (type_obj.is_object()) { - if (type_obj.contains("type") && type_obj.at("type").is_string()) { - type_obj.at("type").get_to(type); - } - } - } - // Infer string type from enum values when type is unspecified - if (type == "object" && param_schema.contains("enum")) { - const auto & enum_vals = param_schema.at("enum"); - if (enum_vals.is_array()) { - for (const auto & v : enum_vals) { - if (v.is_string()) { - type = "string"; - break; - } - } - } - } + bool is_required = required.find(param_name) != required.end(); auto arg = p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + - (type == "string" ? - p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), + (schema_info.resolves_to_string(param_schema) ? + p.tool_arg_string_value(p.schema(until_suffix, "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : p.tool_arg_json_value(p.schema( From b24d87fbb7cdf25b1bba104026e7266e0d1cdc58 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Sun, 5 Apr 2026 20:24:48 -0500 Subject: [PATCH 8/8] restore flexible optional arg ordering tests --- tests/test-chat.cpp | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 1682c8ff61..72deeeab3c 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -2404,19 +2404,57 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .expect_reconstruction() .run(); - // Test optional argument ordering (2 required + 4 optional) + // Test flexible optional argument ordering (2 required + 4 optional, reversed optional order) tst.test( "\n" "\n" "\nhello\n\n" "\n42\n\n" - "\n200\n\n" "\n100\n\n" + "\n200\n\n" "\n" "") .tools({ tool_2req_4opt }) .expect_tool_calls({ - { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt2": 200, "opt4": 100})", {} }, + { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt4": 100, "opt2": 200})", {} }, + }) + .expect_reconstruction() + .run(); + + // Test flexible optional argument ordering (2 required + 5 optional, reversed optional order) + tst.test( + "\n" + "\n" + "\nworld\n\n" + "\n7\n\n" + "\nlast\n\n" + "\nmiddle\n\n" + "\nfirst\n\n" + "\n" + "") + .tools({ tool_2req_5opt }) + .expect_tool_calls({ + { "tool_2req_5opt", R"({"req1": "world", "req2": 7, "opt5": "last", "opt3": "middle", "opt1": "first"})", {} }, + }) + .expect_reconstruction() + .run(); + + // Test flexible optional argument ordering (2 required + 5 optional, all 5 in shuffled order) + tst.test( + "\n" + "\n" + "\ntest\n\n" + "\n99\n\n" + "\nc\n\n" + "\na\n\n" + "\ne\n\n" + "\n4\n\n" + "\n2\n\n" + "\n" + "") + .tools({ tool_2req_5opt }) + .expect_tool_calls({ + { "tool_2req_5opt", R"({"req1": "test", "req2": 99, "opt3": "c", "opt1": "a", "opt5": "e", "opt4": 4, "opt2": 2})", {} }, }) .expect_reconstruction() .run();