From 20ccede644d52783565b0c890dcaa89a15a0184e Mon Sep 17 00:00:00 2001 From: Jeremiah Blanchard Date: Tue, 14 Apr 2026 04:26:47 +0000 Subject: [PATCH] chat : add MiniMax M2 specialized tool-call handler The autoparser (peg-native) cannot parse MiniMax's XML-based tool-call format, causing GGML_ABORT crashes when tools are present. Add a specialized handler following the Kimi K2 pattern with XML parameter parsing via tool_arg_name/tool_arg_value tags. --- common/chat.cpp | 130 ++++++++++++++ tests/test-chat.cpp | 415 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 541 insertions(+), 4 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 5b93c5887..3d43186c2 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1846,6 +1846,129 @@ static void func_args_not_string(json & messages) { } +// MiniMax M2 format: uses ...value...... +// - Reasoning: {reasoning} (optional) +static common_chat_params common_chat_params_init_minimax(const common_chat_template & tmpl, + const autoparser::generation_params & inputs) { + common_chat_params data; + + data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.supports_thinking = true; + data.thinking_start_tag = ""; + data.thinking_end_tag = ""; + data.preserved_tokens = { + "", + "", + "", + "", + }; + + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + const std::string TC_START = ""; + const std::string TC_END = ""; + const std::string THINK_START = ""; + const std::string THINK_END = ""; + + auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) { + auto end = p.end(); + + // Reasoning extraction (same pattern as Kimi K2) + auto reasoning = extract_reasoning ? p.optional(THINK_START + p.reasoning( + p.until_one_of({ THINK_END, TC_START })) + + p.optional(p.literal(THINK_END))) : p.eps(); + auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START); + + // Content only parser (no tools) + if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { + return generation_prompt + reasoning + p.content(p.rest()) + end; + } + + // Build tool call parsers for each available function + // MiniMax format: value... + auto tool_choice = p.choice(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); + + // Build parameter parsers + auto arg_choice = p.choice(); + if (params.contains("properties") && !params["properties"].empty()) { + for (const auto & el : params["properties"].items()) { + const std::string & prop_name = el.key(); + const auto & prop_def = el.value(); + bool is_string_type = (prop_def.contains("type") && prop_def["type"] == "string"); + + // value + auto arg_rule = p.tool_arg( + p.tool_arg_open(p.literal("") + + (is_string_type + ? p.tool_arg_string_value(p.until("")) + : p.tool_arg_value(p.until(""))) + + p.tool_arg_close(p.literal("")) + ); + arg_choice |= arg_rule; + } + } + auto args = p.zero_or_more(p.space() + arg_choice); + + // ...params... + auto tool_parser = p.tool( + p.tool_open( + p.literal("") + ) + + p.tool_args(args) + + p.space() + + p.tool_close(p.literal("")) + ); + + tool_choice |= p.rule("tool-" + name, tool_parser); + }); + + // Tool calls section: \n...tool calls...\n + auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; + auto max_calls = inputs.parallel_tool_calls ? -1 : 1; + auto tool_calls = p.rule("tool-calls", + p.trigger_rule("tool-call", + p.literal(TC_START) + p.space() + + p.repeat(tool_choice + p.space(), min_calls, max_calls) + + p.optional(p.literal(TC_END))) + ); + + auto content_before_tools = p.content(p.until_one_of({ TC_START })); + + return generation_prompt + reasoning + content_before_tools + tool_calls + end; + }); + + data.parser = parser.save(); + + if (include_grammar) { + data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + auto schema = function.at("parameters"); + builder.resolve_refs(schema); + }); + parser.build_grammar(builder, data.grammar_lazy); + }); + + data.grammar_triggers = { + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "" } + }; + } + + return data; +} + static json common_chat_extra_context() { json ctx = json::object(); std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); @@ -1912,6 +2035,13 @@ std::optional common_chat_try_specialized_template( return common_chat_params_init_gigachat_v3(tmpl, params); } + // MiniMax M2 format detection + if (src.find("") != std::string::npos && + src.find("call:'") != std::string::npos) { workaround::convert_tool_responses_gemma4(params.messages); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 72deeeab3..692182940 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -998,6 +998,7 @@ static void test_peg_parser(common_chat_templates * tmpls, auto parser = make_peg_parser(tmpls, tc.params, detailed_debug); if (detailed_debug) { LOG_DBG("Using parser: \n%s\n", parser.arena_.dump(parser.arena_.root()).c_str()); + LOG_DBG("Generation prompt: '%s'\n", parser.params_.generation_prompt.c_str()); } common_chat_msg msg_accum; @@ -1987,6 +1988,13 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .expect(message_assist_thoughts) .run(); + // Empty reasoning (budget=0: sampler forces end tag before newline) + tst.test( + "<|channel>thoughtHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(simple_assist_msg("Hello, world!\nWhat's up?", "")) + .run(); + // Reasoning and content with reasoning_format = none tst.test( "<|channel>thought\nI'm\nthinkingHello, world!\nWhat's up?") @@ -2110,6 +2118,31 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .tools({ amount_tool }) .expect(message_with_tool_calls("amount", R"({"orig": 1.5e10})")) .run(); + + // Edge cases + tst.test( + "<|channel>thought\nHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist) + .run(); + + tst.test( + "<|channel>thought\nHello, world!\nWhat's up?<|channel>thought\n") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist) + .run(); + + tst.test( + "<|channel>thought\nHello, world!\nWhat's up?<|channel>thought\n") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist) + .run(); + + tst.test( + "<|channel><|channel>thought\nHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist) + .run(); } { @@ -2131,7 +2164,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { tst.test( "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}" "") .tools({ special_function_tool }) .expect(message_assist_call) @@ -2139,7 +2172,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { tst.test( "Hello, world!\nWhat's up?\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}" "") .tools({ special_function_tool }) .expect(message_assist_call_content) @@ -2568,6 +2601,215 @@ static void test_template_output_peg_parsers(bool detailed_debug) { expect(simple_assist_msg("CONTENT", "")).run(); } + // DeepSeek V3.2 tests - format uses DSML markup: + // <|DSML|function_calls> + // <|DSML|invoke name="foo"> + // <|DSML|parameter name="bar" string="true|false">value + // + // + // Reasoning uses .... The generation prompt ends in (thinking mode) + // or (non-thinking mode). + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.2.jinja", detailed_debug); + + // Pure content (non-thinking mode) + tst.test("Hello, world!\nWhat's up?") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist) + .run(); + + // Thinking + content + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + + // Thinking + tool call (single, string param) + tst.test( + "Let me check the time\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"get_time\">\n" + "<|DSML|parameter name=\"city\" string=\"true\">Tokyo\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ get_time_tool }) + .expect(message_with_tool_calls_and_reasoning("get_time", R"({"city": "Tokyo"})", "Let me check the time")) + .run(); + + // Tool call without reasoning (non-thinking mode), integer param (string="false") + tst.test( + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"special_function\">\n" + "<|DSML|parameter name=\"arg1\" string=\"false\">1\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Multiple parallel tool calls with reasoning + tst.test( + "Calling both\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"get_time\">\n" + "<|DSML|parameter name=\"city\" string=\"true\">Paris\n" + "\n" + "<|DSML|invoke name=\"get_weather\">\n" + "<|DSML|parameter name=\"city\" string=\"true\">Paris\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .parallel_tool_calls(true) + .tools({ get_time_tool, get_weather_tool }) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "Calling both", "", + { { "get_time", R"({"city": "Paris"})" }, { "get_weather", R"({"city": "Paris"})" } })) + .run(); + + // Tool call with content before tool calls + tst.test( + "Thinking about it" + "Let me call the function.\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"special_function\">\n" + "<|DSML|parameter name=\"arg1\" string=\"false\">1\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect_reasoning("Thinking about it") + .expect_content("Let me call the function.") + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + }) + .run(); + + // Tool call with negative number + tst.test( + "Test negative\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"magic_int\">\n" + "<|DSML|parameter name=\"ref\" string=\"false\">-14\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_int_tool }) + .expect_reasoning("Test negative") + .expect_tool_calls({ + { "magic_int", R"({"ref": -14})", {} }, + }) + .run(); + + // Tool call with decimal number + tst.test( + "Test decimal\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"amount\">\n" + "<|DSML|parameter name=\"orig\" string=\"false\">3.14\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ amount_tool }) + .expect_reasoning("Test decimal") + .expect_tool_calls({ + { "amount", R"({"orig": 3.14})", {} }, + }) + .run(); + + // Tool call with boolean + tst.test( + "Test boolean\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"toggle\">\n" + "<|DSML|parameter name=\"enabled\" string=\"false\">true\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ toggle_tool }) + .expect_reasoning("Test boolean") + .expect_tool_calls({ + { "toggle", R"({"enabled": true})", {} }, + }) + .run(); + + // Tool call with array parameter (JSON-formatted) + tst.test( + "Test array\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"todo_list\">\n" + "<|DSML|parameter name=\"todos\" string=\"false\">[\"buy milk\",\"walk dog\"]\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ todo_list }) + .expect_reasoning("Test array") + .expect_tool_calls({ + { "todo_list", R"({"todos": ["buy milk", "walk dog"]})", {} }, + }) + .run(); + + // Tool call with object parameter (JSON-formatted) + tst.test( + "Test object\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"set_config\">\n" + "<|DSML|parameter name=\"config\" string=\"false\">{\"theme\":\"dark\",\"level\":2}\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ config_tool }) + .expect_reasoning("Test object") + .expect_tool_calls({ + { "set_config", R"({"config": {"theme": "dark", "level": 2}})", {} }, + }) + .run(); + + // Edge case: empty reasoning + tst.test( + "\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"get_time\">\n" + "<|DSML|parameter name=\"city\" string=\"true\">XYZCITY\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ get_time_tool }) + .expect(message_with_tool_calls("get_time", R"({"city": "XYZCITY"})")) + .run(); + + // Edge case: tool call with multiple params (mixed types, string first) + tst.test( + "Multi-arg call\n\n" + "<|DSML|function_calls>\n" + "<|DSML|invoke name=\"magic_int\">\n" + "<|DSML|parameter name=\"ref\" string=\"false\">42\n" + "<|DSML|parameter name=\"name\" string=\"true\">foo bar\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_int_tool }) + .expect_reasoning("Multi-arg call") + .expect_tool_calls({ + { "magic_int", R"({"ref": 42, "name": "foo bar"})", {} }, + }) + .run(); + } + // GLM-4.6 tests - format: function_name\n...\n...\n { auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug); @@ -3087,6 +3329,92 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .run(); } + // Reka-Edge tests - uses native JSON format with per-call wrapper + { + auto tst = peg_tester("models/templates/Reka-Edge.jinja", detailed_debug); + + // Basic content only + tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run(); + + // Single tool call without reasoning + tst.test("\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}") + .enable_thinking(false) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with string argument + tst.test("\n{\"name\": \"get_time\", \"arguments\": {\"city\": \"XYZCITY\"}}") + .enable_thinking(false) + .tools({ get_time_tool }) + .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) + .run(); + + // Tool call with reasoning (enable_thinking=true) + tst.test("I'm\nthinking\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Multiple tool calls (parallel) + tst.test( + "\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}" + "\n{\"name\": \"special_function_with_opt\", \"arguments\": {\"arg1\": 1, \"arg2\": 2}}" + ) + .enable_thinking(false) + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + // Tool call with reasoning and content + tst.test("I need to call a function" + "Let me check the time.\n{\"name\": \"get_time\", \"arguments\": {\"city\": \"XYZCITY\"}}") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ get_time_tool }) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"XYZCITY\"}" } } + )) + .run(); + + // Partial tool call (streaming) + tst.test("\n{\"name\": \"special_function\", \"arguments\": {\"arg1\":") + .tools({ special_function_tool }) + .enable_thinking(false) + .is_partial(true) + .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ")) + .run(); + + // Tool call with empty arguments + tst.test("\n{\"name\": \"empty_args\", \"arguments\": {}}") + .enable_thinking(false) + .tools({ empty_args_tool }) + .expect(simple_assist_msg("", "", "empty_args", "{}")) + .run(); + + // fake tool call marker in reasoning + tst.test( + "Let me think about \n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 2}} hmm" + "\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect_reasoning("Let me think about \n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 2}} hmm") + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + }) + .run(); + } + + // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|> { @@ -3102,13 +3430,92 @@ static void test_template_output_peg_parsers(bool detailed_debug) { // Format: value { auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist).run(); + + tst.test("I'm\nthinkingHello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist_thoughts).run(); + + tst.test("Let's call a tool:\n\n\n"). + enable_thinking(true). + reasoning_format(COMMON_REASONING_FORMAT_AUTO). + tools({ empty_args_tool }). + expect(message_with_reasoning_and_tool_call("Let's call a tool:", "empty_args", "{}")). + run(); + tst.test( - "\n\n\n\n1\n\n") .tools({ special_function_tool }) .expect(message_assist_call) .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .run(); + + // Parallel tool calls (two different tools) inside one wrapper + tst.test( + "\n" + "\n1\n\n" + "\nprint('hey')\n\n" + "") + .tools({ special_function_tool, python_tool }) + .parallel_tool_calls(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "", "", + { + { "special_function", R"x({"arg1": 1})x" }, + { "python", R"x({"code": "print('hey')"})x" }, + })) + .run(); + + // String parameter with embedded XML-ish content + tst.test( + "\n\n" + "
\n" + "
\n
") + .tools({ html_tool }) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_tool_calls( + "html", R"x({"markup": "
"})x")) + .run(); + + // Multi-line string parameter value + tst.test( + "\n\n" + "import os\nfor k in os.environ:\n print(k)\n" + "\n") + .tools({ python_tool }) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_tool_calls( + "python", R"x({"code": "import os\nfor k in os.environ:\n print(k)"})x")) + .run(); + + // Tool with two integer parameters + tst.test( + "\n\n" + "1\n" + "42\n" + "\n") + .tools({ special_function_tool_with_optional_param }) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_tool_calls( + "special_function_with_opt", R"x({"arg1": 1, "arg2": 42})x")) + .run(); + + // Parallel calls to the same tool inside one wrapper + tst.test( + "\n" + "\nprint('a')\n\n" + "\nprint('b')\n\n" + "") + .tools({ python_tool }) + .parallel_tool_calls(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "", "", + { + { "python", R"x({"code": "print('a')"})x" }, + { "python", R"x({"code": "print('b')"})x" }, + })) + .run(); } // NVIDIA-Nemotron-Nano-v2 tests - ... format @@ -3442,7 +3849,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }, "replaceAll": { "type": "boolean", - "description": "Whether to replace all occurences." + "description": "Whether to replace all occurrences." } }, "required": ["oldString", "newString"]