From 382dc24d178e90064c52eb292fff702b1b234b34 Mon Sep 17 00:00:00 2001 From: Kris Kersey Date: Sun, 14 Jun 2026 15:10:43 +0000 Subject: [PATCH] fix(mcp): prefer definitions and report ambiguity in name resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit trace_path resolved a function_name from the first row of an unordered name query with no ambiguity check, so a same-named entity (e.g. a shell script's main()) could silently shadow the intended C main(). get_code_snippet reported "ambiguous" for a short name even when one match was the obvious definition (the .c body vs a .h declaration). Fix: add a deterministic resolution ranking — a callable label outranks a module, then the larger definition by line span wins, preferring a real definition without hardcoding file extensions — and a picker that flags a genuine tie. trace_path now traces the preferred node and returns the existing ambiguous-suggestions response on a true tie instead of silently taking nodes[0]; get_code_snippet resolves directly to the preferred match, reporting ambiguity only for real ties. Adds regression tests tool_trace_call_path_ambiguous and tool_trace_call_path_prefers_definition. Signed-off-by: Kris Kersey --- src/mcp/mcp.c | 95 +++++++++++++++++++++++++++++++++++++++++++++- tests/test_mcp.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 2 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index d3aa3bf3..a99f52b3 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -2202,6 +2202,66 @@ static yyjson_mut_val *bfs_to_json_array(yyjson_mut_doc *doc, cbm_traverse_resul return arr; } +static char *snippet_suggestions(const char *input, cbm_node_t *nodes, int count); + +/* Rank a candidate for name resolution. The label tier (callable > class-like > + * module/file) is the primary key; WITHIN a tier the larger definition by line + * span wins. In practice the .c-over-.h and C-main-over-shell-main preferences + * come primarily from span (the real definition has the larger body), since the + * competing matches usually share a tier — no file extension is hardcoded. + * Consequence: two same-tier candidates with equal span tie and are reported + * ambiguous (see pick_resolved_node) rather than guessed. */ +enum { + RES_RANK_CALLABLE = 2, /* Function / Method */ + RES_RANK_OTHER = 1, /* Class / Struct / etc. */ + RES_RANK_MODULE = 0, /* Module / File */ + RES_LABEL_WEIGHT = 1000000 /* label tier dominates span */ +}; +static long node_resolution_score(const cbm_node_t *n) { + long label_rank = RES_RANK_MODULE; + if (n->label) { + if (strcmp(n->label, "Function") == 0 || strcmp(n->label, "Method") == 0) { + label_rank = RES_RANK_CALLABLE; + } else if (strcmp(n->label, "Module") != 0 && strcmp(n->label, "File") != 0) { + label_rank = RES_RANK_OTHER; + } + } + long span = (long)n->end_line - (long)n->start_line; + if (span < 0) { + span = 0; + } + return label_rank * (long)RES_LABEL_WEIGHT + span; +} + +/* Pick the best-resolving node among name matches. Sets *ambiguous when the top + * score is shared by more than one candidate (a genuine tie the caller must + * disambiguate) so resolution never silently traces the wrong same-named node. */ +static int pick_resolved_node(const cbm_node_t *nodes, int count, bool *ambiguous) { + *ambiguous = false; + if (count <= 1) { + return 0; + } + int best = 0; + long best_score = node_resolution_score(&nodes[0]); + for (int i = 1; i < count; i++) { + long s = node_resolution_score(&nodes[i]); + if (s > best_score) { + best_score = s; + best = i; + } + } + int top_count = 0; + for (int i = 0; i < count; i++) { + if (node_resolution_score(&nodes[i]) == best_score) { + top_count++; + } + } + if (top_count > 1) { + *ambiguous = true; + } + return best; +} + static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *func_name = cbm_mcp_get_string_arg(args, "function_name"); char *project = cbm_mcp_get_string_arg(args, "project"); @@ -2286,6 +2346,22 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { return cbm_mcp_text_result(hint, true); } + /* Disambiguate same-named matches: prefer the real definition, and report + * ambiguity (rather than silently tracing nodes[0]) on a genuine tie — e.g. + * a C main() vs a same-named shell-script main(). */ + bool trace_ambiguous = false; + int sel = pick_resolved_node(nodes, node_count, &trace_ambiguous); + if (trace_ambiguous) { + char *result = snippet_suggestions(func_name, nodes, node_count); + free(func_name); + free(project); + free(direction); + free(mode); + free(param_name); + cbm_store_free_nodes(nodes, node_count); + return result; + } + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -2311,14 +2387,14 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_traverse_result_t tr_in = {0}; if (do_outbound) { - cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, + cbm_store_bfs(store, nodes[sel].id, "outbound", edge_types, edge_type_count, depth, MCP_BFS_LIMIT, &tr_out); yyjson_mut_obj_add_val(doc, root, "callees", bfs_to_json_array(doc, &tr_out, risk_labels, include_tests)); } if (do_inbound) { - cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, + cbm_store_bfs(store, nodes[sel].id, "inbound", edge_types, edge_type_count, depth, MCP_BFS_LIMIT, &tr_in); yyjson_mut_obj_add_val(doc, root, "callers", bfs_to_json_array(doc, &tr_in, risk_labels, include_tests)); @@ -2960,6 +3036,21 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { } if (suffix_count > SKIP_ONE) { + /* Prefer the real definition (a .c body over a .h declaration, a Function + * over a Module) so an unambiguous-by-preference match resolves directly + * instead of forcing a disambiguation round trip; only a genuine tie still + * returns suggestions. */ + bool snip_ambiguous = false; + int ssel = pick_resolved_node(suffix_nodes, suffix_count, &snip_ambiguous); + if (!snip_ambiguous) { + copy_node(&suffix_nodes[ssel], &node); + cbm_store_free_nodes(suffix_nodes, suffix_count); + char *result = build_snippet_response(srv, &node, "suffix", include_neighbors, NULL, 0); + free_node_contents(&node); + free(qn); + free(project); + return result; + } char *result = snippet_suggestions(qn, suffix_nodes, suffix_count); cbm_store_free_nodes(suffix_nodes, suffix_count); free(qn); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 61c71b89..d8c5fabb 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -532,6 +532,103 @@ TEST(tool_trace_missing_function_name) { PASS(); } +/* Regression: two same-named definitions with equal rank must be reported + * ambiguous, not silently traced (trace_path previously took nodes[0]). */ +TEST(tool_trace_call_path_ambiguous) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + cbm_store_t *st = cbm_mcp_server_store(srv); + const char *proj = "amb-proj"; + cbm_mcp_server_set_project(srv, proj); + cbm_store_upsert_project(st, proj, "/tmp/amb"); + cbm_node_t a = {.project = proj, + .label = "Function", + .name = "amb", + .qualified_name = "amb-proj.a.amb", + .file_path = "a.c", + .start_line = 10, + .end_line = 20}; + cbm_node_t b = {.project = proj, + .label = "Function", + .name = "amb", + .qualified_name = "amb-proj.b.amb", + .file_path = "b.c", + .start_line = 10, + .end_line = 20}; /* equal span -> genuine tie */ + ASSERT_GT(cbm_store_upsert_node(st, &a), 0); + ASSERT_GT(cbm_store_upsert_node(st, &b), 0); + + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":61,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"trace_call_path\"," + "\"arguments\":{\"function_name\":\"amb\",\"project\":\"amb-proj\"}}}"); + ASSERT_NOT_NULL(resp); + char *inner = extract_text_content(resp); + ASSERT_NOT_NULL(inner); + ASSERT_NOT_NULL(strstr(inner, "ambiguous")); + ASSERT_NOT_NULL(strstr(inner, "suggestions")); + ASSERT_NULL(strstr(inner, "\"callees\"")); + free(inner); + free(resp); + cbm_mcp_server_free(srv); + PASS(); +} + +/* Regression: when same-named nodes differ in rank, trace must pick the real + * definition (callable, larger body) — NOT nodes[0]. The Module is inserted + * first; if trace took nodes[0] the outbound trace would be empty. */ +TEST(tool_trace_call_path_prefers_definition) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + cbm_store_t *st = cbm_mcp_server_store(srv); + const char *proj = "pref-proj"; + cbm_mcp_server_set_project(srv, proj); + cbm_store_upsert_project(st, proj, "/tmp/pref"); + /* nodes[0]: the WRONG match (a Module, tiny span), inserted first. */ + cbm_node_t wrong = {.project = proj, + .label = "Module", + .name = "dup", + .qualified_name = "pref-proj.dup", + .file_path = "dup.x", + .start_line = 1, + .end_line = 1}; + /* the real definition: a Function with a body. */ + cbm_node_t def = {.project = proj, + .label = "Function", + .name = "dup", + .qualified_name = "pref-proj.src.dup", + .file_path = "src/dup.c", + .start_line = 10, + .end_line = 50}; + cbm_node_t callee = {.project = proj, + .label = "Function", + .name = "callee", + .qualified_name = "pref-proj.src.callee", + .file_path = "src/dup.c", + .start_line = 60, + .end_line = 70}; + ASSERT_GT(cbm_store_upsert_node(st, &wrong), 0); + int64_t id_def = cbm_store_upsert_node(st, &def); + int64_t id_callee = cbm_store_upsert_node(st, &callee); + ASSERT_GT(id_def, 0); + ASSERT_GT(id_callee, 0); + cbm_edge_t e = {.project = proj, .source_id = id_def, .target_id = id_callee, .type = "CALLS"}; + cbm_store_insert_edge(st, &e); + + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":62,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"trace_call_path\",\"arguments\":{\"function_name\":\"dup\"," + "\"project\":\"pref-proj\",\"direction\":\"outbound\"}}}"); + ASSERT_NOT_NULL(resp); + char *inner = extract_text_content(resp); + ASSERT_NOT_NULL(inner); + ASSERT_NULL(strstr(inner, "ambiguous")); + /* picked the Function definition -> its outbound CALLS edge to "callee" shows */ + ASSERT_NOT_NULL(strstr(inner, "callee")); + free(inner); + free(resp); + cbm_mcp_server_free(srv); + PASS(); +} + TEST(tool_delete_project_not_found) { cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); @@ -2044,6 +2141,8 @@ SUITE(mcp) { /* Tool handlers with validation */ RUN_TEST(tool_trace_call_path_not_found); RUN_TEST(tool_trace_missing_function_name); + RUN_TEST(tool_trace_call_path_ambiguous); + RUN_TEST(tool_trace_call_path_prefers_definition); RUN_TEST(tool_delete_project_not_found); RUN_TEST(tool_get_architecture_empty); RUN_TEST(tool_get_architecture_emits_populated_sections);