From f32d1409013fb757cf40b185470f6f8cdf2f0b15 Mon Sep 17 00:00:00 2001 From: Josh Ferge Date: Wed, 13 May 2026 17:49:20 -0400 Subject: [PATCH] chore(evals): unblock ToolPredictionScorer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop the stale `--all-scopes` flag the bin no longer accepts; the scorer-side stdio spawn was printing usage and exiting, surfacing as `MCPClientError: Connection closed` across every eval. - Replace the `predictedTools[*].arguments` field with `argumentsJson`, a JSON-encoded string; `z.record(z.any())` emits `additionalProperties` with no `type`, which OpenAI's strict response_format rejects. With both fixes, `autofix.eval.ts` scores 1.00 / 1.00 against the new explorer-mode tool flow. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/evals/utils/toolPredictionScorer.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts b/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts index dcfaf1bbe..d0e2aaf4e 100644 --- a/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts +++ b/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts @@ -19,12 +19,7 @@ async function getAvailableTools(): Promise { // Use pnpm exec to run the binary from the workspace const transport = new Experimental_StdioMCPTransport({ command: "pnpm", - args: [ - "exec", - "sentry-mcp", - "--access-token=mocked-access-token", - "--all-scopes", - ], + args: ["exec", "sentry-mcp", "--access-token=mocked-access-token"], env: { ...process.env, SENTRY_ACCESS_TOKEN: "mocked-access-token", @@ -73,7 +68,12 @@ const predictionSchema = z.object({ .array( z.object({ name: z.string(), - arguments: z.record(z.any()).optional().default({}), + // Serialize arguments as a JSON string so the schema stays compatible + // with OpenAI's strict response_format 
(z.record(z.any()) emits + // `additionalProperties` without a `type`, which the API rejects). + argumentsJson: z + .string() + .describe("JSON-encoded tool arguments, e.g. '{}' for none."), }), ) .describe("What tools the AI would likely call"),