diff --git a/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts b/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts index dcfaf1bbe..d0e2aaf4e 100644 --- a/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts +++ b/packages/mcp-server-evals/src/evals/utils/toolPredictionScorer.ts @@ -19,12 +19,7 @@ async function getAvailableTools(): Promise { // Use pnpm exec to run the binary from the workspace const transport = new Experimental_StdioMCPTransport({ command: "pnpm", - args: [ - "exec", - "sentry-mcp", - "--access-token=mocked-access-token", - "--all-scopes", - ], + args: ["exec", "sentry-mcp", "--access-token=mocked-access-token"], env: { ...process.env, SENTRY_ACCESS_TOKEN: "mocked-access-token", @@ -73,7 +68,12 @@ const predictionSchema = z.object({ .array( z.object({ name: z.string(), - arguments: z.record(z.any()).optional().default({}), + // Serialize arguments as a JSON string so the schema stays compatible + // with OpenAI's strict response_format (z.record(z.any()) emits + // `additionalProperties` without a `type`, which the API rejects). + argumentsJson: z + .string() + .describe("JSON-encoded tool arguments, e.g. '{}' for none."), }), ) .describe("What tools the AI would likely call"),