diff --git a/packages/logic-grid-ai/README.md b/packages/logic-grid-ai/README.md index e9fe67c..457ec22 100644 --- a/packages/logic-grid-ai/README.md +++ b/packages/logic-grid-ai/README.md @@ -67,14 +67,34 @@ interface ThemeResult { At least one category must have `ordered: true` with `orderingPhrases.comparators` defining all 7 comparator phrases. The result is validated against structural and semantic rules (value uniqueness, noun consistency, category count, ordered category presence, etc.). If validation fails, the AI is retried with error feedback up to 3 times. -### `createAnthropicClient(apiKey?)` +If all retries fail, `generateTheme` throws a `ThemeGenerationError`. The error carries an `errors` array of structured `ThemeValidationError` objects (each with a stable `code` like `"no_ordered_category"` or `"duplicate_value"` and a human-readable `message`), so callers can branch on the failure mode: -Create the default AI client backed by the Anthropic SDK. If no key is provided, reads from `ANTHROPIC_API_KEY`. +```typescript +import { generateTheme, ThemeGenerationError } from "logic-grid-ai"; + +try { + const theme = await generateTheme({ theme: "...", size: 4, categories: 4 }); +} catch (err) { + if (err instanceof ThemeGenerationError) { + if (err.errors.some((e) => e.code === "no_ordered_category")) { + // Show a hint about ordered categories + } + } + throw err; +} +``` + +> Transport-level retries (429s, 5xx, network errors) are already handled inside the Anthropic SDK with exponential backoff — they don't consume one of the 3 semantic-retry attempts. + +### `createAnthropicClient(apiKey?, options?)` + +Create the default AI client backed by the Anthropic SDK. If no key is provided, reads from `ANTHROPIC_API_KEY`. Pass `{ model }` to override the default model (`claude-sonnet-4-6`): ```typescript import { createAnthropicClient } from "logic-grid-ai"; -const client = createAnthropicClient("sk-ant-..."); +const fast = createAnthropicClient(undefined, { model: "claude-haiku-4-5" }); +const explicit = createAnthropicClient("sk-ant-..."); ``` ### Custom AI Client @@ -100,14 +120,14 @@ const theme = await generateTheme({ ### `validateThemeResult(result, size, categories)` -Validate AI output against structural and semantic rules. Returns an array of error messages (empty = valid). Used internally by `generateTheme`, but exported for custom pipelines. +Validate AI output against structural and semantic rules. Returns `ThemeValidationError[]` (empty = valid). Each error has a stable `code`, a human-readable `message`, and an optional `category` field naming the offending category. Used internally by `generateTheme`, but exported for custom pipelines. ```typescript import { validateThemeResult } from "logic-grid-ai"; const errors = validateThemeResult(result, 4, 4); if (errors.length > 0) { - console.error("Invalid theme:", errors); + for (const e of errors) console.error(`[${e.code}] ${e.message}`); } ``` @@ -126,11 +146,11 @@ const rewritten = await rewriteClues({ // Returns Clue[] with the same constraints and replaced text fields. ``` -All clues are rewritten in a single batched AI call. Each clue is sent alongside its constraint JSON so the AI has ground-truth semantics. Output is validated against duplicate / empty / overlong text rules; retries up to 3 times before throwing. +All clues are rewritten in a single batched AI call. Each clue is sent alongside its constraint JSON so the AI has ground-truth semantics. Output is validated against duplicate / empty / overlong text rules; retries up to 3 times before throwing a `RewriteCluesError` (parallel to `ThemeGenerationError` — carries `errors: RewriteCluesValidationError[]` with codes like `"empty_clue"`, `"long_clue"`, `"duplicate_clue"`). ### `validateRewrittenClues(result, expectedCount)` -Validate raw AI output for `rewriteClues`. Returns an array of error messages (empty = valid). +Validate raw AI output for `rewriteClues`. Returns `RewriteCluesValidationError[]` (empty = valid). Each error has a `code`, a `message`, and an optional 1-indexed `clueIndex`. ```typescript import { validateRewrittenClues } from "logic-grid-ai"; diff --git a/packages/logic-grid-ai/src/client.test.ts b/packages/logic-grid-ai/src/client.test.ts index a11d13c..8671b1d 100644 --- a/packages/logic-grid-ai/src/client.test.ts +++ b/packages/logic-grid-ai/src/client.test.ts @@ -68,4 +68,19 @@ describe("createAnthropicClient", () => { expect(mockCreate).toHaveBeenCalled(); }); + + it("uses overridden model when passed via options", async () => { + mockCreate.mockResolvedValueOnce({ + content: [{ type: "tool_use", id: "call_3", name: "respond", input: {} }], + }); + + const client = createAnthropicClient(undefined, { + model: "claude-haiku-4-5", + }); + await client.completeJSON("test", { type: "object" }); + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ model: "claude-haiku-4-5" }), + ); + }); }); diff --git a/packages/logic-grid-ai/src/client.ts b/packages/logic-grid-ai/src/client.ts index f743867..1e7d4cb 100644 --- a/packages/logic-grid-ai/src/client.ts +++ b/packages/logic-grid-ai/src/client.ts @@ -1,19 +1,38 @@ import Anthropic from "@anthropic-ai/sdk"; import type { AIClient, JSONSchema } from "./types"; +/** Default model used when no `model` option is provided. */ +export const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-6"; + +/** Optional knobs for the default Anthropic-backed client. */ +export interface AnthropicClientOptions { + /** Override the model. Defaults to {@link DEFAULT_ANTHROPIC_MODEL}. */ + model?: string; +} + /** * Create an AIClient backed by the Anthropic SDK. * - * Uses Claude's tool_use feature for structured JSON output. - * If no apiKey is provided, the SDK reads from ANTHROPIC_API_KEY. + * Uses Claude's tool_use feature for structured JSON output. The Anthropic SDK + * already retries transport-level errors (429s, 5xx, network) with exponential + * backoff internally — `generateTheme`'s and `rewriteClues`' own retries only + * cover semantic validation failures. + * + * If no apiKey is provided, the SDK reads from `ANTHROPIC_API_KEY`. Pass a + * `model` option to swap the underlying Claude model (e.g. `claude-haiku-4-5` + * for cheaper/faster generation). */ -export function createAnthropicClient(apiKey?: string): AIClient { +export function createAnthropicClient( + apiKey?: string, + options: AnthropicClientOptions = {}, +): AIClient { const client = new Anthropic({ apiKey }); + const model = options.model ?? DEFAULT_ANTHROPIC_MODEL; return { async completeJSON(prompt: string, schema: JSONSchema): Promise { const response = await client.messages.create({ - model: "claude-sonnet-4-6", + model, max_tokens: 4096, temperature: 0.8, messages: [{ role: "user", content: prompt }], diff --git a/packages/logic-grid-ai/src/clue-validation.test.ts b/packages/logic-grid-ai/src/clue-validation.test.ts index c6e9d67..6cee378 100644 --- a/packages/logic-grid-ai/src/clue-validation.test.ts +++ b/packages/logic-grid-ai/src/clue-validation.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect } from "vitest"; import { validateRewrittenClues } from "./clue-validation"; +import { hasCode } from "./test-utils"; import type { RewriteCluesResult } from "./types"; function validResult(count: number = 3): RewriteCluesResult { @@ -18,48 +19,47 @@ describe("validateRewrittenClues", () => { it("rejects wrong clue count (too few)", () => { const errors = validateRewrittenClues(validResult(2), 3); - expect(errors).toContainEqual( - expect.stringContaining("Expected 3 clues, got 2"), - ); + expect(hasCode(errors, "wrong_clue_count")).toBe(true); + expect( + errors.find((e) => e.code === "wrong_clue_count")?.message, + ).toContain("Expected 3 clues, got 2"); }); it("rejects wrong clue count (too many)", () => { const errors = validateRewrittenClues(validResult(4), 3); - expect(errors).toContainEqual( - expect.stringContaining("Expected 3 clues, got 4"), - ); + expect(hasCode(errors, "wrong_clue_count")).toBe(true); }); it("rejects empty clue text", () => { const r = validResult(); r.clues[1] = ""; const errors = validateRewrittenClues(r, 3); - expect(errors).toContainEqual(expect.stringContaining("Clue 2 is empty")); + expect(hasCode(errors, "empty_clue")).toBe(true); + expect(errors.find((e) => e.code === "empty_clue")?.clueIndex).toBe(2); }); it("rejects whitespace-only clue text", () => { const r = validResult(); r.clues[0] = " "; const errors = validateRewrittenClues(r, 3); - expect(errors).toContainEqual(expect.stringContaining("Clue 1 is empty")); + expect(hasCode(errors, "empty_clue")).toBe(true); + expect(errors.find((e) => e.code === "empty_clue")?.clueIndex).toBe(1); }); it("rejects clue exceeding max length", () => { const r = validResult(); r.clues[2] = "A".repeat(501); const errors = validateRewrittenClues(r, 3); - expect(errors).toContainEqual( - expect.stringContaining("Clue 3 is too long (501 chars, max 500)"), - ); + expect(hasCode(errors, "long_clue")).toBe(true); + expect(errors.find((e) => e.code === "long_clue")?.clueIndex).toBe(3); }); it("rejects duplicate rewritten clues (case-insensitive)", () => { const r = validResult(); r.clues[2] = r.clues[0].toLowerCase(); const errors = validateRewrittenClues(r, 3); - expect(errors).toContainEqual( - expect.stringContaining("Clue 3 is a duplicate"), - ); + expect(hasCode(errors, "duplicate_clue")).toBe(true); + expect(errors.find((e) => e.code === "duplicate_clue")?.clueIndex).toBe(3); }); it("reports multiple errors at once", () => { @@ -73,12 +73,18 @@ describe("validateRewrittenClues", () => { it("rejects non-string clue item", () => { const r = { clues: ["Valid clue.", 42 as unknown as string, "Another."] }; const errors = validateRewrittenClues(r, 3); - expect(errors).toContainEqual( - expect.stringContaining("Clue 2 is not a string"), - ); + expect(hasCode(errors, "non_string_clue")).toBe(true); + expect(errors.find((e) => e.code === "non_string_clue")?.clueIndex).toBe(2); }); it("accepts single clue", () => { expect(validateRewrittenClues(validResult(1), 1)).toEqual([]); }); + + it("omits clueIndex on count-level errors", () => { + const errors = validateRewrittenClues(validResult(2), 3); + const e = errors.find((x) => x.code === "wrong_clue_count"); + expect(e).toBeDefined(); + expect("clueIndex" in (e as object)).toBe(false); + }); }); diff --git a/packages/logic-grid-ai/src/clue-validation.ts b/packages/logic-grid-ai/src/clue-validation.ts index 0f46978..75350c0 100644 --- a/packages/logic-grid-ai/src/clue-validation.ts +++ b/packages/logic-grid-ai/src/clue-validation.ts @@ -1,43 +1,76 @@ -import type { RewriteCluesResult } from "./types"; +import type { + RewriteCluesResult, + RewriteCluesValidationCode, + RewriteCluesValidationError, +} from "./types"; + +function err( + code: RewriteCluesValidationCode, + message: string, + clueIndex?: number, +): RewriteCluesValidationError { + return clueIndex !== undefined + ? { code, message, clueIndex } + : { code, message }; +} /** * Validate AI-generated rewritten clues against structural rules. * - * Returns an array of error messages. Empty array means the result is valid. - * Used internally by rewriteClues to decide whether to retry. + * Returns an array of structured errors. Empty array means the result is valid. + * Each error has a stable `code` (machine-readable) and `message` (human-readable); + * `clueIndex` is the 1-indexed position when the error is scoped to a single clue. */ export function validateRewrittenClues( result: RewriteCluesResult, expectedCount: number, -): string[] { - const errors: string[] = []; +): RewriteCluesValidationError[] { + const errors: RewriteCluesValidationError[] = []; if (result.clues.length !== expectedCount) { - errors.push(`Expected ${expectedCount} clues, got ${result.clues.length}.`); + errors.push( + err( + "wrong_clue_count", + `Expected ${expectedCount} clues, got ${result.clues.length}.`, + ), + ); } const seen = new Set(); for (let i = 0; i < result.clues.length; i++) { const text = result.clues[i]; + const pos = i + 1; if (typeof text !== "string") { - errors.push(`Clue ${i + 1} is not a string.`); + errors.push(err("non_string_clue", `Clue ${pos} is not a string.`, pos)); continue; } if (!text || text.trim() === "") { - errors.push(`Clue ${i + 1} is empty.`); + errors.push(err("empty_clue", `Clue ${pos} is empty.`, pos)); continue; } if (text.length > 500) { - errors.push(`Clue ${i + 1} is too long (${text.length} chars, max 500).`); + errors.push( + err( + "long_clue", + `Clue ${pos} is too long (${text.length} chars, max 500).`, + pos, + ), + ); } const lower = text.toLowerCase(); if (seen.has(lower)) { - errors.push(`Clue ${i + 1} is a duplicate of an earlier clue.`); + errors.push( + err( + "duplicate_clue", + `Clue ${pos} is a duplicate of an earlier clue.`, + pos, + ), + ); } seen.add(lower); } diff --git a/packages/logic-grid-ai/src/index.ts b/packages/logic-grid-ai/src/index.ts index 4548292..3e507f1 100644 --- a/packages/logic-grid-ai/src/index.ts +++ b/packages/logic-grid-ai/src/index.ts @@ -1,6 +1,10 @@ -export { generateTheme } from "./theme"; -export { rewriteClues } from "./rewrite"; -export { createAnthropicClient } from "./client"; +export { generateTheme, ThemeGenerationError } from "./theme"; +export { rewriteClues, RewriteCluesError } from "./rewrite"; +export { + createAnthropicClient, + DEFAULT_ANTHROPIC_MODEL, + type AnthropicClientOptions, +} from "./client"; export { validateThemeResult } from "./validation"; export { validateRewrittenClues } from "./clue-validation"; export type { @@ -10,4 +14,8 @@ export type { RewriteCluesResult, AIClient, JSONSchema, + ThemeValidationCode, + ThemeValidationError, + RewriteCluesValidationCode, + RewriteCluesValidationError, } from "./types"; diff --git a/packages/logic-grid-ai/src/rewrite.test.ts b/packages/logic-grid-ai/src/rewrite.test.ts index d28362c..3929734 100644 --- a/packages/logic-grid-ai/src/rewrite.test.ts +++ b/packages/logic-grid-ai/src/rewrite.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, vi } from "vitest"; import { generate, deduce } from "logic-grid"; -import { rewriteClues } from "./rewrite"; +import { rewriteClues, RewriteCluesError } from "./rewrite"; import type { AIClient, RewriteCluesResult } from "./types"; import type { Clue } from "logic-grid"; import * as clientModule from "./client"; @@ -123,17 +123,25 @@ describe("rewriteClues", () => { expect(result[0].text).toBe(VALID_RESULT.clues[0]); }); - it("throws after max retries", async () => { + it("throws RewriteCluesError with structured errors after max retries", async () => { const badResult: RewriteCluesResult = { clues: ["Only one clue."], }; - await expect( - rewriteClues({ + let caught: unknown; + try { + await rewriteClues({ clues: SAMPLE_CLUES, client: mockClient(badResult), - }), - ).rejects.toThrow("Clue rewriting failed after 3 attempts"); + }); + } catch (e) { + caught = e; + } + + expect(caught).toBeInstanceOf(RewriteCluesError); + const err = caught as RewriteCluesError; + expect(err.message).toContain("Clue rewriting failed after 3 attempts"); + expect(err.errors.some((e) => e.code === "wrong_clue_count")).toBe(true); }); it("propagates client errors", async () => { diff --git a/packages/logic-grid-ai/src/rewrite.ts b/packages/logic-grid-ai/src/rewrite.ts index fa47640..f4afcf8 100644 --- a/packages/logic-grid-ai/src/rewrite.ts +++ b/packages/logic-grid-ai/src/rewrite.ts @@ -3,6 +3,7 @@ import type { RewriteCluesResult, AIClient, JSONSchema, + RewriteCluesValidationError, } from "./types"; import type { Clue } from "logic-grid"; import { createAnthropicClient } from "./client"; @@ -10,6 +11,20 @@ import { validateRewrittenClues } from "./clue-validation"; const MAX_RETRIES = 3; +/** + * Thrown by {@link rewriteClues} when AI output fails validation on every retry. + * `errors` contains the structured validation errors from the final attempt. + */ +export class RewriteCluesError extends Error { + readonly errors: RewriteCluesValidationError[]; + + constructor(message: string, errors: RewriteCluesValidationError[]) { + super(message); + this.name = "RewriteCluesError"; + this.errors = errors; + } +} + function buildSchema(clueCount: number): JSONSchema { return { type: "object", @@ -66,7 +81,13 @@ function buildPrompt( * Sends all clues in a single batched AI call. Each clue is accompanied * by its constraint JSON so the AI has ground truth semantics. * - * @throws {Error} If rewriting fails after all retry attempts. + * Note: this retries on *semantic* failures (the AI returned invalid output). + * Transport-level retries (429s, 5xx, network errors) are already handled + * inside the Anthropic SDK with built-in exponential backoff — they do not + * consume one of our 3 attempts. + * + * @throws {RewriteCluesError} If rewriting fails after all retry attempts. + * Inspect `error.errors` for the structured validation failures. */ export async function rewriteClues( options: RewriteCluesOptions, @@ -78,10 +99,13 @@ export async function rewriteClues( const client: AIClient = options.client ?? createAnthropicClient(); const schema = buildSchema(clues.length); - let lastErrors: string[] | undefined; + let lastErrors: RewriteCluesValidationError[] | undefined; for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { - const prompt = buildPrompt(options, lastErrors); + const prompt = buildPrompt( + options, + lastErrors?.map((e) => e.message), + ); const result = await client.completeJSON( prompt, schema, @@ -98,7 +122,10 @@ export async function rewriteClues( lastErrors = errors; } - throw new Error( - `Clue rewriting failed after ${MAX_RETRIES} attempts. Last errors:\n${lastErrors!.join("\n")}`, + throw new RewriteCluesError( + `Clue rewriting failed after ${MAX_RETRIES} attempts. Last errors:\n${lastErrors! + .map((e) => e.message) + .join("\n")}`, + lastErrors!, ); } diff --git a/packages/logic-grid-ai/src/test-utils.ts b/packages/logic-grid-ai/src/test-utils.ts new file mode 100644 index 0000000..c57e5b1 --- /dev/null +++ b/packages/logic-grid-ai/src/test-utils.ts @@ -0,0 +1,10 @@ +/** + * Test-only helper. Returns true if any error in the array has the given code. + * Generic over the code union so passing a foreign code fails type-check. + */ +export function hasCode( + errors: { code: C }[], + code: C, +): boolean { + return errors.some((e) => e.code === code); +} diff --git a/packages/logic-grid-ai/src/theme.test.ts b/packages/logic-grid-ai/src/theme.test.ts index 14aa9e1..0998bb7 100644 --- a/packages/logic-grid-ai/src/theme.test.ts +++ b/packages/logic-grid-ai/src/theme.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, vi } from "vitest"; import { generate } from "logic-grid"; -import { generateTheme } from "./theme"; +import { generateTheme, ThemeGenerationError } from "./theme"; import type { AIClient, ThemeResult } from "./types"; import * as clientModule from "./client"; @@ -132,8 +132,8 @@ describe("generateTheme", () => { expect(result.categories).toHaveLength(4); }); - it("throws after max retries", async () => { - // No ordered category → validation fails + it("throws ThemeGenerationError with structured errors after max retries", async () => { + // No ordered category → validation fails with code "no_ordered_category" const badResult: ThemeResult = { categories: VALID_THEME.categories.map((c) => ({ name: c.name, @@ -143,14 +143,22 @@ describe("generateTheme", () => { })), }; - await expect( - generateTheme({ + let caught: unknown; + try { + await generateTheme({ theme: "pirate adventure", size: 4, categories: 4, client: mockClient(badResult), - }), - ).rejects.toThrow("Theme generation failed after 3 attempts"); + }); + } catch (e) { + caught = e; + } + + expect(caught).toBeInstanceOf(ThemeGenerationError); + const err = caught as ThemeGenerationError; + expect(err.message).toContain("Theme generation failed after 3 attempts"); + expect(err.errors.some((e) => e.code === "no_ordered_category")).toBe(true); }); it("throws on invalid size", async () => { diff --git a/packages/logic-grid-ai/src/theme.ts b/packages/logic-grid-ai/src/theme.ts index 4624e77..4397fcf 100644 --- a/packages/logic-grid-ai/src/theme.ts +++ b/packages/logic-grid-ai/src/theme.ts @@ -1,9 +1,30 @@ -import type { ThemeOptions, ThemeResult, AIClient, JSONSchema } from "./types"; +import type { + ThemeOptions, + ThemeResult, + AIClient, + JSONSchema, + ThemeValidationError, +} from "./types"; import { createAnthropicClient } from "./client"; import { validateThemeResult } from "./validation"; const MAX_RETRIES = 3; +/** + * Thrown by {@link generateTheme} when AI output fails validation on every retry. + * `errors` contains the structured validation errors from the final attempt + * so callers can branch on the failure mode (e.g. show a specific UI hint). + */ +export class ThemeGenerationError extends Error { + readonly errors: ThemeValidationError[]; + + constructor(message: string, errors: ThemeValidationError[]) { + super(message); + this.name = "ThemeGenerationError"; + this.errors = errors; + } +} + function buildSchema(size: number, categories: number): JSONSchema { const categorySchema: JSONSchema = { type: "object", @@ -228,12 +249,19 @@ Generate themed categories for: "${theme}" /** * Generate themed categories for a logic grid puzzle using AI. * - * Calls the AI client to produce categories with ordering phrases - * that fit the given theme. Validates the result and retries up to 3 times - * if the AI output fails validation, feeding errors back into the prompt. + * Calls the AI client to produce categories with ordering phrases that fit + * the given theme. Validates the result and retries up to 3 times if the AI + * output fails validation, feeding the previous errors back into the prompt + * so the model can self-correct. + * + * Note: this retries on *semantic* failures (the AI returned invalid JSON + * shape). Transport-level retries (429s, 5xx, network errors) are already + * handled inside the Anthropic SDK with built-in exponential backoff — they + * do not consume one of our 3 attempts. * * @throws {RangeError} If size or categories is outside 3-8. - * @throws {Error} If generation fails after all retry attempts. + * @throws {ThemeGenerationError} If generation fails after all retry attempts. + * Inspect `error.errors` for the structured validation failures. */ export async function generateTheme( options: ThemeOptions, @@ -250,10 +278,13 @@ export async function generateTheme( const client: AIClient = options.client ?? createAnthropicClient(); const schema = buildSchema(size, categories); - let lastErrors: string[] | undefined; + let lastErrors: ThemeValidationError[] | undefined; for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { - const prompt = buildPrompt(options, lastErrors); + const prompt = buildPrompt( + options, + lastErrors?.map((e) => e.message), + ); const result = await client.completeJSON(prompt, schema); // Normalize: treat undefined noun as "" (person category) @@ -271,7 +302,10 @@ export async function generateTheme( lastErrors = errors; } - throw new Error( - `Theme generation failed after ${MAX_RETRIES} attempts. Last errors:\n${lastErrors!.join("\n")}`, + throw new ThemeGenerationError( + `Theme generation failed after ${MAX_RETRIES} attempts. Last errors:\n${lastErrors! + .map((e) => e.message) + .join("\n")}`, + lastErrors!, ); } diff --git a/packages/logic-grid-ai/src/types.ts b/packages/logic-grid-ai/src/types.ts index 57a6b05..a2f3510 100644 --- a/packages/logic-grid-ai/src/types.ts +++ b/packages/logic-grid-ai/src/types.ts @@ -52,3 +52,58 @@ export interface JSONSchema { enum?: string[]; description?: string; } + +/** + * Structured validation error for AI-generated theme output. + * `code` is stable and machine-readable; `message` is human-readable. + * `category` is the asserted category name when the error is scoped to one, + * regardless of whether that name is itself valid (e.g. for `long_category_name`, + * `category` echoes the over-long name so callers can group errors by it). + */ +export type ThemeValidationCode = + | "wrong_category_count" + | "empty_category_name" + | "long_category_name" + | "duplicate_category_name" + | "wrong_value_count" + | "empty_value" + | "long_value" + | "duplicate_value" + | "positional_word_value" + | "whitespace_noun" + | "duplicate_noun" + | "invalid_verb" + | "empty_verb" + | "missing_verb" + | "invalid_numeric_values" + | "non_ascending_numeric_values" + | "invalid_ordering_phrases" + | "symmetric_comparator_tuple" + | "invalid_value_suffix" + | "invalid_position_adjective" + | "missing_value_suffix" + | "invalid_subject_priority" + | "no_person_category" + | "multiple_person_categories" + | "no_ordered_category"; + +export interface ThemeValidationError { + code: ThemeValidationCode; + message: string; + category?: string; +} + +/** Structured validation error for AI-rewritten clues. */ +export type RewriteCluesValidationCode = + | "wrong_clue_count" + | "non_string_clue" + | "empty_clue" + | "long_clue" + | "duplicate_clue"; + +export interface RewriteCluesValidationError { + code: RewriteCluesValidationCode; + message: string; + /** 1-indexed clue position when the error is scoped to a single clue. */ + clueIndex?: number; +} diff --git a/packages/logic-grid-ai/src/validation.test.ts b/packages/logic-grid-ai/src/validation.test.ts index 13689d0..afc8c34 100644 --- a/packages/logic-grid-ai/src/validation.test.ts +++ b/packages/logic-grid-ai/src/validation.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect } from "vitest"; import { validateThemeResult } from "./validation"; +import { hasCode } from "./test-utils"; function validResult() { return { @@ -46,17 +47,19 @@ describe("validateThemeResult", () => { const r = validResult(); r.categories.pop(); const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("Expected 3 categories, got 2"), - ); + expect(hasCode(errors, "wrong_category_count")).toBe(true); + expect( + errors.find((e) => e.code === "wrong_category_count")?.message, + ).toContain("Expected 3 categories, got 2"); }); it("rejects wrong value count", () => { const r = validResult(); r.categories[1].values.push("Extra"); const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining('Category "Dish" has 4 values'), + expect(hasCode(errors, "wrong_value_count")).toBe(true); + expect(errors.find((e) => e.code === "wrong_value_count")?.category).toBe( + "Dish", ); }); @@ -64,112 +67,126 @@ describe("validateThemeResult", () => { const r = validResult(); r.categories[2].values[0] = "gordon"; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining('Duplicate value "gordon"'), - ); + expect(hasCode(errors, "duplicate_value")).toBe(true); }); it("rejects missing person category", () => { const r = validResult(); r.categories[0].noun = "chef"; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("No person category found"), - ); + expect(hasCode(errors, "no_person_category")).toBe(true); }); it("rejects multiple person categories", () => { const r = validResult(); r.categories[1].noun = ""; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("Multiple person categories"), - ); + expect(hasCode(errors, "multiple_person_categories")).toBe(true); }); it("rejects invalid verb type", () => { const r = validResult(); r.categories[1].verb = ["only one"] as unknown as [string, string]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("invalid verb")); + expect(hasCode(errors, "invalid_verb")).toBe(true); }); it("rejects empty category name", () => { const r = validResult(); (r.categories[0] as Record).name = null; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("A category has an empty name"), - ); + expect(hasCode(errors, "empty_category_name")).toBe(true); + }); + + it("rejects whitespace-only category name", () => { + const r = validResult(); + (r.categories[0] as Record).name = " "; + const errors = validateThemeResult(r, 3, 3); + expect(hasCode(errors, "empty_category_name")).toBe(true); }); it("rejects duplicate category names", () => { const r = validResult(); r.categories[2].name = "Dish"; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("Duplicate category name"), + expect(hasCode(errors, "duplicate_category_name")).toBe(true); + }); + + it("does not double-report two empty names as duplicates", () => { + const r = validResult(); + (r.categories[0] as Record).name = ""; + (r.categories[1] as Record).name = ""; + const errors = validateThemeResult(r, 3, 3); + // Both empty names should each trigger empty_category_name; the second + // should NOT also trigger duplicate_category_name (the empty name is the + // problem, not its duplication of another empty name). + expect(errors.filter((e) => e.code === "empty_category_name")).toHaveLength( + 2, ); + expect(hasCode(errors, "duplicate_category_name")).toBe(false); }); it("rejects category with no values", () => { const r = validResult(); delete (r.categories[0] as Record).values; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("has 0 values")); + expect(hasCode(errors, "wrong_value_count")).toBe(true); }); it("rejects long category name", () => { const r = validResult(); r.categories[0].name = "A".repeat(31); const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("too long")); + expect(hasCode(errors, "long_category_name")).toBe(true); }); it("rejects long value", () => { const r = validResult(); r.categories[0].values[0] = "X".repeat(31); const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("too long")); + expect(hasCode(errors, "long_value")).toBe(true); + }); + + it("rejects empty value", () => { + const r = validResult(); + r.categories[0].values[0] = ""; + const errors = validateThemeResult(r, 3, 3); + expect(hasCode(errors, "empty_value")).toBe(true); }); it("rejects empty verb strings", () => { const r = validResult(); r.categories[1].verb = ["", "does not"] as [string, string]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("empty verb")); + expect(hasCode(errors, "empty_verb")).toBe(true); }); it("rejects whitespace-only noun", () => { const r = validResult(); r.categories[1].noun = " "; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("whitespace-only noun"), - ); + expect(hasCode(errors, "whitespace_noun")).toBe(true); }); it("rejects duplicate noun", () => { const r = validResult(); r.categories[2].noun = "chef"; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("Duplicate noun")); + expect(hasCode(errors, "duplicate_noun")).toBe(true); }); it("rejects values that collide with positional words", () => { const r = validResult(); r.categories[0].values[0] = "First"; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("collides with a positional word"), - ); + expect(hasCode(errors, "positional_word_value")).toBe(true); }); it("rejects non-person category without verb", () => { const r = validResult(); delete r.categories[1].verb; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual(expect.stringContaining("requires a verb")); + expect(hasCode(errors, "missing_verb")).toBe(true); }); it("reports multiple errors at once", () => { @@ -194,9 +211,7 @@ describe("validateThemeResult", () => { const r = validResult(); delete (r.categories[1] as Record).ordered; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("No ordered category found"), - ); + expect(hasCode(errors, "no_ordered_category")).toBe(true); }); it("accepts multiple ordered categories", () => { @@ -209,18 +224,27 @@ describe("validateThemeResult", () => { const r = validResult(); (r.categories[1] as Record).numericValues = [1, 2]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("numericValues must have exactly 3 numbers"), - ); + expect(hasCode(errors, "invalid_numeric_values")).toBe(true); + expect( + errors.find((e) => e.code === "invalid_numeric_values")?.message, + ).toContain("must have exactly 3 numbers"); }); it("rejects non-numeric numericValues", () => { const r = validResult(); (r.categories[1] as Record).numericValues = [1, "two", 3]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("numericValues must all be numbers"), - ); + expect(hasCode(errors, "invalid_numeric_values")).toBe(true); + expect( + errors.find((e) => e.code === "invalid_numeric_values")?.message, + ).toContain("must all be numbers"); + }); + + it("rejects numericValues that are not an array", () => { + const r = validResult(); + (r.categories[1] as Record).numericValues = "not an array"; + const errors = validateThemeResult(r, 3, 3); + expect(hasCode(errors, "invalid_numeric_values")).toBe(true); }); it("accepts orderingPhrases with unit on ordered category", () => { @@ -235,18 +259,22 @@ describe("validateThemeResult", () => { const r = validResult(); (r.categories[1] as Record).orderingPhrases = null; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("orderingPhrases must be an object"), - ); + expect(hasCode(errors, "invalid_ordering_phrases")).toBe(true); + }); + + it("rejects non-object orderingPhrases", () => { + const r = validResult(); + (r.categories[1] as Record).orderingPhrases = + "not an object"; + const errors = validateThemeResult(r, 3, 3); + expect(hasCode(errors, "invalid_ordering_phrases")).toBe(true); }); it("rejects non-string valueSuffix", () => { const r = validResult(); (r.categories[1] as Record).valueSuffix = 42; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("valueSuffix must be a string"), - ); + expect(hasCode(errors, "invalid_value_suffix")).toBe(true); }); it("rejects positionAdjective without valueSuffix", () => { @@ -256,9 +284,7 @@ describe("validateThemeResult", () => { "is not", ]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("has positionAdjective but no valueSuffix"), - ); + expect(hasCode(errors, "missing_value_suffix")).toBe(true); }); it("rejects invalid positionAdjective type", () => { @@ -268,29 +294,21 @@ describe("validateThemeResult", () => { "only one", ]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining( - "positionAdjective must be a [positive, negative]", - ), - ); + expect(hasCode(errors, "invalid_position_adjective")).toBe(true); }); it("rejects non-ascending numericValues", () => { const r = validResult(); (r.categories[1] as Record).numericValues = [3, 1, 2]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("strictly ascending"), - ); + expect(hasCode(errors, "non_ascending_numeric_values")).toBe(true); }); it("rejects equal adjacent numericValues", () => { const r = validResult(); (r.categories[1] as Record).numericValues = [1, 1, 2]; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("strictly ascending"), - ); + expect(hasCode(errors, "non_ascending_numeric_values")).toBe(true); }); it("rejects symmetric comparator as tuple", () => { @@ -299,9 +317,7 @@ describe("validateThemeResult", () => { comparators: { next_to: ["fwd", "rev"] }, }; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining('comparator "next_to" is symmetric'), - ); + expect(hasCode(errors, "symmetric_comparator_tuple")).toBe(true); }); it("accepts directional comparator as tuple", () => { @@ -316,8 +332,23 @@ describe("validateThemeResult", () => { const r = validResult(); (r.categories[1] as Record).subjectPriority = "high"; const errors = validateThemeResult(r, 3, 3); - expect(errors).toContainEqual( - expect.stringContaining("subjectPriority must be a number"), - ); + expect(hasCode(errors, "invalid_subject_priority")).toBe(true); + }); + + it("attaches the offending category name to scoped errors", () => { + const r = validResult(); + r.categories[1].values.push("Extra"); + const errors = validateThemeResult(r, 3, 3); + const e = errors.find((x) => x.code === "wrong_value_count"); + expect(e?.category).toBe("Dish"); + }); + + it("omits category on errors that aren't scoped to one", () => { + const r = validResult(); + r.categories.pop(); + const errors = validateThemeResult(r, 3, 3); + const e = errors.find((x) => x.code === "wrong_category_count"); + expect(e).toBeDefined(); + expect("category" in (e as object)).toBe(false); }); }); diff --git a/packages/logic-grid-ai/src/validation.ts b/packages/logic-grid-ai/src/validation.ts index 3302845..3340416 100644 --- a/packages/logic-grid-ai/src/validation.ts +++ b/packages/logic-grid-ai/src/validation.ts @@ -1,3 +1,5 @@ +import type { ThemeValidationCode, ThemeValidationError } from "./types"; + /** Loose shape for untrusted AI category output. All fields optional. */ interface RawCategory { name?: string; @@ -26,27 +28,49 @@ const POSITIONAL_WORDS = new Set([ "eighth", ]); +const SYMMETRIC_COMPARATORS = new Set([ + "next_to", + "not_next_to", + "between", + "not_between", + "exact_distance", +]); + +function err( + code: ThemeValidationCode, + message: string, + category?: string, +): ThemeValidationError { + return category !== undefined + ? { code, message, category } + : { code, message }; +} + /** * Validate AI-generated theme output against structural and semantic rules. * - * Returns an array of error messages. Empty array means the result is valid. - * Used internally by generateTheme to decide whether to retry. - */ -/** - * The input shape is intentionally loose (`categories: Record[]`) - * because this function validates untrusted AI JSON output. The caller - * (generateTheme) casts to ThemeResult only after validation passes. + * Returns an array of structured errors. Empty array means the result is valid. + * Each error has a stable `code` (machine-readable) and `message` (human-readable). + * Used internally by generateTheme to decide whether to retry; exported for + * custom pipelines that bring their own client. + * + * The input shape is intentionally loose because this function validates + * untrusted AI JSON output. The caller (generateTheme) casts to ThemeResult + * only after validation passes. */ export function validateThemeResult( result: { categories: readonly unknown[] }, expectedSize: number, expectedCategories: number, -): string[] { - const errors: string[] = []; +): ThemeValidationError[] { + const errors: ThemeValidationError[] = []; if (result.categories.length !== expectedCategories) { errors.push( - `Expected ${expectedCategories} categories, got ${result.categories.length}.`, + err( + "wrong_category_count", + `Expected ${expectedCategories} categories, got ${result.categories.length}.`, + ), ); } @@ -62,15 +86,28 @@ export function validateThemeResult( // Category name checks if (!name || name.trim() === "") { - errors.push("A category has an empty name."); + errors.push(err("empty_category_name", "A category has an empty name.")); } else if (name.length > 30) { errors.push( - `Category name "${name}" is too long (${name.length} chars, max 30).`, + err( + "long_category_name", + `Category name "${name}" is too long (${name.length} chars, max 30).`, + name, + ), ); } const nameLower = name.toLowerCase(); - if (seenNames.has(nameLower)) { - errors.push(`Duplicate category name "${name}".`); + // Skip the duplicate check when the name is empty/missing — the empty name + // itself is the problem; reporting it as a duplicate of another empty name + // is noise. + if (nameLower !== "" && seenNames.has(nameLower)) { + errors.push( + err( + "duplicate_category_name", + `Duplicate category name "${name}".`, + name, + ), + ); } seenNames.add(nameLower); @@ -78,31 +115,49 @@ export function validateThemeResult( const values = cat.values ?? []; if (values.length !== expectedSize) { errors.push( - `Category "${name}" has ${values.length} values, expected ${expectedSize}.`, + err( + "wrong_value_count", + `Category "${name}" has ${values.length} values, expected ${expectedSize}.`, + name, + ), ); } // Value checks for (const val of values) { if (val.trim() === "") { - errors.push(`Category "${name}" has an empty value.`); + errors.push( + err("empty_value", `Category "${name}" has an empty value.`, name), + ); } else if (val.length > 30) { errors.push( - `Category "${name}" value "${val}" is too long (${val.length} chars, max 30).`, + err( + "long_value", + `Category "${name}" value "${val}" is too long (${val.length} chars, max 30).`, + name, + ), ); } const valLower = val.toLowerCase(); const existing = seenValues.get(valLower); if (existing !== undefined) { errors.push( - `Duplicate value "${val}" (also in category with value "${existing}").`, + err( + "duplicate_value", + `Duplicate value "${val}" (also in category with value "${existing}").`, + name, + ), ); } else { seenValues.set(valLower, val); } if (POSITIONAL_WORDS.has(valLower)) { errors.push( - `Category "${name}" value "${val}" collides with a positional word.`, + err( + "positional_word_value", + `Category "${name}" value "${val}" collides with a positional word.`, + name, + ), ); } } @@ -112,12 +167,22 @@ export function validateThemeResult( personCount++; } else if (cat.noun.trim() === "") { errors.push( - `Category "${name}" has a whitespace-only noun. Use a meaningful noun or "" for person categories.`, + err( + "whitespace_noun", + `Category "${name}" has a whitespace-only noun. Use a meaningful noun or "" for person categories.`, + name, + ), ); } else { const nounLower = cat.noun.toLowerCase(); if (seenNouns.has(nounLower)) { - errors.push(`Duplicate noun "${cat.noun}" in category "${name}".`); + errors.push( + err( + "duplicate_noun", + `Duplicate noun "${cat.noun}" in category "${name}".`, + name, + ), + ); } seenNouns.add(nounLower); } @@ -131,15 +196,25 @@ export function validateThemeResult( typeof cat.verb[1] !== "string" ) { errors.push( - `Category "${cat.name}" has invalid verb. Must be [positive, negative] string pair.`, + err( + "invalid_verb", + `Category "${name}" has invalid verb. Must be [positive, negative] string pair.`, + name, + ), ); } else if (cat.verb[0].trim() === "" || cat.verb[1].trim() === "") { - errors.push(`Category "${cat.name}" has empty verb strings.`); + errors.push( + err("empty_verb", `Category "${name}" has empty verb strings.`, name), + ); } } else if (cat.noun !== "" && cat.noun !== undefined) { // Every non-person category must have a verb so same_position renders cleanly. errors.push( - `Category "${cat.name}" requires a verb. Only the person category (noun: "") may omit it.`, + err( + "missing_verb", + `Category "${name}" requires a verb. Only the person category (noun: "") may omit it.`, + name, + ), ); } @@ -155,15 +230,29 @@ export function validateThemeResult( cat.numericValues.length !== expectedSize ) { errors.push( - `Category "${name}" numericValues must have exactly ${expectedSize} numbers.`, + err( + "invalid_numeric_values", + `Category "${name}" numericValues must have exactly ${expectedSize} numbers.`, + name, + ), ); } else if ( cat.numericValues.some((v: unknown) => typeof v !== "number") ) { - errors.push(`Category "${name}" numericValues must all be numbers.`); + errors.push( + err( + "invalid_numeric_values", + `Category "${name}" numericValues must all be numbers.`, + name, + ), + ); } else if (cat.numericValues.some((v, i, a) => i > 0 && v <= a[i - 1])) { errors.push( - `Category "${name}" numericValues must be in strictly ascending order.`, + err( + "non_ascending_numeric_values", + `Category "${name}" numericValues must be in strictly ascending order.`, + name, + ), ); } } @@ -171,25 +260,28 @@ export function validateThemeResult( cat.orderingPhrases !== undefined && (cat.orderingPhrases === null || typeof cat.orderingPhrases !== "object") ) { - errors.push(`Category "${name}" orderingPhrases must be an object.`); + errors.push( + err( + "invalid_ordering_phrases", + `Category "${name}" orderingPhrases must be an object.`, + name, + ), + ); } // Symmetric comparators must be single strings, not [forward, reverse]. // NOTE: duplicated in logic-grid/src/generator.ts (validateGrid). The AI // package can't depend on the core package, so we keep a parallel copy. // Keep both lists in sync. - const symmetric = new Set([ - "next_to", - "not_next_to", - "between", - "not_between", - "exact_distance", - ]); const comps = cat.orderingPhrases?.comparators; if (comps && typeof comps === "object") { for (const [type, value] of Object.entries(comps)) { - if (Array.isArray(value) && symmetric.has(type)) { + if (Array.isArray(value) && SYMMETRIC_COMPARATORS.has(type)) { errors.push( - `Category "${name}" comparator "${type}" is symmetric and must be a single string, not [forward, reverse].`, + err( + "symmetric_comparator_tuple", + `Category "${name}" comparator "${type}" is symmetric and must be a single string, not [forward, reverse].`, + name, + ), ); } } @@ -197,7 +289,13 @@ export function validateThemeResult( // valueSuffix / positionAdjective if (cat.valueSuffix !== undefined && typeof cat.valueSuffix !== "string") { - errors.push(`Category "${name}" valueSuffix must be a string.`); + errors.push( + err( + "invalid_value_suffix", + `Category "${name}" valueSuffix must be a string.`, + name, + ), + ); } if (cat.positionAdjective !== undefined) { if ( @@ -207,12 +305,20 @@ export function validateThemeResult( typeof cat.positionAdjective[1] !== "string" ) { errors.push( - `Category "${name}" positionAdjective must be a [positive, negative] string pair.`, + err( + "invalid_position_adjective", + `Category "${name}" positionAdjective must be a [positive, negative] string pair.`, + name, + ), ); } if (cat.valueSuffix === undefined) { errors.push( - `Category "${name}" has positionAdjective but no valueSuffix. They must be set together.`, + err( + "missing_value_suffix", + `Category "${name}" has positionAdjective but no valueSuffix. They must be set together.`, + name, + ), ); } } @@ -222,23 +328,38 @@ export function validateThemeResult( cat.subjectPriority !== undefined && typeof cat.subjectPriority !== "number" ) { - errors.push(`Category "${name}" subjectPriority must be a number.`); + errors.push( + err( + "invalid_subject_priority", + `Category "${name}" subjectPriority must be a number.`, + name, + ), + ); } } if (personCount === 0) { errors.push( - 'No person category found. Exactly one category must have noun: "".', + err( + "no_person_category", + 'No person category found. Exactly one category must have noun: "".', + ), ); } else if (personCount > 1) { errors.push( - 'Multiple person categories found. Exactly one must have noun: "".', + err( + "multiple_person_categories", + 'Multiple person categories found. Exactly one must have noun: "".', + ), ); } if (orderedCount === 0) { errors.push( - "No ordered category found. At least one category must have ordered: true.", + err( + "no_ordered_category", + "No ordered category found. At least one category must have ordered: true.", + ), ); }