antonstefer · antonstefer · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/packages/logic-grid-ai/README.md b/packages/logic-grid-ai/README.md
@@ -67,14 +67,34 @@ interface ThemeResult {
 
 At least one category must have `ordered: true` with `orderingPhrases.comparators` defining all 7 comparator phrases. The result is validated against structural and semantic rules (value uniqueness, noun consistency, category count, ordered category presence, etc.). If validation fails, the AI is retried with error feedback up to 3 times.
 
-### `createAnthropicClient(apiKey?)`
+If all retries fail, `generateTheme` throws a `ThemeGenerationError`. The error carries an `errors` array of structured `ThemeValidationError` objects (each with a stable `code` like `"no_ordered_category"` or `"duplicate_value"` and a human-readable `message`), so callers can branch on the failure mode:
 
-Create the default AI client backed by the Anthropic SDK. If no key is provided, reads from `ANTHROPIC_API_KEY`.
+```typescript
+import { generateTheme, ThemeGenerationError } from "logic-grid-ai";
+
+try {
+  const theme = await generateTheme({ theme: "...", size: 4, categories: 4 });
+} catch (err) {
+  if (err instanceof ThemeGenerationError) {
+    if (err.errors.some((e) => e.code === "no_ordered_category")) {
+      // Show a hint about ordered categories
+    }
+  }
+  throw err;
+}
+```
+
+> Transport-level failures (429s, 5xx, network errors) are already retried inside the Anthropic SDK with exponential backoff, so they don't consume any of the 3 semantic-retry attempts.
+
+### `createAnthropicClient(apiKey?, options?)`
+
+Create the default AI client backed by the Anthropic SDK. If no key is provided, the SDK reads it from `ANTHROPIC_API_KEY`. Pass `{ model }` as the second argument to override the default model (`claude-sonnet-4-6`); pass `undefined` as the key to keep the environment lookup:
 
 ```typescript
 import { createAnthropicClient } from "logic-grid-ai";
 
-const client = createAnthropicClient("sk-ant-...");
+const fast = createAnthropicClient(undefined, { model: "claude-haiku-4-5" });
+const explicit = createAnthropicClient("sk-ant-...");
 ```
 
 ### Custom AI Client
@@ -100,14 +120,14 @@ const theme = await generateTheme({
 
 ### `validateThemeResult(result, size, categories)`
 
-Validate AI output against structural and semantic rules. Returns an array of error messages (empty = valid). Used internally by `generateTheme`, but exported for custom pipelines.
+Validate AI output against structural and semantic rules. Returns `ThemeValidationError[]` (empty = valid). Each error has a stable `code`, a human-readable `message`, and an optional `category` field naming the offending category. Used internally by `generateTheme`, but exported for custom pipelines.
 
 ```typescript
 import { validateThemeResult } from "logic-grid-ai";
 
 const errors = validateThemeResult(result, 4, 4);
 if (errors.length > 0) {
-  console.error("Invalid theme:", errors);
+  for (const e of errors) console.error(`[${e.code}] ${e.message}`);
 }
 ```
 
@@ -126,11 +146,11 @@ const rewritten = await rewriteClues({
 // Returns Clue[] with the same constraints and replaced text fields.
 ```
 
-All clues are rewritten in a single batched AI call. Each clue is sent alongside its constraint JSON so the AI has ground-truth semantics. Output is validated against duplicate / empty / overlong text rules; retries up to 3 times before throwing.
+All clues are rewritten in a single batched AI call. Each clue is sent alongside its constraint JSON so the AI has ground-truth semantics. Output is validated against wrong-count / non-string / empty / overlong / duplicate text rules; after 3 failed attempts, `rewriteClues` throws a `RewriteCluesError` (the counterpart of `ThemeGenerationError` — it carries `errors: RewriteCluesValidationError[]` with codes like `"empty_clue"`, `"long_clue"`, `"duplicate_clue"`).
 
 ### `validateRewrittenClues(result, expectedCount)`
 
-Validate raw AI output for `rewriteClues`. Returns an array of error messages (empty = valid).
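+Validate raw AI output for `rewriteClues`. Returns `RewriteCluesValidationError[]` (empty = valid). Each error has a stable `code`, a human-readable `message`, and — when the error is scoped to a single clue — a 1-indexed `clueIndex` (omitted for count-level errors such as `"wrong_clue_count"`).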
 
 ```typescript
 import { validateRewrittenClues } from "logic-grid-ai";

diff --git a/packages/logic-grid-ai/src/client.test.ts b/packages/logic-grid-ai/src/client.test.ts
@@ -68,4 +68,19 @@ describe("createAnthropicClient", () => {
 
     expect(mockCreate).toHaveBeenCalled();
   });
+
+  it("uses overridden model when passed via options", async () => {
+    mockCreate.mockResolvedValueOnce({
+      content: [{ type: "tool_use", id: "call_3", name: "respond", input: {} }],
+    });
+
+    const client = createAnthropicClient(undefined, {
+      model: "claude-haiku-4-5",
+    });
+    await client.completeJSON("test", { type: "object" });
+
+    expect(mockCreate).toHaveBeenCalledWith(
+      expect.objectContaining({ model: "claude-haiku-4-5" }),
+    );
+  });
 });
diff --git a/packages/logic-grid-ai/src/client.ts b/packages/logic-grid-ai/src/client.ts
@@ -1,19 +1,38 @@
 import Anthropic from "@anthropic-ai/sdk";
 import type { AIClient, JSONSchema } from "./types";
 
+/** Default model used when no `model` option is provided. */
+export const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-6";
+
+/** Optional knobs for the default Anthropic-backed client. */
+export interface AnthropicClientOptions {
+  /** Override the model. Defaults to {@link DEFAULT_ANTHROPIC_MODEL}. */
+  model?: string;
+}
+
 /**
  * Create an AIClient backed by the Anthropic SDK.
  *
- * Uses Claude's tool_use feature for structured JSON output.
- * If no apiKey is provided, the SDK reads from ANTHROPIC_API_KEY.
+ * Uses Claude's tool_use feature for structured JSON output. The Anthropic SDK
+ * already retries transport-level errors (429s, 5xx, network) with exponential
+ * backoff internally — `generateTheme`'s and `rewriteClues`' own retries only
+ * cover semantic validation failures.
+ *
+ * If no apiKey is provided, the SDK reads from `ANTHROPIC_API_KEY`. Pass a
+ * `model` option to swap the underlying Claude model (e.g. `claude-haiku-4-5`
+ * for cheaper/faster generation).
  */
-export function createAnthropicClient(apiKey?: string): AIClient {
+export function createAnthropicClient(
+  apiKey?: string,
+  options: AnthropicClientOptions = {},
+): AIClient {
   const client = new Anthropic({ apiKey });
+  const model = options.model ?? DEFAULT_ANTHROPIC_MODEL;
 
   return {
     async completeJSON<T>(prompt: string, schema: JSONSchema): Promise<T> {
       const response = await client.messages.create({
-        model: "claude-sonnet-4-6",
+        model,
         max_tokens: 4096,
         temperature: 0.8,
         messages: [{ role: "user", content: prompt }],

diff --git a/packages/logic-grid-ai/src/clue-validation.test.ts b/packages/logic-grid-ai/src/clue-validation.test.ts
@@ -1,5 +1,6 @@
 import { describe, it, expect } from "vitest";
 import { validateRewrittenClues } from "./clue-validation";
+import { hasCode } from "./test-utils";
 import type { RewriteCluesResult } from "./types";
 
 function validResult(count: number = 3): RewriteCluesResult {
@@ -18,48 +19,47 @@ describe("validateRewrittenClues", () => {
 
   it("rejects wrong clue count (too few)", () => {
     const errors = validateRewrittenClues(validResult(2), 3);
-    expect(errors).toContainEqual(
-      expect.stringContaining("Expected 3 clues, got 2"),
-    );
+    expect(hasCode(errors, "wrong_clue_count")).toBe(true);
+    expect(
+      errors.find((e) => e.code === "wrong_clue_count")?.message,
+    ).toContain("Expected 3 clues, got 2");
   });
 
   it("rejects wrong clue count (too many)", () => {
     const errors = validateRewrittenClues(validResult(4), 3);
-    expect(errors).toContainEqual(
-      expect.stringContaining("Expected 3 clues, got 4"),
-    );
+    expect(hasCode(errors, "wrong_clue_count")).toBe(true);
   });
 
   it("rejects empty clue text", () => {
     const r = validResult();
     r.clues[1] = "";
     const errors = validateRewrittenClues(r, 3);
-    expect(errors).toContainEqual(expect.stringContaining("Clue 2 is empty"));
+    expect(hasCode(errors, "empty_clue")).toBe(true);
+    expect(errors.find((e) => e.code === "empty_clue")?.clueIndex).toBe(2);
   });
 
   it("rejects whitespace-only clue text", () => {
     const r = validResult();
     r.clues[0] = "   ";
     const errors = validateRewrittenClues(r, 3);
-    expect(errors).toContainEqual(expect.stringContaining("Clue 1 is empty"));
+    expect(hasCode(errors, "empty_clue")).toBe(true);
+    expect(errors.find((e) => e.code === "empty_clue")?.clueIndex).toBe(1);
   });
 
   it("rejects clue exceeding max length", () => {
     const r = validResult();
     r.clues[2] = "A".repeat(501);
     const errors = validateRewrittenClues(r, 3);
-    expect(errors).toContainEqual(
-      expect.stringContaining("Clue 3 is too long (501 chars, max 500)"),
-    );
+    expect(hasCode(errors, "long_clue")).toBe(true);
+    expect(errors.find((e) => e.code === "long_clue")?.clueIndex).toBe(3);
   });
 
   it("rejects duplicate rewritten clues (case-insensitive)", () => {
     const r = validResult();
     r.clues[2] = r.clues[0].toLowerCase();
     const errors = validateRewrittenClues(r, 3);
-    expect(errors).toContainEqual(
-      expect.stringContaining("Clue 3 is a duplicate"),
-    );
+    expect(hasCode(errors, "duplicate_clue")).toBe(true);
+    expect(errors.find((e) => e.code === "duplicate_clue")?.clueIndex).toBe(3);
   });
 
   it("reports multiple errors at once", () => {
@@ -73,12 +73,18 @@ describe("validateRewrittenClues", () => {
   it("rejects non-string clue item", () => {
     const r = { clues: ["Valid clue.", 42 as unknown as string, "Another."] };
     const errors = validateRewrittenClues(r, 3);
-    expect(errors).toContainEqual(
-      expect.stringContaining("Clue 2 is not a string"),
-    );
+    expect(hasCode(errors, "non_string_clue")).toBe(true);
+    expect(errors.find((e) => e.code === "non_string_clue")?.clueIndex).toBe(2);
   });
 
   it("accepts single clue", () => {
     expect(validateRewrittenClues(validResult(1), 1)).toEqual([]);
   });
+
+  it("omits clueIndex on count-level errors", () => {
+    const errors = validateRewrittenClues(validResult(2), 3);
+    const e = errors.find((x) => x.code === "wrong_clue_count");
+    expect(e).toBeDefined();
+    expect("clueIndex" in (e as object)).toBe(false);
+  });
 });
diff --git a/packages/logic-grid-ai/src/clue-validation.ts b/packages/logic-grid-ai/src/clue-validation.ts
@@ -1,43 +1,76 @@
-import type { RewriteCluesResult } from "./types";
+import type {
+  RewriteCluesResult,
+  RewriteCluesValidationCode,
+  RewriteCluesValidationError,
+} from "./types";
+
+function err(
+  code: RewriteCluesValidationCode,
+  message: string,
+  clueIndex?: number,
+): RewriteCluesValidationError {
+  return clueIndex !== undefined
+    ? { code, message, clueIndex }
+    : { code, message };
+}
 
 /**
  * Validate AI-generated rewritten clues against structural rules.
  *
- * Returns an array of error messages. Empty array means the result is valid.
- * Used internally by rewriteClues to decide whether to retry.
+ * Returns an array of structured errors. Empty array means the result is valid.
+ * Each error has a stable `code` (machine-readable) and `message` (human-readable);
+ * `clueIndex` is the 1-indexed position when the error is scoped to a single clue.
  */
 export function validateRewrittenClues(
   result: RewriteCluesResult,
   expectedCount: number,
-): string[] {
-  const errors: string[] = [];
+): RewriteCluesValidationError[] {
+  const errors: RewriteCluesValidationError[] = [];
 
   if (result.clues.length !== expectedCount) {
-    errors.push(`Expected ${expectedCount} clues, got ${result.clues.length}.`);
+    errors.push(
+      err(
+        "wrong_clue_count",
+        `Expected ${expectedCount} clues, got ${result.clues.length}.`,
+      ),
+    );
   }
 
   const seen = new Set<string>();
 
   for (let i = 0; i < result.clues.length; i++) {
     const text = result.clues[i];
+    const pos = i + 1;
 
     if (typeof text !== "string") {
-      errors.push(`Clue ${i + 1} is not a string.`);
+      errors.push(err("non_string_clue", `Clue ${pos} is not a string.`, pos));
       continue;
     }
 
     if (!text || text.trim() === "") {
-      errors.push(`Clue ${i + 1} is empty.`);
+      errors.push(err("empty_clue", `Clue ${pos} is empty.`, pos));
       continue;
     }
 
     if (text.length > 500) {
-      errors.push(`Clue ${i + 1} is too long (${text.length} chars, max 500).`);
+      errors.push(
+        err(
+          "long_clue",
+          `Clue ${pos} is too long (${text.length} chars, max 500).`,
+          pos,
+        ),
+      );
     }
 
     const lower = text.toLowerCase();
     if (seen.has(lower)) {
-      errors.push(`Clue ${i + 1} is a duplicate of an earlier clue.`);
+      errors.push(
+        err(
+          "duplicate_clue",
+          `Clue ${pos} is a duplicate of an earlier clue.`,
+          pos,
+        ),
+      );
     }
     seen.add(lower);
   }

diff --git a/packages/logic-grid-ai/src/index.ts b/packages/logic-grid-ai/src/index.ts
@@ -1,6 +1,10 @@
-export { generateTheme } from "./theme";
-export { rewriteClues } from "./rewrite";
-export { createAnthropicClient } from "./client";
+export { generateTheme, ThemeGenerationError } from "./theme";
+export { rewriteClues, RewriteCluesError } from "./rewrite";
+export {
+  createAnthropicClient,
+  DEFAULT_ANTHROPIC_MODEL,
+  type AnthropicClientOptions,
+} from "./client";
 export { validateThemeResult } from "./validation";
 export { validateRewrittenClues } from "./clue-validation";
 export type {
@@ -10,4 +14,8 @@ export type {
   RewriteCluesResult,
   AIClient,
   JSONSchema,
+  ThemeValidationCode,
+  ThemeValidationError,
+  RewriteCluesValidationCode,
+  RewriteCluesValidationError,
 } from "./types";
diff --git a/packages/logic-grid-ai/src/rewrite.test.ts b/packages/logic-grid-ai/src/rewrite.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, expect, vi } from "vitest";
 import { generate, deduce } from "logic-grid";
-import { rewriteClues } from "./rewrite";
+import { rewriteClues, RewriteCluesError } from "./rewrite";
 import type { AIClient, RewriteCluesResult } from "./types";
 import type { Clue } from "logic-grid";
 import * as clientModule from "./client";
@@ -123,17 +123,25 @@ describe("rewriteClues", () => {
     expect(result[0].text).toBe(VALID_RESULT.clues[0]);
   });
 
-  it("throws after max retries", async () => {
+  it("throws RewriteCluesError with structured errors after max retries", async () => {
     const badResult: RewriteCluesResult = {
       clues: ["Only one clue."],
     };
 
-    await expect(
-      rewriteClues({
+    let caught: unknown;
+    try {
+      await rewriteClues({
         clues: SAMPLE_CLUES,
         client: mockClient(badResult),
-      }),
-    ).rejects.toThrow("Clue rewriting failed after 3 attempts");
+      });
+    } catch (e) {
+      caught = e;
+    }
+
+    expect(caught).toBeInstanceOf(RewriteCluesError);
+    const err = caught as RewriteCluesError;
+    expect(err.message).toContain("Clue rewriting failed after 3 attempts");
+    expect(err.errors.some((e) => e.code === "wrong_clue_count")).toBe(true);
   });
 
   it("propagates client errors", async () => {