diff --git a/src/__tests__/llm-json.test.ts b/src/__tests__/llm-json.test.ts new file mode 100644 index 0000000..819a08c --- /dev/null +++ b/src/__tests__/llm-json.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, test } from "bun:test"; +import { extractJsonObject } from "../llm-json.ts"; + +describe("extractJsonObject", () => { + test("parses a clean JSON object", () => { + expect(extractJsonObject('{"a":1,"b":"x"}')).toEqual({ a: 1, b: "x" }); + }); + + test("strips a ```json code fence", () => { + expect(extractJsonObject('```json\n{"ok":true}\n```')).toEqual({ + ok: true, + }); + }); + + test("strips a plain ``` code fence", () => { + expect(extractJsonObject('```\n{"ok":false}\n```')).toEqual({ ok: false }); + }); + + test("strips a leading block", () => { + const raw = 'reasoning here\n{"stage":"pitch"}'; + expect(extractJsonObject(raw)).toEqual({ stage: "pitch" }); + }); + + test("extracts the object from surrounding prose", () => { + const raw = 'Ответ: {"winner":"a"} — надеюсь, помог'; + expect(extractJsonObject(raw)).toEqual({ winner: "a" }); + }); + + test("returns null for prose with no JSON object", () => { + expect(extractJsonObject("I cannot determine the outcome")).toBeNull(); + }); + + test("returns null for an empty string", () => { + expect(extractJsonObject("")).toBeNull(); + }); + + test("returns null for a JSON array (objects only)", () => { + expect(extractJsonObject("[1,2,3]")).toBeNull(); + }); + + test("returns null for malformed JSON", () => { + expect(extractJsonObject('{"a": }')).toBeNull(); + }); + + test("returns null for a non-string input", () => { + expect(extractJsonObject(undefined as unknown as string)).toBeNull(); + }); +}); diff --git a/src/__tests__/prompt.test.ts b/src/__tests__/prompt.test.ts new file mode 100644 index 0000000..53ea0bd --- /dev/null +++ b/src/__tests__/prompt.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, test } from "bun:test"; +import { composeSystemPrompt } from "../prompt.ts"; +import { marinaPrime } from "../styles/marina-prime.ts"; + +describe("composeSystemPrompt — core sections", () => { + test("includes the persona name", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).toContain( + "Тебя зовут Марина", + ); + }); + + test("includes the framework blurb", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).toContain("NEPQ"); + }); + + test("separates sections with blank lines", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).toContain("\n\n"); + }); +}); + +describe("composeSystemPrompt — few-shot toggle", () => { + test("included by default", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).toContain( + "ПРИМЕРЫ ДИАЛОГА", + ); + }); + + test("omitted when includeFewShot is false", () => { + const prompt = composeSystemPrompt(marinaPrime, "qualify", null, { + includeFewShot: false, + }); + expect(prompt).not.toContain("ПРИМЕРЫ ДИАЛОГА"); + }); +}); + +describe("composeSystemPrompt — KB context", () => { + test("omitted when no context is provided", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).not.toContain( + "KB CONTEXT (актуальные факты", + ); + }); + + test("included verbatim when context is provided", () => { + const kb = "Корея: оклад ₩110 000 за смену."; + const prompt = composeSystemPrompt(marinaPrime, "qualify", kb); + expect(prompt).toContain("KB CONTEXT (актуальные факты"); + expect(prompt).toContain(kb); + }); +}); + +describe("composeSystemPrompt — human persona disclosure branch", () => { + test("evasive answer when botDisclosureOnDirectQuestion is false", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).toContain( + "отвечай уклончиво", + ); + }); + + test("honest answer when botDisclosureOnDirectQuestion is true", () => { + const style = structuredClone(marinaPrime); + style.guardrails.botDisclosureOnDirectQuestion = true; + const prompt = composeSystemPrompt(style, "qualify"); + expect(prompt).toContain("ЧЕСТНО ответь, что ты ИИ-ассистент"); + expect(prompt).not.toContain("отвечай уклончиво"); + }); +}); + +describe("composeSystemPrompt — persona facts", () => { + test("no facts section when persona has no facts", () => { + expect(composeSystemPrompt(marinaPrime, "qualify")).not.toContain( + "ЛИЧНЫЕ ФАКТЫ", + ); + }); + + test("facts section rendered when persona facts are present", () => { + const style = structuredClone(marinaPrime); + style.persona.facts = { возраст: "28", город: "Москва" }; + const prompt = composeSystemPrompt(style, "qualify"); + expect(prompt).toContain("ЛИЧНЫЕ ФАКТЫ"); + expect(prompt).toContain("город: Москва"); + }); +}); diff --git a/src/shadow-eval.test.ts b/src/__tests__/shadow-eval.test.ts similarity index 96% rename from src/shadow-eval.test.ts rename to src/__tests__/shadow-eval.test.ts index ff5d0f7..ba7beef 100644 --- a/src/shadow-eval.test.ts +++ b/src/__tests__/shadow-eval.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import { shadowDecide } from "./shadow-eval.ts"; +import { shadowDecide } from "../shadow-eval.ts"; describe("shadowDecide", () => { test("0 pairs → inconclusive", () => { diff --git a/src/__tests__/stage-classifier.test.ts b/src/__tests__/stage-classifier.test.ts new file mode 100644 index 0000000..79d9b34 --- /dev/null +++ b/src/__tests__/stage-classifier.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, test } from "bun:test"; +import type { ChatClient } from "@chatman-media/rag"; +import { classifyStage, parseClassifierOutput } from "../stage-classifier.ts"; + +/** Minimal ChatClient whose `complete` returns (or throws) a fixed value. */ +function stubChat(reply: string | (() => never)): ChatClient { + return { + async complete() { + if (typeof reply === "function") return reply(); + return reply; + }, + }; +} + +describe("parseClassifierOutput", () => { + test("parses a clean object", () => { + expect(parseClassifierOutput('{"stage":"pitch","confidence":0.9}')).toEqual( + { stage: "pitch", confidence: 0.9 }, + ); + }); + + test("strips a ```json code fence", () => { + const raw = '```json\n{"stage":"qualify","confidence":0.8}\n```'; + expect(parseClassifierOutput(raw)).toEqual({ + stage: "qualify", + confidence: 0.8, + }); + }); + + test("extracts the object past an 'Ответ:' prefix", () => { + const raw = 'Ответ: {"stage":"close","confidence":0.7}'; + expect(parseClassifierOutput(raw)).toEqual({ + stage: "close", + confidence: 0.7, + }); + }); + + test("clamps a percentage-style confidence (95 → 0.95)", () => { + expect(parseClassifierOutput('{"stage":"pitch","confidence":95}')).toEqual({ + stage: "pitch", + confidence: 0.95, + }); + }); + + test("returns null for malformed JSON", () => { + expect(parseClassifierOutput("not json at all")).toBeNull(); + }); + + test("returns null when stage field is missing", () => { + expect(parseClassifierOutput('{"confidence":0.9}')).toBeNull(); + }); + + test("returns null when confidence is not a number", () => { + expect( + parseClassifierOutput('{"stage":"pitch","confidence":"high"}'), + ).toBeNull(); + }); +}); + +describe("classifyStage — fallback paths", () => { + const base = { + userMessage: "сколько платят?", + currentStage: "qualify" as const, + turnNumber: 3, + }; + + test("LLM error → regex fallback with reason 'llm-error'", async () => { + const result = await classifyStage({ + ...base, + chat: stubChat(() => { + throw new Error("network down"); + }), + }); + expect(result.source).toBe("regex-fallback"); + expect(result.fallbackReason).toBe("llm-error"); + }); + + test("unparseable output → reason 'parse-error'", async () => { + const result = await classifyStage({ + ...base, + chat: stubChat("I have no idea"), + }); + expect(result.fallbackReason).toBe("parse-error"); + }); + + test("unknown stage → reason 'unknown-stage'", async () => { + const result = await classifyStage({ + ...base, + chat: stubChat('{"stage":"smalltalk","confidence":0.9}'), + }); + expect(result.fallbackReason).toBe("unknown-stage"); + }); + + test("below-threshold confidence → reason 'low-confidence'", async () => { + const result = await classifyStage({ + ...base, + chat: stubChat('{"stage":"pitch","confidence":0.3}'), + }); + expect(result.fallbackReason).toBe("low-confidence"); + }); +}); + +describe("classifyStage — LLM path", () => { + test("valid high-confidence verdict is taken as-is", async () => { + const result = await classifyStage({ + userMessage: "сколько платят?", + currentStage: "qualify", + turnNumber: 3, + chat: stubChat('{"stage":"pitch","confidence":0.92}'), + }); + expect(result.source).toBe("llm"); + expect(result.stage).toBe("pitch"); + expect(result.confidence).toBe(0.92); + }); +}); diff --git a/src/ab-router.test.ts b/src/ab-router.test.ts deleted file mode 100644 index 5cc35c9..0000000 --- a/src/ab-router.test.ts +++ /dev/null @@ -1,77 +0,0 @@ -import { describe, expect, test } from "bun:test"; -import { pickVariant } from "./ab-router.ts"; - -const EXP = { - slug: "test-exp", - variants: [ - { styleSlug: "a", weight: 50 }, - { styleSlug: "b", weight: 50 }, - ], -}; - -describe("pickVariant", () => { - test("returns a known variant slug", () => { - const result = pickVariant(EXP, "user-1"); - expect(["a", "b"]).toContain(result); - }); - - test("same userId always gets same variant (deterministic)", () => { - const r1 = pickVariant(EXP, "user-42"); - const r2 = pickVariant(EXP, "user-42"); - expect(r1).toBe(r2); - }); - - test("different userIds can get different variants", () => { - const results = new Set( - Array.from({ length: 20 }, (_, i) => pickVariant(EXP, `user-${i}`)), - ); - expect(results.size).toBeGreaterThan(1); - }); - - test("100% weight on one variant always returns it", () => { - const exp = { - slug: "one-sided", - variants: [{ styleSlug: "only", weight: 100 }], - }; - for (let i = 0; i < 50; i++) { - expect(pickVariant(exp, `u${i}`)).toBe("only"); - } - }); - - test("distribution is roughly proportional to weights", () => { - const exp = { - slug: "weighted", - variants: [ - { styleSlug: "heavy", weight: 80 }, - { styleSlug: "light", weight: 20 }, - ], - }; - const counts: Record = { heavy: 0, light: 0 }; - for (let i = 0; i < 500; i++) { - const v = pickVariant(exp, String(i)); - counts[v] = (counts[v] ?? 0) + 1; - } - // heavy should win ~80% of the time — allow ±10% slack - expect(counts.heavy).toBeGreaterThan(300); - expect(counts.light).toBeLessThan(200); - }); - - test("throws on empty variants", () => { - expect(() => pickVariant({ slug: "x", variants: [] }, "u")).toThrow(); - }); - - test("throws on zero total weight", () => { - expect(() => - pickVariant( - { slug: "x", variants: [{ styleSlug: "a", weight: 0 }] }, - "u", - ), - ).toThrow(); - }); - - test("numeric userId is treated same as its string equivalent", () => { - const byStr = pickVariant(EXP, "123"); - const byNum = pickVariant(EXP, 123); - expect(byStr).toBe(byNum); - }); -}); diff --git a/src/coach.ts b/src/coach.ts index 775c10d..c1b3661 100644 --- a/src/coach.ts +++ b/src/coach.ts @@ -1,3 +1,4 @@ +import { extractJsonObject } from "./llm-json.ts"; import type { ISelfPlayMatchesRepo } from "./store.ts"; /** * Coach-LLM: reads recent self-play LOSSES and DRAWS for a style, @@ -216,31 +217,12 @@ export async function proposeStyleEdits( /** * Tolerant JSON parser. Strips code fences, attempts JSON.parse, falls - * back to extracting an outer object via regex. Always returns a valid - * CoachProposal (with raw output preserved on parse failure). + * back to extracting an outer object. Always returns a valid CoachProposal + * (with raw output preserved on parse failure). */ export function parseProposal(raw: string): CoachProposal { - const stripped = raw - .replace(/^```(?:json)?\s*/i, "") - .replace(/\s*```\s*$/i, "") - .trim(); - // First try a direct parse. - try { - const parsed = JSON.parse(stripped); - return normalizeProposal(parsed, raw); - } catch { - /* fall through */ - } - // Try to extract the outermost {...} block. - const m = stripped.match(/\{[\s\S]*\}/); - if (m) { - try { - const parsed = JSON.parse(m[0]); - return normalizeProposal(parsed, raw); - } catch { - /* fall through */ - } - } + const parsed = extractJsonObject(raw); + if (parsed) return normalizeProposal(parsed, raw); return { summary: "(coach output unparseable — see raw)", edits: {}, diff --git a/src/elo.test.ts b/src/elo.test.ts deleted file mode 100644 index 1a2639f..0000000 --- a/src/elo.test.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { describe, expect, test } from "bun:test"; -import { - actualScore, - ELO_BASELINE, - ELO_DEFAULT_K, - eloUpdate, - eloUpdatePair, - expectedScore, -} from "./elo.ts"; - -describe("actualScore", () => { - test("won → 1", () => expect(actualScore("won")).toBe(1)); - test("lost → 0", () => expect(actualScore("lost")).toBe(0)); - test("draw → 0.5", () => expect(actualScore("draw")).toBe(0.5)); -}); - -describe("expectedScore", () => { - test("equal ratings → 0.5", () => { - expect(expectedScore(1500, 1500)).toBeCloseTo(0.5); - }); - test("higher self → > 0.5", () => { - expect(expectedScore(1600, 1500)).toBeGreaterThan(0.5); - }); - test("lower self → < 0.5", () => { - expect(expectedScore(1400, 1500)).toBeLessThan(0.5); - }); -}); - -describe("eloUpdate", () => { - test("win from baseline raises rating", () => { - const next = eloUpdate(ELO_BASELINE, "won"); - expect(next).toBeGreaterThan(ELO_BASELINE); - }); - test("loss from baseline lowers rating", () => { - const next = eloUpdate(ELO_BASELINE, "lost"); - expect(next).toBeLessThan(ELO_BASELINE); - }); - test("draw from baseline changes by less than K/2", () => { - const next = eloUpdate(ELO_BASELINE, "draw"); - expect(Math.abs(next - ELO_BASELINE)).toBeLessThan(ELO_DEFAULT_K / 2); - }); - test("win + loss are symmetric around baseline", () => { - const win = eloUpdate(ELO_BASELINE, "won"); - const loss = eloUpdate(ELO_BASELINE, "lost"); - expect(win + loss).toBe(2 * ELO_BASELINE); - }); - test("win delta ≈ K*(1-0.5) = 16 at equal ratings", () => { - expect(eloUpdate(1500, "won")).toBe(1516); - expect(eloUpdate(1500, "lost")).toBe(1484); - }); - test("custom k and opponentRating respected", () => { - const next = eloUpdate(1500, "won", { k: 16, opponentRating: 1500 }); - expect(next).toBe(1508); - }); -}); - -describe("eloUpdatePair", () => { - test("symmetric: A wins → A up, B down", () => { - const { a, b } = eloUpdatePair(1500, 1500, "won"); - expect(a).toBeGreaterThan(1500); - expect(b).toBeLessThan(1500); - }); - test("sum of ratings is preserved (±1 rounding)", () => { - const { a, b } = eloUpdatePair(1500, 1500, "won"); - expect(Math.abs(a + b - 3000)).toBeLessThanOrEqual(1); - }); - test("draw at equal ratings leaves both unchanged", () => { - const { a, b } = eloUpdatePair(1500, 1500, "draw"); - expect(a).toBe(1500); - expect(b).toBe(1500); - }); - test("A wins → delta = -(B delta) within rounding", () => { - const { a, b } = eloUpdatePair(1600, 1400, "won"); - expect(Math.abs(a - 1600 + (b - 1400))).toBeLessThanOrEqual(1); - }); -}); diff --git a/src/llm-json.ts b/src/llm-json.ts new file mode 100644 index 0000000..18bc9b8 --- /dev/null +++ b/src/llm-json.ts @@ -0,0 +1,49 @@ +/** + * Tolerant JSON-object extraction for LLM output. + * + * Models rarely return a clean JSON object: they wrap it in markdown code + * fences, prepend `...` reasoning, or surround it with prose. + * `extractJsonObject` strips all of that and returns the first parseable + * object — or `null` when nothing usable is found. + * + * Callers keep their own domain-specific normalization and last-resort + * regex fallback; this only handles the generic strip-and-parse step that + * was previously duplicated across coach / judge / pairwise / classifier. + */ + +function tryParseObject(s: string): Record | null { + try { + const parsed: unknown = JSON.parse(s); + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { + return parsed as Record; + } + } catch { + /* not valid JSON */ + } + return null; +} + +/** + * Strip think-tags and code fences, then return the first JSON object found + * — either the whole payload or the outermost `{...}` block embedded in it. + */ +export function extractJsonObject(raw: string): Record | null { + if (typeof raw !== "string") return null; + const stripped = raw + .replace(/[\s\S]*?<\/think>/gi, "") + .replace(/^\s*```(?:json|js)?\s*/i, "") + .replace(/\s*```\s*$/i, "") + .trim(); + + const direct = tryParseObject(stripped); + if (direct) return direct; + + // Fall back to the outermost { ... } block — handles leading prefixes + // ("Ответ:", "Result:") and trailing commentary around the object. + const start = stripped.indexOf("{"); + const end = stripped.lastIndexOf("}"); + if (start >= 0 && end > start) { + return tryParseObject(stripped.slice(start, end + 1)); + } + return null; +} diff --git a/src/self-play/judge.ts b/src/self-play/judge.ts index fe3e4fe..8654eff 100644 --- a/src/self-play/judge.ts +++ b/src/self-play/judge.ts @@ -13,6 +13,7 @@ */ import type { ChatClient, ChatMessage } from "@chatman-media/rag"; import type { EloOutcome } from "../elo.ts"; +import { extractJsonObject } from "../llm-json.ts"; export interface JudgeInput { /** Style under test (e.g. "marina-prime-v1"). */ @@ -93,28 +94,18 @@ export async function judgeMatch(input: JudgeInput): Promise { * falls back to regex match if necessary. Exported for tests. */ export function parseVerdict(raw: string): JudgeVerdict { - const stripped = raw - .replace(/[\s\S]*?<\/think>/gi, "") - .replace(/^```(?:json)?\s*/i, "") - .replace(/\s*```\s*$/i, "") - .trim(); - // Try direct parse. - try { - const parsed = JSON.parse(stripped); - if (parsed && typeof parsed === "object") { - const outcome = pickOutcome(parsed.outcome); - const reason = - typeof parsed.reason === "string" ? parsed.reason : "(no reason)"; - if (outcome) return { outcome, reason }; - } - } catch { - /* fall through */ + const parsed = extractJsonObject(raw); + if (parsed) { + const outcome = pickOutcome(parsed.outcome); + const reason = + typeof parsed.reason === "string" ? parsed.reason : "(no reason)"; + if (outcome) return { outcome, reason }; } // Regex fallback — find an "outcome": "..." pair anywhere. - const m = stripped.match(/"outcome"\s*:\s*"(won|lost|draw)"/i); + const m = raw.match(/"outcome"\s*:\s*"(won|lost|draw)"/i); if (m) { const outcome = (m[1] ?? "draw").toLowerCase() as EloOutcome; - const reasonMatch = stripped.match(/"reason"\s*:\s*"([^"]+)"/); + const reasonMatch = raw.match(/"reason"\s*:\s*"([^"]+)"/); return { outcome, reason: reasonMatch?.[1] ?? "(no reason)", @@ -122,7 +113,7 @@ export function parseVerdict(raw: string): JudgeVerdict { } console.warn( "[judge] unparseable output (first 300 chars):", - stripped.slice(0, 300), + raw.slice(0, 300), ); return { outcome: "draw", reason: "judge output unparseable", raw }; } diff --git a/src/self-play/orchestrator.ts b/src/self-play/orchestrator.ts index bac24a3..87efe15 100644 --- a/src/self-play/orchestrator.ts +++ b/src/self-play/orchestrator.ts @@ -93,6 +93,12 @@ export interface SelfPlayMatchResult { fabricationsCaught: number; /** Row id in self_play_matches, or null when the insert failed. */ matchId: number | null; + /** + * Whether the match transcript was durably persisted. `false` means the + * insert threw and this result exists only in memory — callers running + * evaluation loops should treat the run as not recorded. + */ + persisted: boolean; /** Non-fatal errors collected during the match (e.g. skill grading failures). */ warnings: string[]; } @@ -333,9 +339,11 @@ async function finalize( leadId, fabricationsCaught, matchId: null, + persisted: false, warnings, }; result.matchId = await persistSelfPlayMatch(deps, result, verdict.reason); + result.persisted = result.matchId !== null; return result; } diff --git a/src/self-play/pairwise.ts b/src/self-play/pairwise.ts index cfb1c0d..342f95d 100644 --- a/src/self-play/pairwise.ts +++ b/src/self-play/pairwise.ts @@ -11,6 +11,7 @@ import type { ChatClient, ChatMessage } from "@chatman-media/rag"; import type { EloOutcome } from "../elo.ts"; import { eloUpdatePair } from "../elo.ts"; +import { extractJsonObject } from "../llm-json.ts"; import type { IPairwiseMatchesRepo } from "../store.ts"; import type { Style } from "../types.ts"; import { @@ -51,6 +52,12 @@ export interface PairwiseMatchResult { eloAAfter: number; eloBAfter: number; pairwiseId: number | null; + /** + * Whether the pairwise match was durably persisted. `false` means the + * insert threw and this result exists only in memory — callers running + * A/B evaluation loops should treat the comparison as not recorded. + */ + persisted: boolean; } const PAIRWISE_SYSTEM = (hint: string) => @@ -117,28 +124,17 @@ export async function judgePairwise(args: { } export function parsePairwiseVerdict(raw: string): PairwiseVerdict { - const stripped = raw - .replace(/[\s\S]*?<\/think>/gi, "") - .replace(/^```(?:json)?\s*/i, "") - .replace(/\s*```\s*$/i, "") - .trim(); - try { - const parsed = JSON.parse(stripped); - if (parsed && typeof parsed === "object") { - const winner = pickWinner((parsed as Record).winner); - const reason = - typeof (parsed as Record).reason === "string" - ? ((parsed as Record).reason as string) - : "(no reason)"; - if (winner) return { winner, reason }; - } - } catch { - /* fall through to regex */ + const parsed = extractJsonObject(raw); + if (parsed) { + const winner = pickWinner(parsed.winner); + const reason = + typeof parsed.reason === "string" ? parsed.reason : "(no reason)"; + if (winner) return { winner, reason }; } - const m = stripped.match(/"winner"\s*:\s*"(a|b|draw)"/i); + const m = raw.match(/"winner"\s*:\s*"(a|b|draw)"/i); if (m) { const winner = (m[1] ?? "draw").toLowerCase() as PairwiseWinner; - const reasonMatch = stripped.match(/"reason"\s*:\s*"([^"]+)"/); + const reasonMatch = raw.match(/"reason"\s*:\s*"([^"]+)"/); return { winner, reason: reasonMatch?.[1] ?? "(no reason)" }; } return { winner: "draw", reason: "pairwise judge unparseable", raw }; @@ -189,6 +185,7 @@ export async function runPairwiseMatch( if (newB !== bRating) await deps.ratings.setRating(input.styleBId, newB); let pairwiseId: number | null = null; + let persisted = false; try { pairwiseId = await deps.pairwiseMatches.insert({ matchAId: matchA.matchId ?? 0, @@ -199,6 +196,7 @@ export async function runPairwiseMatch( winner: verdict.winner, reason: verdict.reason, }); + persisted = true; } catch (err) { console.warn("[pairwise] failed to persist pairwise match:", err); } @@ -213,5 +211,6 @@ export async function runPairwiseMatch( eloAAfter: newA, eloBAfter: newB, pairwiseId, + persisted, }; } diff --git a/src/skill-recommendations.test.ts b/src/skill-recommendations.test.ts deleted file mode 100644 index bab152a..0000000 --- a/src/skill-recommendations.test.ts +++ /dev/null @@ -1,109 +0,0 @@ -import { describe, expect, test } from "bun:test"; -import { - rankSkillRecommendations, - wilsonLowerBound, -} from "./skill-recommendations.ts"; -import type { SkillAggregate, SkillRow } from "./store.ts"; - -describe("wilsonLowerBound", () => { - test("0 total → 0", () => expect(wilsonLowerBound(0, 0)).toBe(0)); - test("100% win rate returns positive lower bound", () => { - expect(wilsonLowerBound(10, 10)).toBeGreaterThan(0.7); - }); - test("0% win rate → 0", () => { - expect(wilsonLowerBound(0, 10)).toBe(0); - }); - test("50% rate, large sample → near 0.5", () => { - expect(wilsonLowerBound(500, 1000)).toBeCloseTo(0.47, 1); - }); - test("lower bound < observed rate", () => { - const lb = wilsonLowerBound(7, 10); - expect(lb).toBeLessThan(0.7); - expect(lb).toBeGreaterThan(0); - }); - test("more samples → tighter (higher lower bound) for same rate", () => { - const lb10 = wilsonLowerBound(5, 10); - const lb100 = wilsonLowerBound(50, 100); - expect(lb100).toBeGreaterThan(lb10); - }); -}); - -const makeSkill = (slug: string, family = "cialdini"): SkillRow => ({ - slug, - display_name: slug, - family, - prompt_fragment: "", - applicable_stages: [], - is_enabled: true, -}); - -const makeAgg = ( - slug: string, - wins: number, - losses: number, - draws = 0, -): SkillAggregate => ({ - skill_slug: slug, - wins, - losses, - draws, - count: wins + losses + draws, -}); - -describe("rankSkillRecommendations", () => { - test("returns empty when catalogue is empty", () => { - expect(rankSkillRecommendations([], [])).toEqual([]); - }); - - test("filters out disabled skills", () => { - const skill = { ...makeSkill("s1"), is_enabled: false }; - expect(rankSkillRecommendations([skill], [])).toHaveLength(0); - }); - - test("filters out noise family", () => { - const skill = makeSkill("noise-skill", "noise"); - expect(rankSkillRecommendations([skill], [])).toHaveLength(0); - }); - - test("skill with no aggregates has count=0, NaN rate", () => { - const [rec] = rankSkillRecommendations([makeSkill("s1")], []); - expect(rec?.count).toBe(0); - expect(rec?.observed_rate).toBeNaN(); - expect(rec?.confidence_lower).toBe(0); - expect(rec?.recommended).toBe(false); - }); - - test("skill below minSamples has confidence_lower=0", () => { - const [rec] = rankSkillRecommendations( - [makeSkill("s1")], - [makeAgg("s1", 3, 0)], - { minSamples: 5 }, - ); - expect(rec?.confidence_lower).toBe(0); - }); - - test("high win-rate skill is recommended once samples met", () => { - const [rec] = rankSkillRecommendations( - [makeSkill("s1")], - [makeAgg("s1", 8, 2)], - { minSamples: 5, acceptThreshold: 0.4 }, - ); - expect(rec?.recommended).toBe(true); - }); - - test("ranks high-confidence skill above low-confidence", () => { - const skills = [makeSkill("weak"), makeSkill("strong")]; - const aggs = [makeAgg("weak", 3, 7), makeAgg("strong", 9, 1)]; - const recs = rankSkillRecommendations(skills, aggs, { minSamples: 5 }); - expect(recs[0]?.slug).toBe("strong"); - }); - - test("draws count as 0.5 wins for observed_rate", () => { - const [rec] = rankSkillRecommendations( - [makeSkill("s1")], - [makeAgg("s1", 5, 5, 10)], - ); - // wins=5, draws=10 → successCount=10, total=20 → rate=0.5 - expect(rec?.observed_rate).toBeCloseTo(0.5); - }); -}); diff --git a/src/stage-classifier.ts b/src/stage-classifier.ts index fd07c6e..e1d0244 100644 --- a/src/stage-classifier.ts +++ b/src/stage-classifier.ts @@ -1,4 +1,5 @@ import type { ChatClient } from "@chatman-media/rag"; +import { extractJsonObject } from "./llm-json.ts"; import { nextStage } from "./stage-router.ts"; import { FUNNEL_STAGES, type FunnelStage } from "./types.ts"; @@ -89,23 +90,8 @@ interface ParsedClassification { export function parseClassifierOutput( raw: string, ): ParsedClassification | null { - if (typeof raw !== "string") return null; - // Strip common code-fence wrappers. - let s = raw.trim(); - s = s.replace(/^```(?:json|js)?\s*/i, "").replace(/```\s*$/, ""); - // Locate the first { and matching last } — naive but works because the - // expected payload is a flat object with two scalar fields. - const start = s.indexOf("{"); - const end = s.lastIndexOf("}"); - if (start < 0 || end <= start) return null; - let parsed: unknown; - try { - parsed = JSON.parse(s.slice(start, end + 1)); - } catch { - return null; - } - if (typeof parsed !== "object" || parsed === null) return null; - const obj = parsed as Record; + const obj = extractJsonObject(raw); + if (!obj) return null; if (typeof obj.stage !== "string") return null; if (typeof obj.confidence !== "number" || !Number.isFinite(obj.confidence)) { return null; diff --git a/src/stage-router.test.ts b/src/stage-router.test.ts deleted file mode 100644 index 9b39214..0000000 --- a/src/stage-router.test.ts +++ /dev/null @@ -1,138 +0,0 @@ -import { describe, expect, test } from "bun:test"; -import { nextStage } from "./stage-router.ts"; - -describe("nextStage — objection keywords", () => { - for (const word of [ - "но", - "боюсь", - "развод", - "обман", - "не уверен", - "страшно", - ]) { - test(`"${word}" → objection`, () => { - expect( - nextStage({ - turnNumber: 3, - currentStage: "qualify", - lastUserMessage: word, - }), - ).toBe("objection"); - }); - } -}); - -describe("nextStage — pitch keywords", () => { - for (const word of [ - "сколько", - "зарплата", - "вакансии", - "контракт", - "виза", - "условия", - ]) { - test(`"${word}" → pitch`, () => { - expect( - nextStage({ - turnNumber: 3, - currentStage: "qualify", - lastUserMessage: word, - }), - ).toBe("pitch"); - }); - } -}); - -describe("nextStage — agreement → close", () => { - for (const stage of ["pitch", "qualify", "objection"] as const) { - test(`"давай" from ${stage} → close`, () => { - expect( - nextStage({ - turnNumber: 5, - currentStage: stage, - lastUserMessage: "давай", - }), - ).toBe("close"); - }); - } - test("agreement from opener does NOT go to close", () => { - const result = nextStage({ - turnNumber: 5, - currentStage: "opener", - lastUserMessage: "ок", - }); - expect(result).not.toBe("close"); - }); -}); - -describe("nextStage — turn 1 fallback", () => { - test("turn 1, null stage → opener", () => { - expect( - nextStage({ - turnNumber: 1, - currentStage: null, - lastUserMessage: "привет", - }), - ).toBe("opener"); - }); - test("turn 1, existing stage preserved", () => { - expect( - nextStage({ - turnNumber: 1, - currentStage: "qualify", - lastUserMessage: "привет", - }), - ).toBe("qualify"); - }); -}); - -describe("nextStage — stage progression", () => { - test("opener → qualify on turn 2", () => { - expect( - nextStage({ - turnNumber: 2, - currentStage: "opener", - lastUserMessage: "интересно", - }), - ).toBe("qualify"); - }); - test("qualify stays on qualifier pattern", () => { - expect( - nextStage({ - turnNumber: 3, - currentStage: "qualify", - lastUserMessage: "мне 23 года, из Москвы", - }), - ).toBe("qualify"); - }); - test("close stays close", () => { - expect( - nextStage({ - turnNumber: 8, - currentStage: "close", - lastUserMessage: "думаю", - }), - ).toBe("close"); - }); -}); - -describe("nextStage — Cyrillic Unicode boundary", () => { - test("objection keyword inside sentence matches", () => { - expect( - nextStage({ - turnNumber: 3, - currentStage: "qualify", - lastUserMessage: "мне кажется это развод какой-то", - }), - ).toBe("objection"); - }); - test("pricing keyword inside sentence matches", () => { - expect( - nextStage({ - turnNumber: 3, - currentStage: "qualify", - lastUserMessage: "а сколько там платят?", - }), - ).toBe("pitch"); - }); -});