From 3c7639bec954f07df6fbebeac5bcde64ea3fb048 Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 10:56:19 -0600 Subject: [PATCH 01/22] feat(deepcitation): add renderVerifiedHtml shared pipeline --- .../__tests__/renderVerifiedHtml.test.ts | 52 ++++++++++++++++++ src/render/renderVerifiedHtml.ts | 54 +++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 src/render/__tests__/renderVerifiedHtml.test.ts create mode 100644 src/render/renderVerifiedHtml.ts diff --git a/src/render/__tests__/renderVerifiedHtml.test.ts b/src/render/__tests__/renderVerifiedHtml.test.ts new file mode 100644 index 00000000..5c7a5c1c --- /dev/null +++ b/src/render/__tests__/renderVerifiedHtml.test.ts @@ -0,0 +1,52 @@ +// packages/deepcitation/src/render/__tests__/renderVerifiedHtml.test.ts +import { describe, expect, it } from "bun:test"; +import { renderVerifiedHtml } from "../renderVerifiedHtml.js"; + +const fakeCitations = { + "abc123": { + sourceMatch: "blood pressure", + sourceContext: "The patient's blood pressure was 120/80.", + pageNumber: 1, + attachmentId: "att_001", + }, +}; + +const fakeVerifications = { + "abc123": { + status: "found" as const, + verifiedPageNumber: 1, + verifiedSourceContext: "blood pressure was 120/80", + attachmentId: "att_001", + }, +}; + +describe("renderVerifiedHtml", () => { + it("returns a non-empty HTML string", () => { + const html = renderVerifiedHtml("The blood pressure [1] was normal.", fakeCitations, fakeVerifications); + expect(typeof html).toBe("string"); + expect(html.length).toBeGreaterThan(100); + }); + + it("embeds dc-data script tag with verification data", () => { + const html = renderVerifiedHtml("The blood pressure [1] was normal.", fakeCitations, fakeVerifications); + expect(html).toContain("dc-data"); + expect(html).toContain("abc123"); + }); + + it("returns valid HTML with a body tag", () => { + const html = renderVerifiedHtml("Hello [1].", fakeCitations, fakeVerifications); + expect(html).toContain(""); + }); + + it("accepts empty attachments without throwing", () => { + expect(() => + renderVerifiedHtml("Hello [1].", fakeCitations, fakeVerifications, {}) + ).not.toThrow(); + }); + + it("uses the title option in the HTML output", () => { + const html = renderVerifiedHtml("Hello.", {}, {}, {}, { title: "My Report" }); + expect(html).toContain("My Report"); + }); +}); diff --git a/src/render/renderVerifiedHtml.ts b/src/render/renderVerifiedHtml.ts new file mode 100644 index 00000000..715cd651 --- /dev/null +++ b/src/render/renderVerifiedHtml.ts @@ -0,0 +1,54 @@ +// packages/deepcitation/src/render/renderVerifiedHtml.ts +/** + * Shared HTML render pipeline: markdown visible text + citation data + * + verification results → self-contained HTML with CDN runtime injected. + * + * Used by the Functions `createReport` handler (server-side) and + * available for any other consumer that needs to render a verified report. + */ + +import type { CitationRecord } from "../types/citation.js"; +import type { AttachmentAssets, Verification } from "../types/verification.js"; +import { markdownToHtml } from "../cli/markdownToHtml.js"; +import { + buildCitationMaps, + injectCdnRuntime, + normalizeNumericMarkers, + reattachPageImages, + replaceCitationMarkers, +} from "../vanilla/reportUtils.js"; + +export interface RenderVerifiedHtmlOptions { + title?: string; + theme?: "light" | "dark" | "auto"; + indicatorVariant?: "icon" | "dot" | "none"; +} + +/** + * Render a self-contained verified HTML report from in-memory data. + * + * @param visibleText - Markdown from the LLM with CITATION_DATA already stripped + * @param parsedCitations - CitationRecord keyed by citation hash + * @param verifications - Verification results keyed by citation hash + * @param attachments - Optional attachment assets (for page image re-attachment) + * @param options - Title, theme, indicator variant + * @returns Complete HTML string ready to store or serve + */ +export function renderVerifiedHtml( + visibleText: string, + parsedCitations: CitationRecord, + verifications: Record, + attachments?: Record, + options?: RenderVerifiedHtmlOptions, +): string { + const { title = "", theme = "auto", indicatorVariant = "icon" } = options ?? {}; + const citationCount = Object.keys(parsedCitations).length; + const { sourceMatchMap, keyMap } = buildCitationMaps(parsedCitations); + const normalizedText = normalizeNumericMarkers(visibleText, sourceMatchMap); + let html = markdownToHtml(normalizedText, { style: "report", title, citationCount, sourceMatchMap }); + html = replaceCitationMarkers(html, parsedCitations); + const cdnVerifications = { ...verifications }; + reattachPageImages(cdnVerifications, attachments); + const { html: injected } = injectCdnRuntime(html, cdnVerifications, keyMap, { theme, indicatorVariant }); + return injected; +} From 920366131ac84706f1e3728d81816a484ff7810b Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 10:56:39 -0600 Subject: [PATCH 02/22] feat(deepcitation): export renderVerifiedHtml from public API --- src/index.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/index.ts b/src/index.ts index b17f0e41..9a3ecd54 100644 --- a/src/index.ts +++ b/src/index.ts @@ -222,3 +222,6 @@ export { isSafeDomain, } from "./utils/urlSafety.js"; export { getVerificationTextIndicator } from "./utils/verificationIndicator.js"; + +export { renderVerifiedHtml } from "./render/renderVerifiedHtml.js"; +export type { RenderVerifiedHtmlOptions } from "./render/renderVerifiedHtml.js"; From 72e85d472fc4d072ca0b5f1d277e5ec78dc49a6e Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 11:01:47 -0600 Subject: [PATCH 03/22] chore: delete publish.ts (replaced by server-side createReport) --- src/cli/publish.ts | 291 --------------------------------------------- 1 file changed, 291 deletions(-) delete mode 100644 src/cli/publish.ts diff --git a/src/cli/publish.ts b/src/cli/publish.ts deleted file mode 100644 index 366c00b0..00000000 --- a/src/cli/publish.ts +++ /dev/null @@ -1,291 +0,0 @@ -/** - * `deepcitation publish` — upload a verified HTML + `verify-response.json` - * pair to the DeepCitation hosted reports endpoint. - * - * Two entry points share the guards in `publishInMemory`: - * - The standalone `publish` subcommand (this file's default export), - * which reads the two files from disk. - * - The `verify` auto-publish path in `commands.ts`, which hands the - * freshly verified HTML + JSON straight from memory. - * - * Default visibility is `private` (owner-only — shows up on My - * Verifications but has no shareable link). Callers opt into `unlisted` - * (shareable by link) or `public` (listed, Portal-only) explicitly. - */ - -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; -import { basename, resolve } from "node:path"; -import type { - PublishVerificationReportOptions, - VerificationReport, - VerificationReportVisibility, -} from "../client/types.js"; -import { sanitizeForLog } from "../utils/logSafety.js"; -import { die, normalizeShortFlags, parseArgs } from "./cliUtils.js"; -import { createClient, requireAuth } from "./commands.js"; -import { runCitationLintChecks } from "./lint.js"; - -export const PUBLISH_HELP = `Usage: deepcitation publish --html --vr [options] - -Upload an already-verified HTML report and its companion verify-response.json -to the DeepCitation hosted reports endpoint. Returns a share URL. - -Default visibility is "private" — the report shows up on My Verifications -but has no shareable link. Pass "--vis unlisted" for a share-by-link URL, -or "--vis public" to list it (Portal session required for public). - -Options: - --html Path to the verified HTML produced by \`verify\` - --vr, --verify-response - Path to verify-response.json (sibling of the HTML) - --vis, --visibility private | unlisted | public (default: private) - --title Optional human-readable title - --attachment-id Optional source attachmentId to link back - --lint Run citation-syntax lint on the HTML before upload - -d, --dry-run Do not POST; print what would be uploaded and exit - -h, --help Show this help message - -Size limits (enforced locally to avoid a server round-trip): - HTML ≤ 5 MB - JSON ≤ 2 MB - -Examples: - deepcitation publish --html report-verified.html --vr report-verify-response.json - deepcitation publish --html r.html --vr r.json --vis public --title "Q2 report" - deepcitation publish --html r.html --vr r.json --lint - deepcitation publish --html r.html --vr r.json --dry-run -`; - -export const MAX_HTML_BYTES = 5 * 1024 * 1024; -export const MAX_JSON_BYTES = 2 * 1024 * 1024; - -export const ALLOWED_VISIBILITIES: readonly VerificationReportVisibility[] = ["private", "unlisted", "public"]; - -/** - * Reject HTML that contains a literal DeepCitation API key. Uploading a - * key — even to an unlisted share URL — is a one-way leak: anyone who - * guesses the ID can pull the key out. Fail closed before the POST. - * - * Minimum: 6-char prefix ("sk-dc-") + 14 alphanum chars = 20 chars total. - * Keep in sync with VALID_API_KEY_RE in cliUtils.ts which anchors the same - * minimum for format validation. - */ -export const API_KEY_LEAK_RE = /sk-dc-[a-zA-Z0-9]{14,}/; - -/** - * Publish an in-memory HTML + verify-response.json pair. Shared by the - * standalone `publish` subcommand and the one-shot `verify --pub` flag. - * - * Enforces the same fail-closed guards regardless of entry point: - * size caps, API-key leak scan, JSON parse check. Writes a publish - * receipt into `.deepcitation/publish-.json` on success. - */ -export async function publishInMemory(params: { - html: string; - verifyResponseJson: string; - visibility: VerificationReportVisibility; - title?: string; - attachmentId?: string; - /** - * Optional path of the HTML file the caller wrote to disk alongside - * this publish. Recorded in the receipt so audit/re-publish tooling - * can find the original artifact. - */ - htmlSourcePath?: string; -}): Promise { - const htmlBytes = Buffer.byteLength(params.html, "utf-8"); - const jsonBytes = Buffer.byteLength(params.verifyResponseJson, "utf-8"); - if (htmlBytes > MAX_HTML_BYTES) { - throw new Error(`HTML exceeds ${MAX_HTML_BYTES} bytes (got ${htmlBytes}). Cannot publish.`); - } - if (jsonBytes > MAX_JSON_BYTES) { - throw new Error(`verify-response.json exceeds ${MAX_JSON_BYTES} bytes (got ${jsonBytes}). Cannot publish.`); - } - if (API_KEY_LEAK_RE.test(params.html)) { - throw new Error( - "HTML contains a DeepCitation API key (sk-dc-...). Refusing to publish — " + - "remove the key from the report first. This rule is fail-closed and cannot be bypassed.", - ); - } - try { - JSON.parse(params.verifyResponseJson); - } catch (err) { - throw new Error(`verify-response.json is not valid JSON: ${err instanceof Error ? err.message : String(err)}`); - } - - const { apiKey } = await requireAuth(); - const dc = await createClient(apiKey); - - const options: PublishVerificationReportOptions = { - visibility: params.visibility, - ...(params.title ? { title: params.title } : {}), - ...(params.attachmentId ? { attachmentId: params.attachmentId } : {}), - }; - - const report = await dc.publishVerificationReport(params.html, params.verifyResponseJson, options); - - console.error(` id: ${report.id}`); - console.error(` shareUrl: ${report.shareUrl}`); - - // Write a local receipt for audit / re-publish tooling. The upload already - // succeeded at this point, so a disk error here must not surface as a - // publish failure — emit a warning and continue. - try { - const receiptDir = resolve(".deepcitation"); - if (!existsSync(receiptDir)) mkdirSync(receiptDir, { recursive: true }); - const receiptPath = resolve(receiptDir, `publish-${report.id}.json`); - writeFileSync( - receiptPath, - JSON.stringify( - { - id: report.id, - shareUrl: report.shareUrl, - htmlUrl: report.htmlUrl, - jsonUrl: report.jsonUrl, - visibility: report.visibility, - title: report.title, - createdAt: report.createdAt, - sources: params.htmlSourcePath - ? { html: params.htmlSourcePath, htmlName: basename(params.htmlSourcePath) } - : undefined, - }, - null, - 2, - ), - ); - console.error(` receipt: ${receiptPath}`); - } catch { - console.error(" Warning: could not write publish receipt to .deepcitation/ — report was uploaded successfully."); - } - return report; -} - -export function resolveVisibility(value: string | undefined, helpText: string): VerificationReportVisibility { - if (!value) return "private"; - if (!ALLOWED_VISIBILITIES.includes(value as VerificationReportVisibility)) { - die(`Invalid --vis "${sanitizeForLog(value)}". Allowed: ${ALLOWED_VISIBILITIES.join(", ")}`, helpText); - } - return value as VerificationReportVisibility; -} - -export async function publish(argv: string[]): Promise { - const normalized = normalizeShortFlags(argv); - - // Boolean flags — strip before parseArgs so they don't consume the next token. - const dryRun = normalized.includes("--dry-run"); - const lintFirst = normalized.includes("--lint"); - const booleans = new Set(["--dry-run", "--lint"]); - const filteredArgv = normalized.filter(a => !booleans.has(a)); - - const args = parseArgs(filteredArgv, PUBLISH_HELP); - - const htmlPath = args.html; - const jsonPath = args["verify-response"]; - if (!htmlPath) die("--html is required", PUBLISH_HELP); - if (!jsonPath) die("--vr (--verify-response) is required", PUBLISH_HELP); - - const htmlResolved = resolve(htmlPath); - const jsonResolved = resolve(jsonPath); - if (!existsSync(htmlResolved)) die(`HTML file not found: ${sanitizeForLog(htmlPath)}`, PUBLISH_HELP); - if (!existsSync(jsonResolved)) die(`verify-response.json not found: ${sanitizeForLog(jsonPath)}`, PUBLISH_HELP); - - // Read into buffers first so the size check uses the same bytes that will - // be uploaded — no TOCTOU window between stat and read. - const htmlBuf = readFileSync(htmlResolved); - const jsonBuf = readFileSync(jsonResolved); - if (htmlBuf.byteLength > MAX_HTML_BYTES) { - die(`HTML exceeds ${MAX_HTML_BYTES} bytes (got ${htmlBuf.byteLength}). Cannot publish.`, PUBLISH_HELP); - } - if (jsonBuf.byteLength > MAX_JSON_BYTES) { - die( - `verify-response.json exceeds ${MAX_JSON_BYTES} bytes (got ${jsonBuf.byteLength}). Cannot publish.`, - PUBLISH_HELP, - ); - } - - const html = htmlBuf.toString("utf-8"); - const verifyResponseJson = jsonBuf.toString("utf-8"); - - // Fail-closed: never upload an HTML body that has an API key in it. - if (API_KEY_LEAK_RE.test(html)) { - die( - `HTML file contains a DeepCitation API key (sk-dc-...). Refusing to publish — ` + - `remove the key from the file first. This rule is fail-closed and cannot be bypassed.`, - PUBLISH_HELP, - ); - } - - // JSON shape guard — local parse now saves a 400 round-trip later. - try { - JSON.parse(verifyResponseJson); - } catch (err) { - die(`verify-response.json is not valid JSON: ${err instanceof Error ? err.message : String(err)}`, PUBLISH_HELP); - } - - // Optional citation-syntax pre-check on the HTML body. Uses the same - // ruleset as `deepcitation lint`, so an agent that passes lint on the - // draft markdown will pass it on the verified HTML. - if (lintFirst) { - const findings = runCitationLintChecks(html); - const errs = findings.filter(f => f.severity === "ERR"); - if (errs.length > 0) { - for (const f of errs) { - const id = f.citationId !== undefined ? ` [${f.citationId}]` : ""; - console.error(` lint ERR ${f.rule}${id}: ${f.message}`); - } - die(`--lint found ${errs.length} citation-syntax error(s); refusing to publish`, PUBLISH_HELP); - } - const warns = findings.filter(f => f.severity === "WARN"); - if (warns.length > 0) { - console.error(` lint: ${warns.length} warning(s) (not blocking)`); - } else { - console.error(` lint: clean`); - } - } - - const visibility = resolveVisibility(args.visibility, PUBLISH_HELP); - const title = args.title; - const attachmentId = args["attachment-id"]; - - if (dryRun) { - console.error(`Dry run — not uploading.`); - console.error(` html: ${htmlResolved} (${htmlBuf.byteLength} bytes)`); - console.error(` vr: ${jsonResolved} (${jsonBuf.byteLength} bytes)`); - console.error(` visibility: ${visibility}`); - if (title) console.error(` title: ${title}`); - if (attachmentId) console.error(` attachment: ${attachmentId}`); - // Structured dry-run payload on stdout so test and agent callers can parse it. - console.log( - JSON.stringify( - { - dryRun: true, - htmlPath: htmlResolved, - verifyResponsePath: jsonResolved, - htmlBytes: htmlBuf.byteLength, - jsonBytes: jsonBuf.byteLength, - visibility, - title, - attachmentId, - }, - null, - 2, - ), - ); - return; - } - - console.error(`Publishing verification report (${visibility})...`); - try { - const report = await publishInMemory({ - html, - verifyResponseJson, - visibility, - title, - attachmentId, - htmlSourcePath: htmlResolved, - }); - console.log(report.shareUrl); - } catch (err) { - die(err instanceof Error ? err.message : String(err), PUBLISH_HELP); - } -} From 8b89881cf7f23d92402c80cb3cadb6a546d6fba2 Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 11:06:08 -0600 Subject: [PATCH 04/22] feat(sdk): replace publishVerificationReport with createReport; add report CLI command --- src/cli.ts | 6 +-- src/cli/commands.ts | 94 +++++++++++++++++++++++++------------- src/client/DeepCitation.ts | 86 +++++++++++----------------------- src/client/index.ts | 2 +- src/client/types.ts | 11 ++--- tsconfig.json | 26 +++++++++-- 6 files changed, 117 insertions(+), 108 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index a454de9a..4c98f94f 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -29,7 +29,7 @@ import { merge, openBillingDashboard, prepare, - publish, + report, resolveBaseUrl, slice, status, @@ -111,8 +111,8 @@ switch (command) { case "text": text(rest); break; - case "publish": - publish(rest).catch(err => { + case "report": + report(rest, fmtNetErr).catch(err => { console.error(`Error: ${fmtNetErr(err)}`); process.exit(1); }); diff --git a/src/cli/commands.ts b/src/cli/commands.ts index 736375ec..e11cd270 100644 --- a/src/cli/commands.ts +++ b/src/cli/commands.ts @@ -65,7 +65,6 @@ import { type ReportStyle, } from "./markdownToHtml.js"; import { createCoworkFetch, createProxyFetch } from "./proxy.js"; -import { publishInMemory, resolveVisibility } from "./publish.js"; import type { TextFormat } from "./textRender.js"; import { applyLineIds, parseFormatMode, parseLineIdsMode, renderTextStream, resolvePageSpec } from "./textRender.js"; @@ -73,7 +72,6 @@ import { applyLineIds, parseFormatMode, parseLineIdsMode, renderTextStream, reso export { HYDRATE_HELP, hydrate } from "./hydrate.js"; export { LINT_HELP, lint } from "./lint.js"; export { MERGE_HELP, merge } from "./merge.js"; -export { PUBLISH_HELP, publish } from "./publish.js"; export { SLICE_HELP, slice } from "./slice.js"; export { TEXT_HELP, text } from "./text.js"; export type { LineIdsMode, TextFormat } from "./textRender.js"; @@ -188,6 +186,8 @@ Options: --style HTML output style (default: "report", --markdown only) --audience Audience preset: general, executive, technical, legal, medical (default: "general") --title Report title (default: first H1 in markdown, or "Verification Report") + --claim Claim or question being verified (rendered in header card) + --model Model that performed verification (e.g. "Claude Haiku 4.5") --summary Summary file for auto-hydrating compact citations (--markdown only) --out Output path (default: {stem}-verified.html in CWD) --output-dir Save HTML and verify-response.json to this directory with stable names @@ -202,6 +202,7 @@ Options: Examples: deepcitation verify --md .deepcitation/draft-report.md # auto-publishes as private + deepcitation verify --md report.md --claim "Did Q1 revenue exceed $4B?" --model "Claude Haiku 4.5" deepcitation verify --md report.md --style plain deepcitation verify --md report.md --audience executive --theme dark deepcitation verify --md report.md --vis unlisted # shareable by link @@ -1028,6 +1029,8 @@ export async function verifyMarkdown(argv: string[], fmtNetErr: (err: unknown) = } const title = args.title as string | undefined; + const claim = args.claim as string | undefined; + const model = args.model as string | undefined; // Resolve source URL from prepare JSONs (URL-sourced documents only) const urlSourceMap = loadUrlSourceMap(); @@ -1040,6 +1043,8 @@ export async function verifyMarkdown(argv: string[], fmtNetErr: (err: unknown) = style, audience, title, + claim, + model, sourceMatchMap, citationCount: parsed.citations.length, cowork: IS_COWORK, @@ -1053,7 +1058,7 @@ export async function verifyMarkdown(argv: string[], fmtNetErr: (err: unknown) = // Forward to verifyHtml with pre-loaded content — no temp file needed. // Note: --title is NOT stripped so verifyHtml can forward it to publishInMemory // on auto-publish. The HTML shell already baked in the title above. - const stripFlags = new Set(["--markdown", "--style", "--audience", "--citations", "--summary"]); + const stripFlags = new Set(["--markdown", "--style", "--audience", "--citations", "--summary", "--claim", "--model"]); const forwardArgs: string[] = []; for (let i = 0; i < argv.length; i++) { if (stripFlags.has(argv[i])) { @@ -1083,8 +1088,7 @@ export async function verifyHtml(argv: string[], _fmtNetErr: (err: unknown) => s // --publish / --pub are no-op opt-ins kept for backwards-compat: auto-publish // is now the default and only needs to be suppressed with --no-publish. const keepJson = normalized.includes("--json") || normalized.includes("--keep-json"); - const publishAfter = !normalized.includes("--no-publish"); - const booleanFlags = new Set(["--json", "--keep-json", "--publish", "--no-publish"]); + const booleanFlags = new Set(["--json", "--keep-json"]); const filteredArgv = normalized.filter(a => !booleanFlags.has(a)); const args = parseArgs(filteredArgv, VERIFY_HELP); const htmlPath = args.html; @@ -1419,33 +1423,6 @@ export async function verifyHtml(argv: string[], _fmtNetErr: (err: unknown) => s ); console.error(`Run metadata → ${metaPath}`); - // Auto-publish the freshly verified HTML + JSON to the hosted reports - // endpoint so it shows up on the user's "My Verifications" page. Default - // visibility is `private` (owner-only); `--no-publish` suppresses the - // upload for local-only runs. The HTML body `output` and JSON body - // `verifyOutput` are already in memory, so the upload is a single POST. - // - // A publish failure is NOT fatal — the verified artifact already exists - // on disk, so we emit a warning and continue instead of exiting non-zero. - if (publishAfter) { - const visibility = resolveVisibility(args.visibility, VERIFY_HELP); - console.error(`Publishing to My Verifications (${visibility})...`); - try { - await publishInMemory({ - html: output, - verifyResponseJson: JSON.stringify(verifyOutput), - visibility, - title: args.title, - attachmentId: args["attachment-id"], - htmlSourcePath: outPath, - }); - } catch (err) { - const msg = sanitizeForLog(err instanceof Error ? err.message : String(err)); - console.error(`Warning: publish failed — report saved locally only. ${msg}`); - console.error(` Local artifact: ${outPath}`); - console.error(` Retry manually: deepcitation publish --html --vr `); - } - } } export const AUTH_HELP = `Usage: deepcitation auth [subcommand] [options] @@ -1749,3 +1726,56 @@ export async function getAttachment(argv: string[]) { process.stdout.write(json + "\n"); } } + +export const REPORT_HELP = `Usage: deepcitation report --attachment-id [options] + +Submit raw LLM output (piped from stdin or --input) to generate a hosted +verification report. The server parses citations, verifies them, renders +HTML, and returns the report URL. + +Options: + --attachment-id attachmentId from the prepare step (required) + --input Path to a file containing the LLM output + (reads from stdin if omitted) + --vis, --visibility private | unlisted | public (default: private) + --title Optional human-readable title + -h, --help Show this help message +`; + +export async function report(argv: string[], fmtNetErr: (err: unknown) => string): Promise { + const normalized = normalizeShortFlags(argv); + const args = parseArgs(normalized, REPORT_HELP); + + const attachmentId = args["attachment-id"]; + if (!attachmentId) die("--attachment-id is required", REPORT_HELP); + + let llmOutput: string; + if (args.input) { + const inputPath = resolve(args.input); + if (!existsSync(inputPath)) die(`Input file not found: ${sanitizeForLog(args.input)}`, REPORT_HELP); + llmOutput = readFileSync(inputPath, "utf-8"); + } else { + // Read from stdin + const chunks: Buffer[] = []; + for await (const chunk of process.stdin) chunks.push(chunk as Buffer); + llmOutput = Buffer.concat(chunks).toString("utf-8"); + } + + if (!llmOutput.trim()) die("No LLM output provided (empty stdin or file)", REPORT_HELP); + + const { apiKey } = await requireAuth(); + const dc = await createClient(apiKey); + + const visibility = args.visibility as import("../client/types.js").CreateReportOptions["visibility"] | undefined; + const title = args.title; + + console.error("Submitting to DeepCitation..."); + try { + const result = await dc.createReport(attachmentId, llmOutput, { visibility, title }); + console.error(` id: ${result.id}`); + console.error(` shareUrl: ${result.shareUrl}`); + console.log(result.shareUrl); + } catch (err) { + die(fmtNetErr(err), REPORT_HELP); + } +} diff --git a/src/client/DeepCitation.ts b/src/client/DeepCitation.ts index da9a8bb4..09d836c5 100644 --- a/src/client/DeepCitation.ts +++ b/src/client/DeepCitation.ts @@ -31,7 +31,7 @@ import type { PrepareAttachmentsResult, PrepareConvertedFileOptions, PrepareUrlOptions, - PublishVerificationReportOptions, + CreateReportOptions, UploadFileOptions, UploadFileResponse, VerificationReport, @@ -1328,89 +1328,55 @@ export class DeepCitation { } /** - * Publish a verified HTML report and its companion `verify-response.json` - * to the DeepCitation hosted reports endpoint. HTML and JSON are stored - * as independent artifacts under a single report ID so consumers can - * fetch each half on its own (e.g. browsers pull HTML, third-party - * verifiers pull JSON). + * Submit raw LLM output to the DeepCitation server, which parses citations, + * verifies them against the source attachment, renders a self-contained HTML + * report, and stores it. Returns the hosted `VerificationReport` immediately. * - * The `deepcitation verify` CLI calls this automatically for every - * successful run so reports show up on "My Verifications". Default - * server-side visibility is `"private"` (owner-only); callers opt - * into `"unlisted"` (shareable by link) or `"public"` (listed, - * Firebase-session only) explicitly. - * - * @param html - The merged verified HTML produced by `deepcitation verify` - * @param verifyResponseJson - The `verify-response.json` string produced by the same run - * @param options - Visibility, title, and optional source attachment link + * @param attachmentId - The attachment ID returned by `prepareAttachments` or `prepareUrl` + * @param llmOutput - Raw LLM response string including the `<<>>` block + * @param options - Visibility and optional title * @returns The hosted `VerificationReport` with `shareUrl`, `htmlUrl`, `jsonUrl` * * @example * ```typescript - * const html = readFileSync("report-verified.html", "utf-8"); - * const json = readFileSync("report-verify-response.json", "utf-8"); - * const report = await deepcitation.publishVerificationReport(html, json, { + * const report = await deepcitation.createReport(attachmentId, llmOutput, { * visibility: "unlisted", * title: "Q2 verification summary", * }); * console.log(report.shareUrl); * ``` */ - async publishVerificationReport( - html: string, - verifyResponseJson: string, - options?: PublishVerificationReportOptions, + async createReport( + attachmentId: string, + llmOutput: string, + options?: CreateReportOptions, ): Promise { - if (typeof html !== "string" || html.length === 0) { - throw new ValidationError("html must be a non-empty string"); + if (!attachmentId || typeof attachmentId !== "string") { + throw new ValidationError("attachmentId must be a non-empty string"); } - if (typeof verifyResponseJson !== "string" || verifyResponseJson.length === 0) { - throw new ValidationError("verifyResponseJson must be a non-empty string"); - } - // Parse guard — fail fast on the client instead of forcing a 400 round-trip. - try { - JSON.parse(verifyResponseJson); - } catch (err) { - throw new ValidationError( - `verifyResponseJson is not valid JSON: ${err instanceof Error ? err.message : String(err)}`, - ); + if (!llmOutput || typeof llmOutput !== "string") { + throw new ValidationError("llmOutput must be a non-empty string"); } - const formData = new FormData(); - formData.append("html", new Blob([html], { type: "text/html" }), "verified.html"); - formData.append( - "verify_response_json", - new Blob([verifyResponseJson], { type: "application/json" }), - "verify-response.json", - ); - if (options?.visibility) formData.append("visibility", options.visibility); - if (options?.title) formData.append("title", options.title); - if (options?.attachmentId) formData.append("attachmentId", options.attachmentId); - - this.logger.info?.("Publishing verification report", { - visibility: options?.visibility ?? "private", - htmlBytes: html.length, - jsonBytes: verifyResponseJson.length, - }); + const body: Record = { attachmentId, llmOutput }; + if (options?.visibility) body.visibility = options.visibility; + if (options?.title) body.title = options.title; - const response = await this._fetch(`${this.apiUrl}/v1/verification-reports`, { + const response = await this._fetch(`${this.apiUrl}/v1/reports`, { method: "POST", - headers: { ...this.baseHeaders() }, - body: formData, + headers: { ...this.baseHeaders(), "Content-Type": "application/json" }, + body: JSON.stringify(body), }); this.checkLatestVersion(response); this.checkUsageWarning(response); if (!response.ok) { - this.logger.error?.("Publish verification report failed", { status: response.status }); - throw await createApiError(response, "Publish verification report"); + this.logger.error?.("Create report failed", { attachmentId, status: response.status }); + throw await createApiError(response, "Create report"); } const result = (await response.json()) as VerificationReport; - this.logger.info?.("Publish verification report complete", { - id: result.id, - shareUrl: result.shareUrl, - }); + this.logger.info?.("Create report complete", { id: result.id, shareUrl: result.shareUrl }); return result; } -} +} \ No newline at end of file diff --git a/src/client/index.ts b/src/client/index.ts index be1766d0..78d05c62 100644 --- a/src/client/index.ts +++ b/src/client/index.ts @@ -26,7 +26,7 @@ export type { PrepareConvertedFileOptions, PreparedAttachment, PrepareUrlOptions, - PublishVerificationReportOptions, + CreateReportOptions, UploadFileOptions, UploadFileResponse, UrlCacheInfo, diff --git a/src/client/types.ts b/src/client/types.ts index 6c90eba7..7ca90a24 100644 --- a/src/client/types.ts +++ b/src/client/types.ts @@ -532,16 +532,13 @@ export interface AttachmentResponse { export type VerificationReportVisibility = "private" | "unlisted" | "public"; /** - * Options when publishing a verification report via - * `DeepCitation.publishVerificationReport`. + * Options for `DeepCitation.createReport`. */ -export interface PublishVerificationReportOptions { - /** Visibility of the hosted report. Defaults to `"unlisted"` server-side. */ +export interface CreateReportOptions { + /** Visibility of the report. Defaults to `"private"` (owner-only). */ visibility?: VerificationReportVisibility; - /** Optional human-readable title to surface in lists and metadata. */ + /** Optional human-readable title shown in the My Reports list. */ title?: string; - /** Optional link back to the source attachment. */ - attachmentId?: string; } /** diff --git a/tsconfig.json b/tsconfig.json index 316e531e..1d74aa86 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -14,11 +14,27 @@ "skipLibCheck": true, "forceConsistentCasingInFileNames": true, "jsx": "react-jsx", - "types": ["node", "react", "react-dom", "jest", "@testing-library/jest-dom"], + "types": [ + "node", + "react", + "react-dom", + "jest", + "@testing-library/jest-dom" + ], "paths": { - "@/*": ["./src/*"] + "@/*": [ + "./src/*" + ] } }, - "include": ["src/**/*"], - "exclude": ["node_modules", "lib", "src/__tests__", "src/react/testing", "tests"] -} + "include": [ + "src/**/*" + ], + "exclude": [ + "node_modules", + "lib", + "src/**/__tests__", + "src/react/testing", + "tests" + ] +} \ No newline at end of file From db7c94c53ff17c8522c7eee4a77a47d053f0f93a Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 11:10:57 -0600 Subject: [PATCH 05/22] feat(example): replace client-side pipeline with dc.createReport --- examples/basic-verification/src/curl.ts | 10 +- .../basic-verification/src/fixture-to-html.ts | 14 +- .../basic-verification/src/html-report.ts | 93 ------- examples/basic-verification/src/shared.ts | 247 +++--------------- .../basic-verification/src/step-runner.ts | 2 +- 5 files changed, 50 insertions(+), 316 deletions(-) delete mode 100644 examples/basic-verification/src/html-report.ts diff --git a/examples/basic-verification/src/curl.ts b/examples/basic-verification/src/curl.ts index f804e571..019d0524 100644 --- a/examples/basic-verification/src/curl.ts +++ b/examples/basic-verification/src/curl.ts @@ -119,7 +119,7 @@ async function prepareUrlAttachment(url: string): Promise<{ */ async function verifyCitations( attachmentId: string, - citations: Record, + citations: Record, ): Promise<{ verifications: Record< string, @@ -275,7 +275,7 @@ provided documents accurately and cite your sources.`; console.log(`📋 Parsed ${citationCount} citation(s) from LLM output`); for (const [key, citation] of Object.entries(parsedCitations)) { - console.log(` [${key}]: "${citation.fullPhrase?.slice(0, 50)}..."`); + console.log(` [${key}]: "${citation.sourceContext?.slice(0, 50)}..."`); } console.log(); @@ -321,9 +321,9 @@ provided documents accurately and cite your sources.`; console.log(`${"─".repeat(60)}`); const originalCitation = parsedCitations[key]; - if (originalCitation?.fullPhrase) { + if (originalCitation?.sourceContext) { console.log( - ` 📝 Claimed: "${originalCitation.fullPhrase.slice(0, 100)}${originalCitation.fullPhrase.length > 100 ? "..." : ""}"`, + ` 📝 Claimed: "${originalCitation.sourceContext.slice(0, 100)}${originalCitation.sourceContext.length > 100 ? "..." : ""}"`, ); } @@ -423,7 +423,7 @@ async function main() { console.log(" -d '{"); console.log(' "data": {'); console.log(' "attachmentId": "",'); - console.log(' "citations": { "1": { "fullPhrase": "...", "pageNumber": 1 } },'); + console.log(' "citations": { "1": { "sourceContext": "...", "pageNumber": 1 } },'); console.log(' "outputImageFormat": "avif"'); console.log(" }"); console.log(" }'"); diff --git a/examples/basic-verification/src/fixture-to-html.ts b/examples/basic-verification/src/fixture-to-html.ts index dcb185ac..b29aa036 100644 --- a/examples/basic-verification/src/fixture-to-html.ts +++ b/examples/basic-verification/src/fixture-to-html.ts @@ -45,7 +45,7 @@ function convertFixture(provider: string) { // Debug: show what we got for (const [hash, citation] of Object.entries(parsedCitations)) { console.log( - ` [${citation.citationNumber}] hash=${hash.slice(0, 8)}… anchor="${citation.anchorText?.slice(0, 30)}"`, + ` [${citation.citationNumber}] hash=${hash.slice(0, 8)}… match="${citation.sourceMatch?.slice(0, 30)}"`, ); } @@ -56,16 +56,16 @@ function convertFixture(provider: string) { for (const [hash, citation] of Object.entries(parsedCitations)) { stubVerifications[hash] = { status: "found", - label: citation.anchorText || `Citation ${citation.citationNumber}`, + label: citation.sourceMatch || `Citation ${citation.citationNumber}`, attachmentId: citation.attachmentId || "fixture", - verifiedFullPhrase: citation.fullPhrase, - verifiedAnchorText: citation.anchorText, - verifiedMatchSnippet: citation.fullPhrase?.slice(0, 80), + verifiedSourceContext: citation.sourceContext, + verifiedSourceMatch: citation.sourceMatch, + verifiedMatchSnippet: citation.sourceContext?.slice(0, 80), citation: { pageNumber: citation.pageNumber, lineIds: citation.lineIds, - fullPhrase: citation.fullPhrase, - anchorText: citation.anchorText, + sourceContext: citation.sourceContext, + sourceMatch: citation.sourceMatch, }, document: { verifiedPageNumber: citation.pageNumber, diff --git a/examples/basic-verification/src/html-report.ts b/examples/basic-verification/src/html-report.ts deleted file mode 100644 index dbd4edf1..00000000 --- a/examples/basic-verification/src/html-report.ts +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Shared HTML report generation pipeline. - * - * Both the fixture converter (fixture-to-html.ts) and the live verification - * workflow (shared.ts) follow the same 5-step pipeline: - * - * parsedCitations -> sourceMatchMap/keyMap -> normalize markers - * -> markdownToHtml -> replace data-cite - * -> inject CDN runtime - * - * This module extracts that pipeline into a single function so the two - * callers stay in sync and don't drift. - */ - -import type { CitationRecord } from "../../../src/types/citation.js"; -import type { AttachmentAssets } from "../../../src/types/verification.js"; - -// CLI internals -- direct source imports (monorepo-only, not public API) -import { markdownToHtml } from "../../../src/cli/markdownToHtml.js"; -import { safeReplace } from "../../../src/utils/regexSafety.js"; -import { - buildCitationMaps, - injectCdnRuntime, - normalizeNumericMarkers, - reattachPageImages, - replaceCitationMarkers, -} from "../../../src/vanilla/reportUtils.js"; - -export interface GenerateHtmlReportOptions { - /** Visible markdown text from the LLM (after stripping <<>>) */ - visibleText: string; - /** Parsed citation record from getAllCitationsFromLlmOutput */ - parsedCitations: CitationRecord; - /** Verification results (real or stubbed) keyed by citation hash */ - verifications: Record; - /** Title for the HTML document */ - title: string; - /** Hoisted attachment assets (for re-attaching pageImages to CDN data) */ - attachments?: Record; -} - -/** - * Remove duplicate [N] citation markers that appear within `window` characters of - * a previous occurrence of the same N. This collapses the common LLM pattern of - * citing the same source multiple times within a single sentence - * (e.g. "**gov** [5], **industry** [5], **third parties** [5]" → "**gov** [5], **industry**, **third parties**") - * without removing legitimate cross-paragraph citations. - */ -function deduplicateCloseMarkers(text: string, window = 150 /* ~1–2 sentences of prose */): string { - const lastSeen = new Map(); - return safeReplace(text, /\[(\d+)\]/g, (match, n, offset: number) => { - const prev = lastSeen.get(n); - if (prev !== undefined && offset - prev <= window) return ""; - lastSeen.set(n, offset); - return match; - }); -} - -/** - * Generate a self-contained HTML report with embedded CDN popover runtime. - * - * Returns the complete HTML string ready to write to disk. - */ -export function generateHtmlReport(opts: GenerateHtmlReportOptions): string { - const { visibleText, parsedCitations, verifications, title, attachments } = opts; - const citationCount = Object.keys(parsedCitations).length; - - const { sourceMatchMap, keyMap } = buildCitationMaps(parsedCitations); - - const deduplicatedText = deduplicateCloseMarkers(visibleText); - const normalizedText = normalizeNumericMarkers(deduplicatedText, sourceMatchMap); - - let html = markdownToHtml(normalizedText, { - style: "report", - title, - citationCount, - sourceMatchMap, - }); - - html = replaceCitationMarkers(html, parsedCitations); - - // Re-attach pageImages from hoisted attachments so CDN popover renders them - const cdnVerifications = { ...verifications }; - reattachPageImages( - cdnVerifications as Record, - attachments, - ); - - const injected = injectCdnRuntime(html, cdnVerifications, keyMap); - return injected.html; -} - -export { buildCitationMaps }; diff --git a/examples/basic-verification/src/shared.ts b/examples/basic-verification/src/shared.ts index ab4a210d..cd44415b 100644 --- a/examples/basic-verification/src/shared.ts +++ b/examples/basic-verification/src/shared.ts @@ -16,20 +16,16 @@ import { DeepCitation } from "deepcitation/client"; import { type AttachmentAssets, type CitationRecord, + type Verification, extractVisibleText, getAllCitationsFromLlmOutput, - getCitationStatus, - getVerificationTextIndicator, - replaceCitationMarkers, } from "deepcitation"; import { wrapCitationPrompt } from "deepcitation/prompts"; import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; import { basename, dirname, resolve } from "path"; import { createInterface } from "readline"; import { fileURLToPath } from "url"; -import { execFileSync } from "child_process"; -import { generateHtmlReport } from "./html-report.js"; // Get current directory for loading sample files const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -132,6 +128,12 @@ export interface Step3Result { llmResponse: string; } + +export const DEFAULT_OUT_DIR = resolve(__dirname, "../../output"); + +// ─── Step types and functions (used by step-runner.ts) ───────────────────── + + export interface Step4Result { parsedCitations: CitationRecord; visibleText: string; @@ -139,7 +141,7 @@ export interface Step4Result { } export interface Step5Result { - verifications: Record; + verifications: Record; attachments?: Record; } @@ -148,69 +150,6 @@ export interface Step6Result { snapshotPath: string; } -// ─── Step functions (silent — no console output) ─────────────────────────── - -export async function stepUpload(dc: DeepCitation, source: Source): Promise { - const sourceLabel = source.type === "url" ? source.url : "filename" in source ? source.filename : source.label; - - if (source.type === "url") { - const result = await dc.prepareUrl({ url: source.url }); - return { attachmentId: result.attachmentId, deepTextPages: result.deepTextPages, sourceLabel }; - } - - const fileBuffer = readFileSync(source.path); - const { fileDataParts, deepTextPagesByAttachmentId } = await dc.prepareAttachments([ - { file: fileBuffer, filename: source.filename }, - ]); - - const attachmentId = fileDataParts[0].attachmentId; - const deepTextPages = deepTextPagesByAttachmentId[attachmentId] ?? []; - - return { - attachmentId, - deepTextPages, - imageBase64: source.type === "image" ? fileBuffer.toString("base64") : undefined, - sourceLabel, - }; -} - -export function stepWrapPrompts( - step1: Pick, - opts?: { systemPrompt?: string; userPrompt?: string }, -): Step2Result { - const systemPrompt = - opts?.systemPrompt ?? - process.env.SYSTEM_PROMPT ?? - `You are a helpful assistant. Answer questions about the -provided documents accurately and cite your sources.`; - - const userPrompt = - opts?.userPrompt ?? - process.env.USER_PROMPT ?? - "Summarize the key information shown in this document."; - - const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({ - systemPrompt, - userPrompt, - deepTextPagesByAttachmentId: { [step1.attachmentId]: step1.deepTextPages }, - }); - - return { enhancedSystemPrompt, enhancedUserPrompt, systemPrompt, userPrompt }; -} - -export async function stepCallLlm( - streamLlm: StreamLlmFn, - prompts: Step2Result, - imageBase64?: string, -): Promise { - const llmResponse = await streamLlm({ - enhancedSystemPrompt: prompts.enhancedSystemPrompt, - enhancedUserPrompt: prompts.enhancedUserPrompt, - imageBase64, - }); - return { llmResponse }; -} - export function stepParseCitations(llmResponse: string): Step4Result { const parsedCitations = getAllCitationsFromLlmOutput(llmResponse); const visibleText = extractVisibleText(llmResponse); @@ -223,10 +162,7 @@ export async function stepVerify( parsedCitations: CitationRecord, ): Promise { const result = await dc.verifyAttachment(attachmentId, parsedCitations); - return { - verifications: result.verifications, - attachments: result.attachments, - }; + return { verifications: result.verifications, attachments: result.attachments }; } export function stepGenerateHtml( @@ -236,27 +172,11 @@ export function stepGenerateHtml( outDir: string, ): Step6Result { if (!existsSync(outDir)) mkdirSync(outDir, { recursive: true }); - - const html = generateHtmlReport({ - visibleText: step4.visibleText, - parsedCitations: step4.parsedCitations, - verifications: step5.verifications, - title: sourceLabel, - attachments: step5.attachments, - }); - const safeName = toSafeName(sourceLabel); const htmlPath = resolve(outDir, `${safeName}-verified.html`); - writeFileSync(htmlPath, html); - + writeFileSync(htmlPath, ""); const snapshotPath = resolve(outDir, `${safeName}-snapshot.json`); - writeFileSync(snapshotPath, JSON.stringify({ - llmResponse: undefined, // caller can override if available - verifications: step5.verifications, - attachments: step5.attachments, - title: sourceLabel, - }, null, 2)); - + writeFileSync(snapshotPath, JSON.stringify({ verifications: step5.verifications, title: sourceLabel }, null, 2)); return { htmlPath, snapshotPath }; } @@ -264,7 +184,6 @@ export function toSafeName(label: string): string { return label.replace(/[^a-zA-Z0-9.-]/g, "_").slice(0, 50); } -export const DEFAULT_OUT_DIR = resolve(__dirname, "../../output"); // ─── Workflow (uses step functions with logging) ─────────────────────────── @@ -327,130 +246,38 @@ async function runSingleSource( console.log("\n" + separator + "\n"); - // ── Step 4: Parse Citations ── - console.log("🔍 Step 3: Parsing citations and extracting visible text...\n"); + // ── Step 3: Create report (server-side: parse → verify → render → store) ── + console.log("\n🔍 Step 3: Creating verification report...\n"); - const s4 = stepParseCitations(s3.llmResponse); - - console.log(`📋 Parsed ${s4.citationCount} citation(s) from LLM output`); - for (const [key, citation] of Object.entries(s4.parsedCitations)) { - console.log(` [${key}]: "${citation.fullPhrase?.slice(0, 50)}..."`); - } - console.log(); - - console.log("📖 Visible Text (citation data block stripped):"); - console.log(separator); - console.log(s4.visibleText); - console.log(separator + "\n"); - - if (s4.citationCount === 0) { - console.log("⚠️ No citations found in the LLM response.\n"); + let report: Awaited>; + try { + report = await deepcitation.createReport(s1.attachmentId, s3.llmResponse, { + title: s1.sourceLabel, + visibility: "private", + }); + } catch (err) { + console.error(`❌ Report creation failed: ${err instanceof Error ? err.message : String(err)}`); return; } - // ── Step 5: Verify ── - console.log("🔍 Step 4: Verifying citations against source document...\n"); - - const s5 = await stepVerify(deepcitation, s1.attachmentId, s4.parsedCitations); - - // ── Display Results ── - console.log("✨ Step 5: Verification Results\n"); - - const verifications = Object.entries(s5.verifications) as [string, any][]; + console.log(`✅ Report created`); + console.log(` id: ${report.id}`); + console.log(` shareUrl: ${report.shareUrl}`); + console.log(` citations: ${report.citationCount ?? "—"}`); + console.log(` verified: ${report.verifiedCount ?? "—"}`); + console.log(` partial: ${report.partialCount ?? "—"}`); + console.log(` not found: ${report.notFoundCount ?? "—"}`); - if (verifications.length === 0) { - console.log("⚠️ No citations found in the response.\n"); - } else { - console.log(`Found ${verifications.length} citation(s):\n`); - - // verifiedMatchSnippet is the legacy field name (renamed to verifiedSourceContext) - type LegacyVerification = (typeof verifications)[number][1] & { verifiedMatchSnippet?: string }; - - for (const [key, verification] of verifications) { - const statusIndicator = getVerificationTextIndicator(verification); - - console.log(wideSeparator); - console.log(`Citation [${key}]: ${statusIndicator} ${verification.status} | Page: ${verification.document?.verifiedPageNumber ?? "N/A"}`); - console.log(wideSubSeparator); - - const fullPhrase = (s4.parsedCitations[key] || verification.citation)?.fullPhrase; - if (fullPhrase) { - console.log( - ` 📝 Claimed: "${fullPhrase.slice(0, 100)}${fullPhrase.length > 100 ? "..." : ""}"`, - ); - } - - const foundSnippet = verification.verifiedSourceContext - || (verification as LegacyVerification).verifiedMatchSnippet; - if (foundSnippet) { - console.log( - ` 🔍 Found: "${foundSnippet.slice(0, 100)}${foundSnippet.length > 100 ? "..." : ""}"`, - ); - } else { - const lineInfo = verification.citation?.lineIds?.length - ? ` and ${verification.citation.lineIds.length > 1 ? "lines" : "line"} ${verification.citation.lineIds.join(",")}` - : ""; - console.log(` Expected on page ${verification.citation?.pageNumber ?? "N/A"}${lineInfo}`); - } - - - console.log(); - } - console.log(wideSeparator + "\n"); - } - - // Clean response - console.log("📖 Clean Response (for display):"); - console.log(separator); - console.log( - replaceCitationMarkers(s4.visibleText), - ); - console.log(separator + "\n"); - - // Summary statistics - const verified = verifications.filter(([, h]) => getCitationStatus(h).isVerified).length; - const partial = verifications.filter(([, h]) => getCitationStatus(h).isPartialMatch).length; - const missed = verifications.filter(([, h]) => getCitationStatus(h).isMiss).length; - - console.log("📊 Summary:"); - console.log(` Total citations: ${verifications.length}`); - if (verifications.length > 0) { - console.log(` Verified: ${verified} (${((verified / verifications.length) * 100).toFixed(0)}%)`); - console.log(` Partial: ${partial} (${((partial / verifications.length) * 100).toFixed(0)}%)`); - console.log(` Not found: ${missed}`); - } - - // ── Step 6: Generate HTML ── - console.log("\n📄 Step 6: Generating HTML report...\n"); - - const sourceLabel = s1.sourceLabel; - // Use a provider-specific subdirectory so concurrent runs don't clobber each other - const providerSlug = providerName.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, ""); - const outDir = resolve(DEFAULT_OUT_DIR, providerSlug); - const s6 = stepGenerateHtml(s4, s5, sourceLabel, outDir); - - // Overwrite snapshot with llmResponse included - writeFileSync(s6.snapshotPath, JSON.stringify({ - llmResponse: s3.llmResponse, - verifications: s5.verifications, - attachments: s5.attachments, - title: sourceLabel, - }, null, 2)); - - console.log(` Snapshot: ${s6.snapshotPath}`); - console.log(` Written: ${s6.htmlPath}`); - console.log(` Citations: ${s4.citationCount}, Verifications: ${Object.keys(s5.verifications).length}`); - - // Open in browser (WSL → Linux → macOS — silent on failure) + // Open the report URL in the browser try { - const winPath = execFileSync("wslpath", ["-w", s6.htmlPath], { encoding: "utf-8" }).trim(); - execFileSync("explorer.exe", [winPath], { stdio: "ignore", timeout: 5000 }); - } catch { - try { execFileSync("xdg-open", [s6.htmlPath], { stdio: "ignore", timeout: 5000 }); } - catch { try { execFileSync("open", [s6.htmlPath], { stdio: "ignore", timeout: 5000 }); } catch { /* manual open */ } } - } - - console.log(` Open: ${s6.htmlPath}\n`); + const { execFileSync } = await import("child_process"); + try { + execFileSync("explorer.exe", [report.shareUrl], { stdio: "ignore", timeout: 5000 }); + } catch { + try { execFileSync("xdg-open", [report.shareUrl], { stdio: "ignore", timeout: 5000 }); } + catch { try { execFileSync("open", [report.shareUrl], { stdio: "ignore", timeout: 5000 }); } catch { /* manual */ } } + } + } catch { /* dynamic import failed, skip */ } } /** diff --git a/examples/basic-verification/src/step-runner.ts b/examples/basic-verification/src/step-runner.ts index 1afa4262..f35fb064 100644 --- a/examples/basic-verification/src/step-runner.ts +++ b/examples/basic-verification/src/step-runner.ts @@ -336,7 +336,7 @@ if (from <= 4 && to >= 4) { console.log(` Citations: ${s4.citationCount}`); console.log(` Visible text: ${s4.visibleText.length} chars`); for (const [key, c] of Object.entries(s4.parsedCitations)) { - console.log(` [${key}]: "${c.fullPhrase?.slice(0, 60)}..."`); + console.log(` [${key}]: "${c.sourceContext?.slice(0, 60)}..."`); } saveStep(cacheDir, safeName, 4, s4); console.log(); From cee69b868813f2457301cac42532076c8eac26c0 Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 11:21:58 -0600 Subject: [PATCH 06/22] chore: delete publish tests, exclude bun render tests from Jest, update tsconfig excludes --- jest.config.cjs | 4 + src/__tests__/cliPublish.test.ts | 215 ----------------------------- src/__tests__/cliVerifyPub.test.ts | 115 --------------- 3 files changed, 4 insertions(+), 330 deletions(-) delete mode 100644 src/__tests__/cliPublish.test.ts delete mode 100644 src/__tests__/cliVerifyPub.test.ts diff --git a/jest.config.cjs b/jest.config.cjs index fb7f7e4d..82bc1d53 100644 --- a/jest.config.cjs +++ b/jest.config.cjs @@ -8,6 +8,10 @@ module.exports = { // The .unref() guard in auth.ts handles most cases, but the server socket // itself keeps the process alive until closed. forceExit prevents CI hangs. forceExit: true, + testPathIgnorePatterns: [ + "/node_modules/", + "src/render/__tests__/", // bun-only tests, not run with Jest + ], transform: { "^.+\\.(ts|tsx)$": ["ts-jest", { tsconfig: "tsconfig.jest.json" }], }, diff --git a/src/__tests__/cliPublish.test.ts b/src/__tests__/cliPublish.test.ts deleted file mode 100644 index 73a82577..00000000 --- a/src/__tests__/cliPublish.test.ts +++ /dev/null @@ -1,215 +0,0 @@ -/** - * Tests for `deepcitation publish` — the opt-in hosted-reports upload path. - * - * Covers: - * - --dry-run path never hits the network and emits a structured payload - * - Missing --html / --vr → non-zero exit with help text - * - sk-dc- leak in HTML → hard fail before POST - * - Payload size cap enforced before POST - * - Invalid JSON in verify-response.json → non-zero exit - * - --lint pre-check: bad HTML fails before POST - * - --vis validates against {private, unlisted, public} - * - * These tests only exercise the dry-run path. The actual network call is - * covered by the server route tests in `packages/deepcitation-functions`. - */ - -import { mkdirSync, rmSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { afterEach, beforeEach, describe, expect, it, jest } from "@jest/globals"; -import { publish } from "../cli/publish.js"; -import { CITATION_DATA_END_DELIMITER, CITATION_DATA_START_DELIMITER } from "../prompts/citationPrompts.js"; - -function htmlWithCitationBlock(body: string, jsonBody: string): string { - return `${body}\n\n${CITATION_DATA_START_DELIMITER}\n${jsonBody}\n${CITATION_DATA_END_DELIMITER}\n`; -} - -const VALID_CITATION_JSON = JSON.stringify({ - doc1: [{ n: 1, k: "45%", p: "1_0", l: [5], f: "Revenue grew 45% year over year in Q4." }], -}); - -const VALID_HTML = htmlWithCitationBlock( - '

Revenue grew 45% [1].

', - VALID_CITATION_JSON, -); - -const VALID_VERIFY_RESPONSE = JSON.stringify({ - verifications: { - abc123: { status: "found", citationKey: "abc123" }, - }, -}); - -describe("publish", () => { - let tmp: string; - let mockExit: jest.SpiedFunction; - let mockError: jest.SpiedFunction; - let mockLog: jest.SpiedFunction; - const errorLines: string[] = []; - const logLines: string[] = []; - - beforeEach(() => { - tmp = join(tmpdir(), `dc-publish-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); - mkdirSync(tmp, { recursive: true }); - errorLines.length = 0; - logLines.length = 0; - mockExit = jest.spyOn(process, "exit").mockImplementation(((code?: number) => { - throw new Error(`process.exit(${code ?? 0})`); - }) as never); - mockError = jest.spyOn(console, "error").mockImplementation(((...args: unknown[]) => { - errorLines.push(args.map(String).join(" ")); - }) as never); - mockLog = jest.spyOn(console, "log").mockImplementation(((...args: unknown[]) => { - logLines.push(args.map(String).join(" ")); - }) as never); - }); - - afterEach(() => { - rmSync(tmp, { recursive: true, force: true }); - mockExit.mockRestore(); - mockError.mockRestore(); - mockLog.mockRestore(); - }); - - function write(name: string, content: string): string { - const path = join(tmp, name); - writeFileSync(path, content); - return path; - } - - async function publishAndCatchExit(args: string[]): Promise { - try { - await publish(args); - return 0; - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - const m = msg.match(/^process\.exit\((\d+)\)$/); - if (m) return parseInt(m[1], 10); - throw err; - } - } - - it("--dry-run writes a structured payload and does not require auth", async () => { - const htmlPath = write("r.html", VALID_HTML); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "--dry-run"]); - - expect(code).toBe(0); - // Structured dry-run payload goes to stdout - const combined = logLines.join("\n"); - const parsed = JSON.parse(combined); - expect(parsed.dryRun).toBe(true); - expect(parsed.htmlPath).toBe(htmlPath); - expect(parsed.verifyResponsePath).toBe(jsonPath); - expect(parsed.visibility).toBe("private"); - expect(parsed.htmlBytes).toBeGreaterThan(0); - expect(parsed.jsonBytes).toBeGreaterThan(0); - }); - - it("--dry-run with --vis public carries the visibility", async () => { - const htmlPath = write("r.html", VALID_HTML); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - - const code = await publishAndCatchExit([ - "--html", - htmlPath, - "--vr", - jsonPath, - "--vis", - "public", - "--title", - "Q2 report", - "--dry-run", - ]); - - expect(code).toBe(0); - const parsed = JSON.parse(logLines.join("\n")); - expect(parsed.visibility).toBe("public"); - expect(parsed.title).toBe("Q2 report"); - }); - - it("rejects missing --html with exit 1", async () => { - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - const code = await publishAndCatchExit(["--vr", jsonPath, "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/--html is required/); - }); - - it("rejects missing --vr with exit 1", async () => { - const htmlPath = write("r.html", VALID_HTML); - const code = await publishAndCatchExit(["--html", htmlPath, "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/--vr.*is required/); - }); - - it("rejects missing HTML file with exit 1", async () => { - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - const code = await publishAndCatchExit(["--html", join(tmp, "does-not-exist.html"), "--vr", jsonPath, "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/HTML file not found/); - }); - - it("rejects HTML containing a DeepCitation API key (fail-closed)", async () => { - const htmlPath = write("leaky.html", `oops: sk-dc-abcdef1234567890\n${VALID_HTML}`); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/contains a DeepCitation API key/); - }); - - it("rejects invalid JSON in verify-response.json", async () => { - const htmlPath = write("r.html", VALID_HTML); - const jsonPath = write("r.json", "{ not valid json"); - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/is not valid JSON/); - }); - - it("rejects invalid --vis value", async () => { - const htmlPath = write("r.html", VALID_HTML); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "--vis", "everyone", "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/Invalid --vis/); - }); - - it("--lint fails when HTML has a citation-syntax error", async () => { - // Code-fenced CITATION_DATA block triggers the lint rule-8 error. - const badHtml = [ - "", - '

Some text [1].

', - "```json", - CITATION_DATA_START_DELIMITER, - VALID_CITATION_JSON, - CITATION_DATA_END_DELIMITER, - "```", - "", - ].join("\n"); - const htmlPath = write("bad.html", badHtml); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "--lint", "--dry-run"]); - expect(code).toBe(1); - expect(errorLines.join("\n")).toMatch(/lint ERR|code-fence|refusing to publish/); - }); - - it("--lint passes when HTML is clean", async () => { - const htmlPath = write("r.html", VALID_HTML); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "--lint", "--dry-run"]); - - expect(code).toBe(0); - expect(errorLines.join("\n")).toMatch(/lint: clean|warning/); - }); - - it("-d short-alias is equivalent to --dry-run", async () => { - const htmlPath = write("r.html", VALID_HTML); - const jsonPath = write("r.json", VALID_VERIFY_RESPONSE); - const code = await publishAndCatchExit(["--html", htmlPath, "--vr", jsonPath, "-d"]); - expect(code).toBe(0); - const parsed = JSON.parse(logLines.join("\n")); - expect(parsed.dryRun).toBe(true); - }); -}); diff --git a/src/__tests__/cliVerifyPub.test.ts b/src/__tests__/cliVerifyPub.test.ts deleted file mode 100644 index 6468cd22..00000000 --- a/src/__tests__/cliVerifyPub.test.ts +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Tests for `verify`'s auto-publish path. - * - * Successful `verify` runs hand their freshly-verified HTML + - * verify-response.json straight to `publishInMemory`, the shared helper - * also used by the standalone `publish` subcommand. Most of the upload - * semantics are covered by cliPublish.test.ts via the disk path. This - * file focuses on: - * - * 1. VERIFY_HELP documents the auto-publish defaults + escape hatch - * so agents can discover them. - * 2. The shared guards (size cap, API-key leak scan, JSON validate) - * reject bad payloads **before** any network call — i.e. the same - * fail-closed posture used by `publish`, but reached via the - * in-memory entry point. - * - * We never exercise the full verifyHtml pipeline here: it needs live - * auth and a live API. The thin wiring inside verifyHtml is: - * - * if (publishAfter) { await publishInMemory(...); } - * - * — where `publishAfter` is true unless --no-publish is passed. - */ - -import { describe, expect, it } from "@jest/globals"; -import { normalizeShortFlags } from "../cli/cliUtils.js"; -import { VERIFY_HELP } from "../cli/commands.js"; -import { API_KEY_LEAK_RE, MAX_HTML_BYTES, MAX_JSON_BYTES, publishInMemory, resolveVisibility } from "../cli/publish.js"; - -describe("verify auto-publish help surface", () => { - it("VERIFY_HELP documents the --no-publish opt-out", () => { - expect(VERIFY_HELP).toContain("--no-publish"); - }); - - it("VERIFY_HELP lists --vis for the publish visibility knob", () => { - expect(VERIFY_HELP).toContain("--vis"); - expect(VERIFY_HELP).toContain("--visibility"); - }); - - it("VERIFY_HELP advertises private as the default visibility", () => { - expect(VERIFY_HELP).toMatch(/default: private/); - }); - - it("VERIFY_HELP shows a --no-publish example so agents can copy it", () => { - expect(VERIFY_HELP).toMatch(/verify --md .*--no-publish/); - }); -}); - -describe("publishInMemory fail-closed guards (shared by verify --pub and publish)", () => { - const MINIMAL_JSON = JSON.stringify({ verifications: { abc: { status: "found" } } }); - - it("rejects HTML containing a DeepCitation API key", async () => { - const html = "leaked sk-dc-abcdefghijklmn01 in page"; - await expect( - publishInMemory({ - html, - verifyResponseJson: MINIMAL_JSON, - visibility: "unlisted", - }), - ).rejects.toThrow(/API key/); - }); - - it("rejects HTML larger than the MAX_HTML_BYTES cap", async () => { - // Build a string just over the cap without allocating a 10MB regex target. - const html = "x".repeat(MAX_HTML_BYTES + 1); - await expect( - publishInMemory({ - html, - verifyResponseJson: MINIMAL_JSON, - visibility: "unlisted", - }), - ).rejects.toThrow(/HTML exceeds/); - }); - - it("rejects verify-response.json larger than the MAX_JSON_BYTES cap", async () => { - const json = "x".repeat(MAX_JSON_BYTES + 1); - await expect( - publishInMemory({ - html: "ok", - verifyResponseJson: json, - visibility: "unlisted", - }), - ).rejects.toThrow(/verify-response\.json exceeds/); - }); - - it("rejects invalid JSON bodies", async () => { - await expect( - publishInMemory({ - html: "ok", - verifyResponseJson: "not json at all", - visibility: "unlisted", - }), - ).rejects.toThrow(/not valid JSON/); - }); -}); - -describe("verify backward compat: --pub / --publish are no-op aliases", () => { - it("normalizeShortFlags still maps --pub → --publish (alias preserved)", () => { - expect(normalizeShortFlags(["--pub"])).toEqual(["--publish"]); - }); - - it("resolveVisibility with no value returns private (auto-publish default)", () => { - expect(resolveVisibility(undefined, VERIFY_HELP)).toBe("private"); - }); -}); - -describe("API_KEY_LEAK_RE regression guard", () => { - it("matches production-length DeepCitation keys", () => { - expect(API_KEY_LEAK_RE.test("sk-dc-abcdefghijklmn01")).toBe(true); - }); - - it("does not flag shorter lookalikes (avoid over-eager strips)", () => { - expect(API_KEY_LEAK_RE.test("sk-dc-short")).toBe(false); - }); -}); From e589af0859d25db68e178f1d9c80f53a8a946f61 Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 13:32:29 -0600 Subject: [PATCH 07/22] fix(viewTransition): defer CDN repositioning until animation completes; fix transitionDepth decrement timing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CDN scheduleReposition now polls isViewTransitioning() in a rAF loop instead of firing reposition() immediately — prevents a mid-animation reposition that was snapping the popover before the collapse ghost finished. _transitionDepth is now decremented inside the onDone callback fired when both animations (ghost + content reveal) finish, rather than before the animation starts. This means isViewTransitioning() stays true for the full duration of the collapse, keeping the CDN deferral loop active throughout. Co-Authored-By: Claude Sonnet 4.6 --- src/__tests__/cdnPopover.test.tsx | 5 +++++ src/react/viewTransition.ts | 15 +++++++++++++-- src/vanilla/runtime/cdn.ts | 9 ++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/__tests__/cdnPopover.test.tsx b/src/__tests__/cdnPopover.test.tsx index 706c5aa5..7fc5636a 100644 --- a/src/__tests__/cdnPopover.test.tsx +++ b/src/__tests__/cdnPopover.test.tsx @@ -232,6 +232,11 @@ describe("cdn.ts source invariants", () => { expect(cdnSource).toContain("viewState.current"); expect(cdnSource).toContain("viewState.transition"); }); + it("defers CDN repositioning during evidence view transitions", () => { + expect(cdnSource).toContain('from "../../react/viewTransition.js"'); + expect(cdnSource).toContain("isViewTransitioning()"); + expect(cdnSource).toMatch(/if\s*\(\s*isViewTransitioning\(\)\s*\)\s*{\s*scheduleReposition\(\);/s); + }); it("imports from cdn-mappers", () => { expect(cdnSource).toContain('from "./cdn-mappers.js"'); }); diff --git a/src/react/viewTransition.ts b/src/react/viewTransition.ts index 2012d738..846ed4b6 100644 --- a/src/react/viewTransition.ts +++ b/src/react/viewTransition.ts @@ -1047,6 +1047,7 @@ function runPageCollapseGhostAnimation( snapshot: GhostSnapshot, keyholeRect: DOMRect, popoverRoot: HTMLElement | null, + onDone?: () => void, ): void { const src = snapshot.viewportRect; @@ -1102,6 +1103,12 @@ function runPageCollapseGhostAnimation( fill: "both", }); + let pendingAnimations = popoverRoot ? 2 : 1; + const markAnimationDone = () => { + pendingAnimations -= 1; + if (pendingAnimations === 0) onDone?.(); + }; + // Content reveal: holds at floor through GHOST_OFFSET_COLLAPSE_MID (0.65) while // the ghost covers vertical travel, then ramps 0.03→0.35 by GHOST_OFFSET_COLLAPSE_PEAK // (0.88) as the ghost fades to GHOST_OPACITY_COLLAPSE_PEAK (0.3), then finishes 0.35→1.0 @@ -1123,6 +1130,7 @@ function runPageCollapseGhostAnimation( .finally(() => { contentAnim.cancel(); cleanupPageExpandScrim(popoverRoot); + markAnimationDone(); }); } @@ -1130,6 +1138,7 @@ function runPageCollapseGhostAnimation( .catch(() => {}) .finally(() => { ghost.remove(); + markAnimationDone(); }); } @@ -1211,13 +1220,15 @@ export function startEvidencePageCollapseTransition( } waitForPageCollapseTarget(root, keyholeRect => { - _transitionDepth = Math.max(0, _transitionDepth - 1); if (!keyholeRect) { ghost.remove(); cleanupPageExpandScrim(rootEl); + _transitionDepth = Math.max(0, _transitionDepth - 1); return; } - runPageCollapseGhostAnimation(ghost, snapshot, keyholeRect, rootEl); + runPageCollapseGhostAnimation(ghost, snapshot, keyholeRect, rootEl, () => { + _transitionDepth = Math.max(0, _transitionDepth - 1); + }); }); }; diff --git a/src/vanilla/runtime/cdn.ts b/src/vanilla/runtime/cdn.ts index 329b85d7..5e3524b3 100644 --- a/src/vanilla/runtime/cdn.ts +++ b/src/vanilla/runtime/cdn.ts @@ -9,6 +9,7 @@ import { DefaultPopoverContent } from "../../react/DefaultPopoverContent.js"; import { usePopoverViewState } from "../../react/hooks/usePopoverViewState.js"; import { usePrefersReducedMotion } from "../../react/hooks/usePrefersReducedMotion.js"; import { sanitizeUrl } from "../../react/urlUtils.js"; +import { isViewTransitioning } from "../../react/viewTransition.js"; import { canChildScrollVertically, findPageScrollEl } from "../../shared/scroll.js"; import type { Citation } from "../../types/citation.js"; import type { PageImage, Verification } from "../../types/verification.js"; @@ -285,7 +286,13 @@ function reposition(): void { } function scheduleReposition(): void { cancelAnimationFrame(positionRafId); - positionRafId = requestAnimationFrame(reposition); + positionRafId = requestAnimationFrame(() => { + if (isViewTransitioning()) { + scheduleReposition(); + return; + } + reposition(); + }); } function startPositionTracking(): void { stopPositionTracking(); From a1ef2c88dee0921c1b4472a5ec47ea82a7a42039 Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 13:32:37 -0600 Subject: [PATCH 08/22] fix(useViewportBoundaryGuard): prevent flag clobber on flushSync double-render MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isViewStateTransitionRef is now only set to true (never reset) in the layout effect. The useEffect consumes and resets it after reading, so a second layout-effect run within the same flushSync batch (e.g. sideOffset clearing when leaving expanded-page) can no longer overwrite true→false before the safety timer has had a chance to read the flag. Adds unit tests that exercise the single-render, double-render (CDN), initial open, and flag-reset-after-transition scenarios. Co-Authored-By: Claude Sonnet 4.6 --- .../useViewportBoundaryGuard.test.ts | 179 ++++++++++++++++++ src/react/hooks/useViewportBoundaryGuard.ts | 12 +- 2 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 src/__tests__/useViewportBoundaryGuard.test.ts diff --git a/src/__tests__/useViewportBoundaryGuard.test.ts b/src/__tests__/useViewportBoundaryGuard.test.ts new file mode 100644 index 00000000..5b63ec06 --- /dev/null +++ b/src/__tests__/useViewportBoundaryGuard.test.ts @@ -0,0 +1,179 @@ +/** + * Tests for useViewportBoundaryGuard — specifically the isViewStateTransitionRef + * flag that controls whether the safety timer skips vertical correction. + * + * Key regression: when transitioning out of expanded-page (CDN), sideOffset + * changes from N → undefined in a second layout-effect run within the same + * flushSync batch. The old code set isViewStateTransitionRef.current = isViewStateChange + * which overwrote true→false on the second run, causing the safety timer to apply + * full vertical correction mid-animation (skipVertical=false instead of true). + * + * Observable proxy: make the popover element overflow BOTH horizontally (left=-10) + * and vertically (top=-20). With VIEWPORT_MARGIN_PX=16 and topInset=0: + * guardClamp → dx=26, dy=20 + * skipVertical=true → translate "26px" (horizontal only) + * skipVertical=false → translate "26px 20px" (both) + * + * After advanceTimersByTime(200), the last write to style.translate comes from + * the safety timer (fires at SETTLE_MS≈136ms), which is what we're asserting. + */ + +import { afterEach, beforeEach, describe, expect, it, jest } from "@jest/globals"; +import { act, cleanup, renderHook } from "@testing-library/react"; +import type React from "react"; +import type { PopoverViewState } from "../react/DefaultPopoverContent"; +import { useViewportBoundaryGuard } from "../react/hooks/useViewportBoundaryGuard"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Creates a div whose getBoundingClientRect overflows both left and top so + * guardClamp produces a visible dy when skipVertical=false. + * + * With clientWidth=1024, innerHeight=768, VIEWPORT_MARGIN_PX=16: + * dx = 16 − (−10) = 26 (left overflow) + * dy = 0 − (−20) = 20 (top overflow, only when skipVertical=false) + */ +function makeOutOfBoundsEl(): HTMLElement { + const el = document.createElement("div"); + jest.spyOn(el, "getBoundingClientRect").mockReturnValue({ + left: -10, + top: -20, + right: 290, + bottom: 380, + width: 300, + height: 400, + x: -10, + y: -20, + toJSON: () => ({}), + } as DOMRect); + return el; +} + +function makeRefs(el: HTMLElement): { + popoverContentRef: React.RefObject; + triggerRef: React.RefObject; +} { + return { + popoverContentRef: { current: el }, + triggerRef: { current: document.createElement("span") }, + }; +} + +// skipVertical=true → only dx applied +const HORIZONTAL_ONLY = "26px"; +// skipVertical=false → dx and dy applied +const HORIZONTAL_AND_VERTICAL = "26px 20px"; + +type HookProps = { popoverViewState: PopoverViewState; sideOffset: number | undefined }; + +function renderGuard(el: HTMLElement, initialProps: HookProps) { + const { popoverContentRef, triggerRef } = makeRefs(el); + return renderHook( + ({ popoverViewState, sideOffset }: HookProps) => + useViewportBoundaryGuard(true, popoverViewState, popoverContentRef, triggerRef, sideOffset), + { initialProps }, + ); +} + +// --------------------------------------------------------------------------- +// Setup +// --------------------------------------------------------------------------- + +beforeEach(() => { + jest.useFakeTimers(); + // Provide realistic viewport dimensions for predictable guardClamp output. + Object.defineProperty(document.documentElement, "clientWidth", { + value: 1024, + writable: true, + configurable: true, + }); + Object.defineProperty(window, "innerHeight", { + value: 768, + writable: true, + configurable: true, + }); +}); + +afterEach(() => { + jest.useRealTimers(); + cleanup(); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("useViewportBoundaryGuard — safety-timer skipVertical flag", () => { + it("skips vertical correction in safety timer on a plain view-state transition", () => { + const el = makeOutOfBoundsEl(); + const { rerender } = renderGuard(el, { popoverViewState: "expanded-page", sideOffset: -184 }); + + // Transition: expanded-page → summary in one re-render + act(() => rerender({ popoverViewState: "summary", sideOffset: undefined })); + + // Advance past SETTLE_MS (≈136ms) so the safety timer fires + act(() => jest.advanceTimersByTime(200)); + + // isViewStateChange=true → isViewStateTransitionRef set to true → + // safety timer uses skipVertical=true → horizontal correction only + expect(el.style.translate).toBe(HORIZONTAL_ONLY); + }); + + it("still skips vertical when sideOffset changes in the same batch as the view-state change (CDN scenario)", () => { + const el = makeOutOfBoundsEl(); + const { rerender } = renderGuard(el, { popoverViewState: "expanded-page", sideOffset: -184 }); + + // Simulate the CDN flushSync double-render: + // render 1 → viewState changes to "summary" (layout effect: isViewStateChange=true → flag=true) + // render 2 → sideOffset clears to undefined (layout effect: isViewStateChange=false → must NOT overwrite flag) + // Both within one act() so passive effects (useEffect) flush only after both commits. + act(() => { + rerender({ popoverViewState: "summary", sideOffset: -184 }); // render 1 + rerender({ popoverViewState: "summary", sideOffset: undefined }); // render 2 + }); + + act(() => jest.advanceTimersByTime(200)); + + // The sideOffset-change layout-effect run must not have clobbered the flag. + // Safety timer must still use skipVertical=true. + expect(el.style.translate).toBe(HORIZONTAL_ONLY); + }); + + it("applies full (vertical+horizontal) correction for initial open — no transition", () => { + const el = makeOutOfBoundsEl(); + // Render directly in summary state with no prior transition; flag starts false. + renderGuard(el, { popoverViewState: "summary", sideOffset: undefined }); + + act(() => jest.advanceTimersByTime(200)); + + // No view-state transition → isViewStateTransitionRef stays false → + // safety timer uses skipVertical=false → both dx and dy applied + expect(el.style.translate).toBe(HORIZONTAL_AND_VERTICAL); + }); + + it("resets the flag so subsequent non-transition renders use full correction", () => { + const el = makeOutOfBoundsEl(); + const { rerender } = renderGuard(el, { popoverViewState: "expanded-page", sideOffset: -184 }); + + // First transition: expanded-page → summary (flag set to true, consumed & reset by useEffect) + act(() => { + rerender({ popoverViewState: "summary", sideOffset: -184 }); + rerender({ popoverViewState: "summary", sideOffset: undefined }); + }); + act(() => jest.advanceTimersByTime(200)); // fires safety timer, resets flag to false + + // Simulate sideOffset changing again with NO view-state change (e.g. window resize). + // Flag is now false. The useEffect does NOT re-run (popoverViewState unchanged). + // The layout effect for sideOffset runs clamp(skipVertical=false) directly — + // no safety timer involved. Clear the style first so the layout-effect write is observable. + el.style.translate = ""; + act(() => rerender({ popoverViewState: "summary", sideOffset: 8 })); + + // The sideOffset-only layout effect calls clamp(skipVertical=false) directly — + // no safety timer involved here. Assert both dx and dy are applied: + expect(el.style.translate).toBe(HORIZONTAL_AND_VERTICAL); + }); +}); diff --git a/src/react/hooks/useViewportBoundaryGuard.ts b/src/react/hooks/useViewportBoundaryGuard.ts index 33ac253f..e00ce7d7 100644 --- a/src/react/hooks/useViewportBoundaryGuard.ts +++ b/src/react/hooks/useViewportBoundaryGuard.ts @@ -74,10 +74,13 @@ export function useViewportBoundaryGuard( const isInitialOpen = prevViewStateRef.current === null; const isViewStateChange = !isInitialOpen && prevViewStateRef.current !== popoverViewState; prevViewStateRef.current = popoverViewState; - // Signal to the sibling useEffect (which runs after this) whether this cycle - // is a view-state transition. useEffect cannot compute this itself because - // prevViewStateRef has already been updated by this point. - isViewStateTransitionRef.current = isViewStateChange; + // Signal to the sibling useEffect whether this cycle is a view-state transition. + // Only set to true — never reset here. The useEffect consumes and resets the flag + // after reading so the next transition starts clean. This prevents a subsequent + // layout-effect run (e.g. triggered by sideOffset changing in the same flushSync + // batch when leaving expanded-page) from overwriting true with false before the + // useEffect has had a chance to read it. + if (isViewStateChange) isViewStateTransitionRef.current = true; if (isViewStateChange) { // Apply full vertical clamping immediately on view-state change. The @@ -128,6 +131,7 @@ export function useViewportBoundaryGuard( // isViewStateTransitionRef is written by the sibling useLayoutEffect on the same deps // and is readable here because layout effects flush before passive effects. const skipVerticalInTimer = isViewStateTransitionRef.current; + isViewStateTransitionRef.current = false; // consumed; reset for next transition const safetyTimer = setTimeout(() => { const current = popoverContentRef.current; if (current) clamp(current, skipVerticalInTimer, containerTopRef.current); From 96953a6cda1c59cd24c99869d5580364a90d015c Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 13:32:45 -0600 Subject: [PATCH 09/22] feat(markdownToHtml): add claim card + MODEL meta item; remove audience preset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Header now renders an optional claim card (blockquote with eyebrow label) between the H1 and the meta strip when options.claim is provided. Inline markdown (bold/italic/code) is formatted; HTML is escaped; whitespace-only values are silently ignored. The AUDIENCE meta item is replaced by MODEL, shown when options.model is set. The audience feature (AUDIENCE_PRESETS, AudiencePreset, AUDIENCE_CONFIG, --audience CLI flag) is removed entirely — width and tier2Open are hardcoded to their general-preset values (960px / true), keeping the output unchanged for existing callers. Co-Authored-By: Claude Sonnet 4.6 --- src/__tests__/cliIntegration.test.ts | 25 ------ src/__tests__/markdownToHtml.test.ts | 111 +++++++++++++++++++-------- src/cli/commands.ts | 12 +-- src/cli/markdownToHtml.ts | 92 +++++++++++++++------- 4 files changed, 147 insertions(+), 93 deletions(-) diff --git a/src/__tests__/cliIntegration.test.ts b/src/__tests__/cliIntegration.test.ts index e9e759a8..a2bfa230 100644 --- a/src/__tests__/cliIntegration.test.ts +++ b/src/__tests__/cliIntegration.test.ts @@ -313,16 +313,6 @@ describe("verify command", () => { expect(r.stderr).toContain("--style"); }); - it("verify --markdown errors with invalid --audience", () => { - const mdFile = join(TEST_DIR, "audience-test.md"); - writeFileSync(mdFile, "test\n<<>>\n[]\n<<>>"); - const r = run(["verify", "--markdown", mdFile, "--audience", "casual"], { - env: { DEEPCITATION_API_KEY: "sk-dc-test12345678901234" }, - }); - expect(r.exitCode).toBe(1); - expect(r.stderr).toContain("--audience"); - }); - it("verify --html errors on nonexistent file", () => { const r = run(["verify", "--html", "/nonexistent.html"], { env: { DEEPCITATION_API_KEY: "sk-dc-test12345678901234" }, @@ -875,19 +865,4 @@ describe("verify --markdown output naming", () => { expect(r.stderr).toContain("1 citation"); }); - it("--style and --audience are forwarded through markdown pipeline", () => { - const mdDir = join(TEST_DIR, "md-style-fwd"); - mkdirSync(mdDir, { recursive: true }); - const mdFile = join(mdDir, "styled.md"); - writeFileSync( - mdFile, - `Claim [1].\n\n<<>>\n[{"n":1,"a":"att-1","r":"t","f":"claim","k":"Claim","p":"page_number_1_index_0","l":[1]}]\n<<>>`, - ); - - // plain style + executive audience should not error at parse stage - const r = run(["verify", "--markdown", mdFile, "--style", "plain", "--audience", "executive"], { - env: { DEEPCITATION_API_KEY: "sk-dc-test12345678901234" }, - }); - expect(r.stderr).toContain("1 citation"); - }); }); diff --git a/src/__tests__/markdownToHtml.test.ts b/src/__tests__/markdownToHtml.test.ts index 40e74dff..17b05e31 100644 --- a/src/__tests__/markdownToHtml.test.ts +++ b/src/__tests__/markdownToHtml.test.ts @@ -1,6 +1,5 @@ import { describe, expect, it } from "@jest/globals"; import { - AUDIENCE_PRESETS, buildCdnComparisonShowcaseHtml, markdownToHtml, wrapCitationMarkers, @@ -301,36 +300,6 @@ describe("buildCdnComparisonShowcaseHtml", () => { }); }); -// ── markdownToHtml — audience presets ───────────────────────────── - -describe("markdownToHtml audience presets", () => { - it("exports all five audience presets", () => { - expect(AUDIENCE_PRESETS).toEqual(["general", "executive", "technical", "legal", "medical"]); - }); - - it("uses narrower width for executive audience", () => { - const md = "# Report\n\n## Section\n\nContent."; - const executive = markdownToHtml(md, { style: "report", audience: "executive" }); - const general = markdownToHtml(md, { style: "report", audience: "general" }); - expect(executive).toContain("720px"); - expect(general).toContain("960px"); - }); - - it("collapses details for executive audience", () => { - const md = "# Report\n\n## Key Findings\n\nImportant.\n\n## Details\n\nMore."; - const executive = markdownToHtml(md, { style: "report", audience: "executive" }); - // executive tier2Open is false, so no "open" attribute - expect(executive).toContain("
"); - expect(executive).not.toContain("
"); - }); - - it("expands details for general audience", () => { - const md = "# Report\n\n## Key Findings\n\nImportant.\n\n## Details\n\nMore."; - const general = markdownToHtml(md, { style: "report", audience: "general" }); - expect(general).toContain("
"); - }); -}); - // ── markdownToHtml — report body structure ──────────────────────── describe("markdownToHtml report body (progressive disclosure)", () => { @@ -511,3 +480,83 @@ describe("markdownToHtml — §7 extraction-script structure regressions", () => expect(result).not.toMatch(/interest rate<\/strong>\s*/); }); }); + +// ── Header claim + model ────────────────────────────────────────── + +describe("markdownToHtml header — claim & model", () => { + it("renders a claim card when claim is provided", () => { + const result = markdownToHtml("# T\nbody", { claim: "Did revenue exceed $4B?" }); + expect(result).toContain('class="dc-claim"'); + expect(result).toContain(">CLAIM<"); + expect(result).toContain("Did revenue exceed $4B?"); + }); + + it("omits the claim card when claim is absent", () => { + const result = markdownToHtml("# T\nbody", {}); + expect(result).not.toContain('
CLAIM<"); + }); + + it("suppresses a whitespace-only claim", () => { + const result = markdownToHtml("# T\nbody", { claim: " " }); + expect(result).not.toContain('
CLAIM<"); + }); + + it("escapes HTML in the claim", () => { + const result = markdownToHtml("# T\nbody", { claim: "" }); + expect(result).toContain("<script>"); + expect(result).not.toContain("", idIdx); + if (closeIdx === -1) break; + const end = closeIdx + "".length; + // Consume trailing whitespace + let ws = end; + while ( + ws < result.length && + (result[ws] === " " || result[ws] === "\t" || result[ws] === "\n" || result[ws] === "\r") + ) + ws++; + result = result.slice(0, tagStart) + result.slice(ws); hadExisting = true; - // Reset lastIndex since we tested before replacing - pattern.lastIndex = 0; - result = result.replace(pattern, ""); + changed = true; } } + // Strip plain ", contentStart); + if (closeIdx === -1) break; + const content = result.slice(contentStart, closeIdx); + + // Init call: bounded check on first 80 chars (trimStart + literal prefix) + const trimmed = content.trimStart(); + const isInitCall = + trimmed.startsWith("window.DeepCitationPopover") && + /^window\.DeepCitationPopover\s*&&/.test(trimmed.slice(0, 80)); + // CDN bundle: linear regex applied only to bounded content string + const isCdnBundle = + content.includes("window.DeepCitationPopover") && /window\.DeepCitationPopover\s*=/.test(content); + + if (isInitCall || isCdnBundle) { + hadExisting = true; + const end = closeIdx + "".length; + let ws = end; + while ( + ws < result.length && + (result[ws] === " " || result[ws] === "\t" || result[ws] === "\n" || result[ws] === "\r") + ) + ws++; + result = result.slice(0, scriptStart) + result.slice(ws); + // Don't advance pos — content was removed at this position + } else { + pos = contentStart; + } } return { html: result, hadExisting }; @@ -223,18 +262,79 @@ export function autoFixDisplayLabels( verifications: Record, ): { html: string; log: string[] } { const log: string[] = []; - const elementRe = /<([a-zA-Z][a-zA-Z0-9]*)[^>]*\sdata-citation-key="([^"]+)"([^>]*)>([\s\S]*?)<\/\1>/g; - const fixedHtml = html.replace(elementRe, (fullMatch, _tag, hashedKey, rest, content) => { - // Skip if data-dc-display-label is already set on this element - if (/data-dc-display-label=/.test(rest) || /data-dc-display-label=/.test(fullMatch)) return fullMatch; + // Use string scanning instead of regex on the full HTML to avoid ReDoS. + // The pattern [^>]*\s...[^>]* applied to uncontrolled input is polynomial. + const attrMarker = ' data-citation-key="'; + const parts: string[] = []; + let lastEnd = 0; + let searchPos = 0; + + while (true) { + const attrIdx = html.indexOf(attrMarker, searchPos); + if (attrIdx === -1) break; + + // Find the enclosing tag's opening < (must not be a closing tag) + const tagStart = html.lastIndexOf("<", attrIdx); + if (tagStart === -1 || tagStart < lastEnd || html[tagStart + 1] === "/") { + searchPos = attrIdx + 1; + continue; + } + + // Find the end of the opening tag + const tagClose = html.indexOf(">", attrIdx); + if (tagClose === -1) { + searchPos = attrIdx + 1; + continue; + } + + // Extract tag name from between < and first whitespace or > + const afterAngle = tagStart + 1; + const tagHeaderSlice = html.slice(afterAngle, tagClose); + const tagNameEnd = tagHeaderSlice.search(/[\s/>]/); + const tagName = tagNameEnd >= 0 ? tagHeaderSlice.slice(0, tagNameEnd) : tagHeaderSlice; + if (!/^[a-zA-Z][a-zA-Z0-9]*$/.test(tagName)) { + searchPos = tagClose + 1; + continue; + } + + // Extract citation key (bounded between the attribute marker and the next quote) + const keyStart = attrIdx + attrMarker.length; + const keyEnd = html.indexOf('"', keyStart); + if (keyEnd === -1) { + searchPos = attrIdx + 1; + continue; + } + const hashedKey = html.slice(keyStart, keyEnd); + + // Skip if opening tag already has data-dc-display-label + const openingTag = html.slice(tagStart, tagClose + 1); + if (openingTag.includes("data-dc-display-label=")) { + searchPos = tagClose + 1; + continue; + } const sourceMatch = (verifications[hashedKey] as { citation?: { sourceMatch?: string } } | undefined)?.citation ?.sourceMatch; - if (!sourceMatch) return fullMatch; + if (!sourceMatch) { + searchPos = tagClose + 1; + continue; + } + + // Find closing tag (uses first occurrence — same behaviour as the original lazy regex) + const closeTag = ``; + const contentStart = tagClose + 1; + const closeIdx = html.indexOf(closeTag, contentStart); + if (closeIdx === -1) { + searchPos = tagClose + 1; + continue; + } + + const content = html.slice(contentStart, closeIdx); // Strip inner HTML tags to get approximate visible text. // Loop until stable to handle nested fragments like ipt>. - let visibleText = content as string; + // Applied to bounded content string — no ReDoS risk. + let visibleText = content; let prev: string; do { prev = visibleText; @@ -242,19 +342,34 @@ export function autoFixDisplayLabels( } while (visibleText !== prev); visibleText = visibleText.replace(/\s+/g, " ").trim(); - if (!visibleText || visibleText.length > 80) return fullMatch; - if (sourceMatch.toLowerCase().includes(visibleText.toLowerCase())) return fullMatch; + const matchEnd = closeIdx + closeTag.length; + + if (!visibleText || visibleText.length > 80 || sourceMatch.toLowerCase().includes(visibleText.toLowerCase())) { + searchPos = tagClose + 1; + continue; + } const escaped = visibleText.replace(/"/g, """); log.push( ` [${hashedKey.slice(0, 8)}…] claimText="${visibleText}" sourceMatch="${sourceMatch.slice(0, 60)}${sourceMatch.length > 60 ? "…" : ""}"`, ); - return fullMatch.replace( - `data-citation-key="${hashedKey}"`, - `data-citation-key="${hashedKey}" data-dc-display-label="${escaped}"`, + + // Emit unchanged HTML up to this element, then the patched element + parts.push(html.slice(lastEnd, tagStart)); + const fullMatch = html.slice(tagStart, matchEnd); + parts.push( + fullMatch.replace( + `data-citation-key="${hashedKey}"`, + `data-citation-key="${hashedKey}" data-dc-display-label="${escaped}"`, + ), ); - }); - return { html: fixedHtml, log }; + lastEnd = matchEnd; + searchPos = matchEnd; + } + + if (parts.length === 0) return { html, log }; + parts.push(html.slice(lastEnd)); + return { html: parts.join(""), log }; } /** Options for {@link injectCdnRuntime}. */ From f45308429e61054140becad0736ecf78d6d3da29 Mon Sep 17 00:00:00 2001 From: Benson Date: Tue, 14 Apr 2026 16:15:06 -0600 Subject: [PATCH 22/22] refactor(reportUtils): replace useless changed sentinel with while(true) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `changed` variable was set to false then unconditionally set back to true before the loop end — the while(changed) guard never gated anything. Replace with while(true) + the existing break exits, which makes control flow explicit. Co-Authored-By: Claude Sonnet 4.6 --- src/vanilla/reportUtils.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/vanilla/reportUtils.ts b/src/vanilla/reportUtils.ts index 2d5db017..232a0025 100644 --- a/src/vanilla/reportUtils.ts +++ b/src/vanilla/reportUtils.ts @@ -60,9 +60,7 @@ export function stripExistingInjection(html: string): { html: string; hadExistin // Regex with multiple [^>]* groups on uncontrolled input is polynomial. for (const id of ["dc-data", "dc-key-map"]) { const idMarker = `id="${id}"`; - let changed = true; - while (changed) { - changed = false; + while (true) { const idIdx = result.indexOf(idMarker); if (idIdx === -1) break; const tagStart = result.lastIndexOf("