From a0f4d5e94ca9447c99b67ddf72da3973be710daf Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Sun, 10 May 2026 12:25:04 +0700 Subject: [PATCH 001/115] =?UTF-8?q?feat:=20[ENG-2737]=20HTML=20render=20la?= =?UTF-8?q?yer=20foundation=20=E2=80=94=20element=20registry=20+=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lands the M1 keystone task: element vocabulary, per-element validators, data-driven registry, and HTML parser wrapper. No behavior change yet — no consumer wires the registry until T3 (curate path) and T4 (query path). Element vocabulary (5 elements): - bv-topic — root container; required `path`, optional importance/maturity/ recency/updatedat (lowercase per HTML5 normalization) - bv-rule — rule statements (severity, id) - bv-decision — decision records (id) - bv-bug — bug runbook entries (id, severity) - bv-fix — fix runbook entries (id) Architecture: - Data-driven `ELEMENT_REGISTRY` is the production-track guardrail. M2 vocabulary expansion is purely additive — new elements register via one entry + one schema/validator pair, no consumer code changes. - `makeAttributeValidator` factory eliminates per-element boilerplate; reduces M2's 12 additional element validators to one-line bindings. - HTML parser is parser-library-agnostic at the API boundary (`DocumentNode` / `ElementNode` / `TextNode`). parse5 used internally; consumers don't see parse5 types. - Attribute-case normalization is documented in `ElementNode` JSDoc: HTML5 lowercases attribute names at parse time, so schema keys must match parser output (lowercase), not source HTML (camelCase). 
Tests (106 new): - 5 per-element validator suites (61 cases total) — each ≥10 cases - Registry shape + wiring + metadata (14 cases) - HTML parser: basic parsing, malformed-input handling, walkElements, getInnerText, serializeHtml round-trip (22 cases) - End-to-end round-trip on a sample fixture covering all 5 elements (9 cases) Verification: - typecheck clean - lint 0 errors (242 warnings, all pre-existing baseline) - 7715 tests passing, 0 failing - build clean Adds parse5@^8.0.1 as a runtime dep — the W3C-spec HTML parser used by jsdom; types ship in-tree (no @types/parse5 needed). Justified vs hand- rolled parsing because robust HTML tag-soup handling is non-trivial and this is the standard library for the job. --- package-lock.json | 40 +++- package.json | 1 + .../core/domain/render/element-types.ts | 105 ++++++++++ .../infra/render/elements/bv-bug/schema.ts | 10 + .../infra/render/elements/bv-bug/validator.ts | 8 + .../render/elements/bv-decision/schema.ts | 10 + .../render/elements/bv-decision/validator.ts | 8 + .../infra/render/elements/bv-fix/schema.ts | 9 + .../infra/render/elements/bv-fix/validator.ts | 8 + .../infra/render/elements/bv-rule/schema.ts | 10 + .../render/elements/bv-rule/validator.ts | 8 + .../infra/render/elements/bv-topic/schema.ts | 35 ++++ .../render/elements/bv-topic/validator.ts | 9 + .../infra/render/elements/make-validator.ts | 43 ++++ src/server/infra/render/elements/registry.ts | 73 +++++++ src/server/infra/render/reader/html-parser.ts | 179 ++++++++++++++++ test/fixtures/render/sample-topic.html | 35 ++++ .../infra/render/elements/bv-bug.test.ts | 75 +++++++ .../infra/render/elements/bv-decision.test.ts | 80 +++++++ .../infra/render/elements/bv-fix.test.ts | 80 +++++++ .../infra/render/elements/bv-rule.test.ts | 74 +++++++ .../infra/render/elements/bv-topic.test.ts | 117 +++++++++++ .../infra/render/elements/registry.test.ts | 107 ++++++++++ .../infra/render/reader/html-parser.test.ts | 198 ++++++++++++++++++ 
.../render/sample-topic-roundtrip.test.ts | 132 ++++++++++++ 25 files changed, 1452 insertions(+), 2 deletions(-) create mode 100644 src/server/core/domain/render/element-types.ts create mode 100644 src/server/infra/render/elements/bv-bug/schema.ts create mode 100644 src/server/infra/render/elements/bv-bug/validator.ts create mode 100644 src/server/infra/render/elements/bv-decision/schema.ts create mode 100644 src/server/infra/render/elements/bv-decision/validator.ts create mode 100644 src/server/infra/render/elements/bv-fix/schema.ts create mode 100644 src/server/infra/render/elements/bv-fix/validator.ts create mode 100644 src/server/infra/render/elements/bv-rule/schema.ts create mode 100644 src/server/infra/render/elements/bv-rule/validator.ts create mode 100644 src/server/infra/render/elements/bv-topic/schema.ts create mode 100644 src/server/infra/render/elements/bv-topic/validator.ts create mode 100644 src/server/infra/render/elements/make-validator.ts create mode 100644 src/server/infra/render/elements/registry.ts create mode 100644 src/server/infra/render/reader/html-parser.ts create mode 100644 test/fixtures/render/sample-topic.html create mode 100644 test/unit/server/infra/render/elements/bv-bug.test.ts create mode 100644 test/unit/server/infra/render/elements/bv-decision.test.ts create mode 100644 test/unit/server/infra/render/elements/bv-fix.test.ts create mode 100644 test/unit/server/infra/render/elements/bv-rule.test.ts create mode 100644 test/unit/server/infra/render/elements/bv-topic.test.ts create mode 100644 test/unit/server/infra/render/elements/registry.test.ts create mode 100644 test/unit/server/infra/render/reader/html-parser.test.ts create mode 100644 test/unit/server/infra/render/sample-topic-roundtrip.test.ts diff --git a/package-lock.json b/package-lock.json index 549742619..5591c2561 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "byterover-cli", - "version": "3.11.0", + "version": "3.12.0", 
"lockfileVersion": 3, "requires": true, "packages": { "": { "name": "byterover-cli", - "version": "3.11.0", + "version": "3.12.0", "bundleDependencies": [ "@campfirein/brv-transport-client", "@campfirein/byterover-packages", @@ -80,6 +80,7 @@ "officeparser": "^6.0.4", "open": "^10.2.0", "openai": "^6.9.1", + "parse5": "^8.0.1", "proxy-agent": "^7.0.0", "react": "^19.2.1", "react-diff-viewer-continued": "^4.2.0", @@ -5911,6 +5912,7 @@ "os": [ "android" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -5931,6 +5933,7 @@ "os": [ "darwin" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -5951,6 +5954,7 @@ "os": [ "darwin" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -5971,6 +5975,7 @@ "os": [ "linux" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -5991,6 +5996,7 @@ "os": [ "linux" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -6011,6 +6017,7 @@ "os": [ "linux" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -6031,6 +6038,7 @@ "os": [ "linux" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -6051,6 +6059,7 @@ "os": [ "linux" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -6071,6 +6080,7 @@ "os": [ "linux" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -6091,6 +6101,7 @@ "os": [ "win32" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -6111,6 +6122,7 @@ "os": [ "win32" ], + "peer": true, "engines": { "node": ">= 10" }, @@ -12682,6 +12694,18 @@ "node": ">=10.13.0" } }, + "node_modules/entities": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz", + "integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/env-paths": { "version": "2.2.1", "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", @@ -20773,6 +20797,18 @@ "dev": true, 
"license": "MIT" }, + "node_modules/parse5": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz", + "integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==", + "license": "MIT", + "dependencies": { + "entities": "^8.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", diff --git a/package.json b/package.json index f75ace963..134a10434 100644 --- a/package.json +++ b/package.json @@ -73,6 +73,7 @@ "officeparser": "^6.0.4", "open": "^10.2.0", "openai": "^6.9.1", + "parse5": "^8.0.1", "proxy-agent": "^7.0.0", "react": "^19.2.1", "react-diff-viewer-continued": "^4.2.0", diff --git a/src/server/core/domain/render/element-types.ts b/src/server/core/domain/render/element-types.ts new file mode 100644 index 000000000..aefd154c9 --- /dev/null +++ b/src/server/core/domain/render/element-types.ts @@ -0,0 +1,105 @@ +/** + * Element type definitions for the M1 HTML render layer. + * + * This file is the type-only contract between: + * - the HTML parser (produces `ParsedNode` trees) + * - per-element validators (consume `ElementNode`s) + * - the element registry (catalogs `ElementSchema`s by `ElementName`) + * - downstream consumers (T3 curate writer, T4 query reader) + * + * See `features/html-memory-conversion/milestones/01-experiment/plan.md` + * for the M1 vocabulary scope (5 elements). M2 expansion is purely + * additive — adding an element name to `ELEMENT_NAMES` and a registry + * entry is sufficient; no consumer needs to be touched. + */ + +/** + * The five M1 element names. Adding to this list must be an additive + * operation; downstream consumers iterate the registry generically. 
+ */ +export const ELEMENT_NAMES = [ + 'bv-topic', + 'bv-rule', + 'bv-decision', + 'bv-bug', + 'bv-fix', +] as const + +export type ElementName = typeof ELEMENT_NAMES[number] + +/** + * Normalized AST node produced by the HTML parser. Independent of any + * specific parser library so we can swap implementations without + * touching consumers. + */ +export type ParsedNode = DocumentNode | ElementNode | TextNode + +export type ElementNode = { + /** + * Attribute map. Values are always strings (HTML attribute semantics). + * + * NOTE on key case: per the HTML5 parsing spec, attribute names are + * lowercased during parsing — `updatedAt` in the source becomes + * `updatedat` in this map. Downstream consumers (T3 writer, T4 + * reader) MUST emit and look up attributes in lowercase. Schemas + * declared in per-element `schema.ts` files use lowercase to match. + */ + attributes: Readonly> + children: readonly ParsedNode[] + /** Tag name, lowercased. May or may not be a registered `ElementName`. */ + tagName: string + type: 'element' +} + +export type TextNode = { + text: string + type: 'text' +} + +export type DocumentNode = { + children: readonly ParsedNode[] + type: 'document' +} + +/** A single validation issue. Field is informational (often the attribute name). */ +export type ValidationError = { + field: string + message: string +} + +/** + * Validation outcome from a per-element validator. Discriminated union so + * consumers can branch without optional-undefined gymnastics. + */ +export type ValidationResult = + | {errors: readonly ValidationError[]; valid: false;} + | {valid: true} + +/** + * Allowed-children semantic hint. Informational only in M1 — the + * validator carries the enforcement; this is for documentation and the + * curate prompt template generator (T3). + */ +export type AllowedChildren = 'any' | 'block' | 'inline' | 'none' + +/** + * Per-element registry entry. 
The validator is the load-bearing field; + * everything else is metadata for the prompt template generator (T3) and + * the structural-axis index (T4). + */ +export type ElementSchema = { + /** Allowed-children semantic hint. Informational. */ + allowedChildren: AllowedChildren + /** Human-readable description for the curate prompt template generator. */ + description: string + name: ElementName + /** Optional attribute names. Informational; the validator enforces. */ + optionalAttributes: readonly string[] + /** Required attribute names. Informational; the validator enforces. */ + requiredAttributes: readonly string[] + /** Validate an `ElementNode`'s tag name + attributes. Light validation in M1 (per-attribute Zod schema); strict per ADR-007 §13 in M2. */ + validator: (node: ElementNode) => ValidationResult +} + +/** The full element registry — exactly one `ElementSchema` per `ElementName`. */ +export type ElementRegistry = Readonly> diff --git a/src/server/infra/render/elements/bv-bug/schema.ts b/src/server/infra/render/elements/bv-bug/schema.ts new file mode 100644 index 000000000..d7ec174d4 --- /dev/null +++ b/src/server/infra/render/elements/bv-bug/schema.ts @@ -0,0 +1,10 @@ +import {z} from 'zod' + +/** + * Zod schema for `` attributes. M1 light validation; passthrough + * tolerates unknown attributes (ADR-007 §13 strict validation is M2). 
+ */ +export const BvBugAttributesSchema = z.object({ + id: z.string().min(1, {message: 'id must be non-empty if present'}).optional(), + severity: z.enum(['low', 'medium', 'high', 'critical']).optional(), +}).passthrough() diff --git a/src/server/infra/render/elements/bv-bug/validator.ts b/src/server/infra/render/elements/bv-bug/validator.ts new file mode 100644 index 000000000..40f8e9c65 --- /dev/null +++ b/src/server/infra/render/elements/bv-bug/validator.ts @@ -0,0 +1,8 @@ +import {makeAttributeValidator} from '../make-validator.js' +import {BvBugAttributesSchema} from './schema.js' + +/** + * Validate a `` element node. M1 light validation; strict per + * ADR-007 §13 is M2 work. + */ +export const validateBvBug = makeAttributeValidator('bv-bug', BvBugAttributesSchema) diff --git a/src/server/infra/render/elements/bv-decision/schema.ts b/src/server/infra/render/elements/bv-decision/schema.ts new file mode 100644 index 000000000..8fdef30ab --- /dev/null +++ b/src/server/infra/render/elements/bv-decision/schema.ts @@ -0,0 +1,10 @@ +import {z} from 'zod' + +/** + * Zod schema for `` attributes. M1 light validation; + * `passthrough` tolerates unknown attributes (ADR-007 §13 strict + * validation is M2). + */ +export const BvDecisionAttributesSchema = z.object({ + id: z.string().min(1, {message: 'id must be non-empty if present'}).optional(), +}).passthrough() diff --git a/src/server/infra/render/elements/bv-decision/validator.ts b/src/server/infra/render/elements/bv-decision/validator.ts new file mode 100644 index 000000000..e4360630c --- /dev/null +++ b/src/server/infra/render/elements/bv-decision/validator.ts @@ -0,0 +1,8 @@ +import {makeAttributeValidator} from '../make-validator.js' +import {BvDecisionAttributesSchema} from './schema.js' + +/** + * Validate a `` element node. M1 light validation; strict + * per ADR-007 §13 is M2 work. 
+ */ +export const validateBvDecision = makeAttributeValidator('bv-decision', BvDecisionAttributesSchema) diff --git a/src/server/infra/render/elements/bv-fix/schema.ts b/src/server/infra/render/elements/bv-fix/schema.ts new file mode 100644 index 000000000..84dbd5b1f --- /dev/null +++ b/src/server/infra/render/elements/bv-fix/schema.ts @@ -0,0 +1,9 @@ +import {z} from 'zod' + +/** + * Zod schema for `` attributes. M1 light validation; passthrough + * tolerates unknown attributes (ADR-007 §13 strict validation is M2). + */ +export const BvFixAttributesSchema = z.object({ + id: z.string().min(1, {message: 'id must be non-empty if present'}).optional(), +}).passthrough() diff --git a/src/server/infra/render/elements/bv-fix/validator.ts b/src/server/infra/render/elements/bv-fix/validator.ts new file mode 100644 index 000000000..bc3dff211 --- /dev/null +++ b/src/server/infra/render/elements/bv-fix/validator.ts @@ -0,0 +1,8 @@ +import {makeAttributeValidator} from '../make-validator.js' +import {BvFixAttributesSchema} from './schema.js' + +/** + * Validate a `` element node. M1 light validation; strict per + * ADR-007 §13 is M2 work. + */ +export const validateBvFix = makeAttributeValidator('bv-fix', BvFixAttributesSchema) diff --git a/src/server/infra/render/elements/bv-rule/schema.ts b/src/server/infra/render/elements/bv-rule/schema.ts new file mode 100644 index 000000000..1595e7cc2 --- /dev/null +++ b/src/server/infra/render/elements/bv-rule/schema.ts @@ -0,0 +1,10 @@ +import {z} from 'zod' + +/** + * Zod schema for `` attributes. M1 light validation; passthrough + * tolerates unknown attributes (ADR-007 §13 strict validation is M2). 
+ */ +export const BvRuleAttributesSchema = z.object({ + id: z.string().min(1, {message: 'id must be non-empty if present'}).optional(), + severity: z.enum(['info', 'must', 'should']).optional(), +}).passthrough() diff --git a/src/server/infra/render/elements/bv-rule/validator.ts b/src/server/infra/render/elements/bv-rule/validator.ts new file mode 100644 index 000000000..21600a4a3 --- /dev/null +++ b/src/server/infra/render/elements/bv-rule/validator.ts @@ -0,0 +1,8 @@ +import {makeAttributeValidator} from '../make-validator.js' +import {BvRuleAttributesSchema} from './schema.js' + +/** + * Validate a `` element node. M1 light validation; strict per + * ADR-007 §13 is M2 work. + */ +export const validateBvRule = makeAttributeValidator('bv-rule', BvRuleAttributesSchema) diff --git a/src/server/infra/render/elements/bv-topic/schema.ts b/src/server/infra/render/elements/bv-topic/schema.ts new file mode 100644 index 000000000..f04099c63 --- /dev/null +++ b/src/server/infra/render/elements/bv-topic/schema.ts @@ -0,0 +1,35 @@ +import {z} from 'zod' + +/** + * Zod schema for `` attributes. + * + * HTML attributes arrive as strings. Numeric and enum constraints are + * expressed via `z.coerce.number()` (with refinements) and `z.enum`. + * + * `passthrough` is intentional: M1 tolerates unknown attributes + * (warn-only behaviour). Strict validation per ADR-007 §13 is M2 work. 
+ */ +export const BvTopicAttributesSchema = z.object({ + importance: z + .string() + .regex(/^\d+$/, {message: 'importance must be an integer string "0".."100"'}) + .refine((v) => { + const n = Number(v) + return n >= 0 && n <= 100 + }, {message: 'importance must be in [0, 100]'}) + .optional(), + maturity: z.enum(['draft', 'validated', 'core']).optional(), + path: z.string().min(1, {message: 'path is required and must be non-empty'}), + recency: z + .string() + .regex(/^[\d.]+$/, {message: 'recency must be a numeric string'}) + .refine((v) => { + const n = Number(v) + return Number.isFinite(n) && n >= 0 && n <= 1 + }, {message: 'recency must be in [0, 1]'}) + .optional(), + // Lowercase per HTML5 attribute-name normalization (parse5 lowercases + // `updatedAt="..."` to `updatedat`; schema keys must match the parser + // output, not the source HTML). See element-types.ts attribute-case note. + updatedat: z.string().datetime({message: 'updatedat must be ISO-8601 datetime'}).optional(), +}).passthrough() diff --git a/src/server/infra/render/elements/bv-topic/validator.ts b/src/server/infra/render/elements/bv-topic/validator.ts new file mode 100644 index 000000000..387c16e3c --- /dev/null +++ b/src/server/infra/render/elements/bv-topic/validator.ts @@ -0,0 +1,9 @@ +import {makeAttributeValidator} from '../make-validator.js' +import {BvTopicAttributesSchema} from './schema.js' + +/** + * Validate a `` element node. Light validation per M1 + * (per-attribute Zod schema in `./schema.ts`); strict per ADR-007 §13 + * is M2 work. 
+ */ +export const validateBvTopic = makeAttributeValidator('bv-topic', BvTopicAttributesSchema) diff --git a/src/server/infra/render/elements/make-validator.ts b/src/server/infra/render/elements/make-validator.ts new file mode 100644 index 000000000..4806f7e2b --- /dev/null +++ b/src/server/infra/render/elements/make-validator.ts @@ -0,0 +1,43 @@ +import type {z} from 'zod' + +import type {ElementNode, ValidationError, ValidationResult} from '../../../core/domain/render/element-types.js' + +/** + * Build an element validator from a tag name and a Zod attribute schema. + * + * Every M1 element validator follows the same shape: + * 1. Reject if `node.tagName` doesn't match the expected tag. + * 2. Run the per-element Zod schema against `node.attributes`. + * 3. Map any Zod issues to `ValidationError` records. + * + * Centralizing the shape here means M2's vocabulary expansion (12 more + * elements per Andy's proposal §11) is purely additive — each new + * element is a `schema.ts` + a one-line `validator.ts` binding. No + * branching logic per element type until/unless an element legitimately + * needs custom validation beyond attributes. 
+ */ +export function makeAttributeValidator( + tagName: string, + schema: z.ZodTypeAny, +): (node: ElementNode) => ValidationResult { + return (node) => { + if (node.tagName !== tagName) { + const errors: ValidationError[] = [{ + field: 'tagName', + message: `expected tagName "${tagName}", got "${node.tagName}"`, + }] + return {errors, valid: false} + } + + const parsed = schema.safeParse(node.attributes) + if (!parsed.success) { + const errors: ValidationError[] = parsed.error.issues.map((issue) => ({ + field: issue.path.join('.') || 'attributes', + message: issue.message, + })) + return {errors, valid: false} + } + + return {valid: true} + } +} diff --git a/src/server/infra/render/elements/registry.ts b/src/server/infra/render/elements/registry.ts new file mode 100644 index 000000000..5dc4ab88b --- /dev/null +++ b/src/server/infra/render/elements/registry.ts @@ -0,0 +1,73 @@ +import type {ElementRegistry} from '../../../core/domain/render/element-types.js' + +import {validateBvBug} from './bv-bug/validator.js' +import {validateBvDecision} from './bv-decision/validator.js' +import {validateBvFix} from './bv-fix/validator.js' +import {validateBvRule} from './bv-rule/validator.js' +import {validateBvTopic} from './bv-topic/validator.js' + +/** + * The M1 element registry — single source of truth for the 5-element + * vocabulary. M2 vocabulary expansion (12 more elements per Andy's + * proposal §11) is **purely additive**: add a new entry here and a new + * `/{schema,validator}.ts` pair under `elements/`. No consumer + * (writer, reader, indexer, prompt template generator) needs to be + * touched — they all walk this registry generically. + * + * The data-driven shape is the production-track guardrail. If you find + * yourself writing `switch (elementName)` anywhere in the render layer, + * push back: that pattern doesn't scale to M2's vocabulary expansion. 
+ */ +export const ELEMENT_REGISTRY: ElementRegistry = { + 'bv-bug': { + allowedChildren: 'block', + description: + 'A bug runbook entry (symptom, root cause, fix). Optional `id` and `severity` ' + + '(low|medium|high|critical). Typically paired with a sibling ``.', + name: 'bv-bug', + optionalAttributes: ['id', 'severity'], + requiredAttributes: [], + validator: validateBvBug, + }, + 'bv-decision': { + allowedChildren: 'block', + description: + 'A decision record (with rationale and evidence). Optional `id` for ' + + 'cross-referencing.', + name: 'bv-decision', + optionalAttributes: ['id'], + requiredAttributes: [], + validator: validateBvDecision, + }, + 'bv-fix': { + allowedChildren: 'block', + description: + 'A fix runbook entry (steps to resolve a bug). Optional `id`. Typically the ' + + 'sibling of a ``.', + name: 'bv-fix', + optionalAttributes: ['id'], + requiredAttributes: [], + validator: validateBvFix, + }, + 'bv-rule': { + allowedChildren: 'inline', + description: + 'A rule statement the agent should follow. Optional `severity` (info|must|should) ' + + 'and `id` for cross-referencing.', + name: 'bv-rule', + optionalAttributes: ['severity', 'id'], + requiredAttributes: [], + validator: validateBvRule, + }, + 'bv-topic': { + allowedChildren: 'any', + description: + 'Root container per topic file. Carries file-level metadata as attributes ' + + '(importance, maturity, recency, updatedat). Required: `path`. 
Note: ' + + 'attribute names MUST be lowercase — HTML5 normalizes them at parse time.', + name: 'bv-topic', + optionalAttributes: ['importance', 'maturity', 'recency', 'updatedat'], + requiredAttributes: ['path'], + validator: validateBvTopic, + }, +} diff --git a/src/server/infra/render/reader/html-parser.ts b/src/server/infra/render/reader/html-parser.ts new file mode 100644 index 000000000..2ceafd133 --- /dev/null +++ b/src/server/infra/render/reader/html-parser.ts @@ -0,0 +1,179 @@ +import {type DefaultTreeAdapterMap, parseFragment, serialize} from 'parse5' + +import type {DocumentNode, ElementNode, ParsedNode} from '../../../core/domain/render/element-types.js' + +/** + * HTML parser wrapper around parse5. + * + * Produces a normalized AST (`DocumentNode` / `ElementNode` / + * `TextNode`) independent of parse5's internal types so consumers + * (T4 query reader, T3 round-trip validation, future indexers) can + * iterate without coupling to a specific HTML library. + * + * Why parse5 — it's the W3C-spec parser used by jsdom; widely vetted; + * forgiving on malformed input by design (a feature for migration + * tooling, neutral for M1 light validation). + * + * v1 parses everything as a fragment (no ``/``/`` + * wrapper required). M2 may add document-level parsing if topic files + * grow document-shaped headers; the wrapper gives us room. + */ + +type Parse5DocumentFragment = DefaultTreeAdapterMap['documentFragment'] +type Parse5Node = DefaultTreeAdapterMap['node'] +type Parse5Element = DefaultTreeAdapterMap['element'] +type Parse5TextNode = DefaultTreeAdapterMap['textNode'] + +/** + * Parse an HTML string into a normalized `DocumentNode`. parse5's + * forgiving mode means malformed input returns a best-effort tree + * rather than throwing. 
+ */ +export function parseHtml(html: string): DocumentNode { + const fragment: Parse5DocumentFragment = parseFragment(html) + const children = fragment.childNodes + .map((c) => convertNode(c)) + .filter((n): n is ParsedNode => n !== undefined) + return {children, type: 'document'} +} + +/** + * Walk a parsed tree depth-first, returning every element node in + * document order. Used by element-axis indexing (T4) and by validators + * that need to find typed elements anywhere in the tree. + */ +export function walkElements(root: ParsedNode): ElementNode[] { + const out: ElementNode[] = [] + walk(root, out) + return out +} + +function walk(node: ParsedNode, out: ElementNode[]): void { + if (node.type === 'element') out.push(node) + if (node.type === 'element' || node.type === 'document') { + for (const child of node.children) walk(child, out) + } +} + +/** + * Concatenate all text-node descendants of an element into a single + * string. Used to extract BM25-ready text content from typed elements + * (T4). HTML entities are already decoded by parse5, so the output is + * usable verbatim by the tokenizer. + */ +export function getInnerText(node: ParsedNode): string { + if (node.type === 'text') return node.text + if (node.type === 'element' || node.type === 'document') { + return node.children.map((c) => getInnerText(c)).join('') + } + + return '' +} + +/** + * Serialize a normalized tree back to HTML. Used for round-trip + * validation in tests and for the writer's emit path (T3). + * + * Note: serialization is semantically equivalent, not byte-equivalent. + * Whitespace, attribute quoting, and self-closing tag style may + * normalize. + */ +export function serializeHtml(root: DocumentNode): string { + // Convert our normalized tree back to parse5's shape, then call serialize. 
+ const fragment = toParse5Fragment(root) + return serialize(fragment) +} + +// ----- internal: parse5 → normalized ----- + +function convertNode(node: Parse5Node): ParsedNode | undefined { + if (isTextNode(node)) { + return {text: node.value, type: 'text'} + } + + if (isElementNode(node)) { + const attributes: Record = {} + for (const attr of node.attrs) { + attributes[attr.name] = attr.value + } + + const children = node.childNodes + .map((c) => convertNode(c)) + .filter((c): c is ParsedNode => c !== undefined) + + return { + attributes, + children, + tagName: node.tagName.toLowerCase(), + type: 'element', + } + } + + // Skip comments, doctype, processing instructions, etc. for M1. + return undefined +} + +function isTextNode(node: Parse5Node): node is Parse5TextNode { + return node.nodeName === '#text' +} + +function isElementNode(node: Parse5Node): node is Parse5Element { + return 'tagName' in node && 'attrs' in node && 'childNodes' in node +} + +// ----- internal: normalized → parse5 (for serialize) ----- + +type MutableParse5Element = { + attrs: Array<{name: string; value: string}> + childNodes: Array + namespaceURI: string + nodeName: string + parentNode: null + tagName: string +} + +type MutableParse5Text = { + nodeName: '#text' + parentNode: null + value: string +} + +type MutableParse5Fragment = { + childNodes: Array + nodeName: '#document-fragment' +} + +const HTML_NS = 'http://www.w3.org/1999/xhtml' + +function toParse5Fragment(doc: DocumentNode): Parse5DocumentFragment { + const fragment: MutableParse5Fragment = { + childNodes: doc.children.map((c) => toParse5Node(c)).filter((n): n is MutableParse5Element | MutableParse5Text => n !== undefined), + nodeName: '#document-fragment', + } + // parse5's serialize accepts any object with nodeName + childNodes; type + // assertion is the contained boundary between our normalized tree and + // parse5's expected input. 
+ return fragment as unknown as Parse5DocumentFragment +} + +function toParse5Node(node: ParsedNode): MutableParse5Element | MutableParse5Text | undefined { + if (node.type === 'text') { + return {nodeName: '#text', parentNode: null, value: node.text} + } + + if (node.type === 'element') { + return { + attrs: Object.entries(node.attributes).map(([name, value]) => ({name, value})), + childNodes: node.children + .map((c) => toParse5Node(c)) + .filter((n): n is MutableParse5Element | MutableParse5Text => n !== undefined), + namespaceURI: HTML_NS, + nodeName: node.tagName, + parentNode: null, + tagName: node.tagName, + } + } + + // DocumentNode shouldn't appear inside a tree (it's the root only) + return undefined +} diff --git a/test/fixtures/render/sample-topic.html b/test/fixtures/render/sample-topic.html new file mode 100644 index 000000000..90346f4ac --- /dev/null +++ b/test/fixtures/render/sample-topic.html @@ -0,0 +1,35 @@ + +

Authentication and Authorization

+ +

Project-wide rules and decisions for the auth subsystem. JWTs are signed with RS256; refresh tokens are sliding-expiry with a 24-hour window.

+ + + Failed token validation MUST return 401 Unauthorized — never 403, never 500. + + + + The RS256 signing key SHOULD rotate every 30 days. Old keys remain in the JWKS until tokens signed with them expire. + + + +

Use RS256 (asymmetric), not HS256 (shared-secret).

+

Rationale: public-key validation lets downstream services verify tokens without holding the signing secret. Scales across services without rotating shared secrets.

+
+ + +

Symptom: Logged-out users could still access protected routes for up to 5 minutes.

+

Root cause: Refresh-token revocation was being read from a stale cache; the cache TTL was longer than the access-token expiry.

+
+ + +

On logout, evict the user's refresh-token entry from the revocation cache synchronously before responding to the client. The cache is now read-through with a 30-second TTL; revocations bypass it entirely.

+
    +
  • Updated logout-handler.ts:42 to call revocationCache.invalidate(userId) before response.send().
  • +
  • Added integration test logout-revocation.test.ts covering the post-logout 401 path.
  • +
+
+ + + Document access-token expiry on every public API endpoint. + +
diff --git a/test/unit/server/infra/render/elements/bv-bug.test.ts b/test/unit/server/infra/render/elements/bv-bug.test.ts new file mode 100644 index 000000000..1df4d6a38 --- /dev/null +++ b/test/unit/server/infra/render/elements/bv-bug.test.ts @@ -0,0 +1,75 @@ +/** + * bv-bug validator tests. + * + * A bug runbook entry. Optional attributes: + * - `id` — optional; non-empty string if present + * - `severity` — optional; one of {"low","medium","high","critical"} + */ + +import {expect} from 'chai' + +import type {ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {validateBvBug} from '../../../../../../src/server/infra/render/elements/bv-bug/validator.js' + +function makeNode(attributes: Record, tagName = 'bv-bug'): ElementNode { + return {attributes, children: [], tagName, type: 'element'} +} + +describe('bv-bug validator', () => { + describe('valid', () => { + it('accepts an empty attribute set (all optional)', () => { + expect(validateBvBug(makeNode({})).valid).to.equal(true) + }) + + it('accepts id only', () => { + expect(validateBvBug(makeNode({id: 'auth-leak-2026-04'})).valid).to.equal(true) + }) + + it('accepts severity only ("critical")', () => { + expect(validateBvBug(makeNode({severity: 'critical'})).valid).to.equal(true) + }) + + it('accepts severity "low"', () => { + expect(validateBvBug(makeNode({severity: 'low'})).valid).to.equal(true) + }) + + it('accepts severity "medium"', () => { + expect(validateBvBug(makeNode({severity: 'medium'})).valid).to.equal(true) + }) + + it('accepts severity "high"', () => { + expect(validateBvBug(makeNode({severity: 'high'})).valid).to.equal(true) + }) + + it('accepts id + severity together', () => { + expect(validateBvBug(makeNode({id: 'b1', severity: 'high'})).valid).to.equal(true) + }) + + it('tolerates unknown attributes (warn-only — M1 light validation)', () => { + expect(validateBvBug(makeNode({severity: 'high', someFutureAttr: 'x'})).valid).to.equal(true) + }) + }) + + 
describe('invalid', () => { + it('rejects empty id', () => { + expect(validateBvBug(makeNode({id: ''})).valid).to.equal(false) + }) + + it('rejects unknown severity value', () => { + expect(validateBvBug(makeNode({severity: 'minor'})).valid).to.equal(false) + }) + + it('rejects severity in wrong case (case-sensitive enum)', () => { + expect(validateBvBug(makeNode({severity: 'HIGH'})).valid).to.equal(false) + }) + + it('rejects wrong tag name', () => { + const result = validateBvBug(makeNode({}, 'bv-fix')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors.some((e) => e.field === 'tagName')).to.equal(true) + } + }) + }) +}) diff --git a/test/unit/server/infra/render/elements/bv-decision.test.ts b/test/unit/server/infra/render/elements/bv-decision.test.ts new file mode 100644 index 000000000..ecf44854c --- /dev/null +++ b/test/unit/server/infra/render/elements/bv-decision.test.ts @@ -0,0 +1,80 @@ +/** + * bv-decision validator tests. + * + * A decision record. 
Optional attributes: + * - `id` — optional; non-empty string if present + */ + +import {expect} from 'chai' + +import type {ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {validateBvDecision} from '../../../../../../src/server/infra/render/elements/bv-decision/validator.js' + +function makeNode(attributes: Record, tagName = 'bv-decision'): ElementNode { + return {attributes, children: [], tagName, type: 'element'} +} + +describe('bv-decision validator', () => { + describe('valid', () => { + it('accepts an empty attribute set (all optional)', () => { + expect(validateBvDecision(makeNode({})).valid).to.equal(true) + }) + + it('accepts id only', () => { + expect(validateBvDecision(makeNode({id: 'rs256-over-hs256'})).valid).to.equal(true) + }) + + it('tolerates unknown attributes (warn-only — M1 light validation)', () => { + expect(validateBvDecision(makeNode({id: 'd1', someFutureAttr: 'x'})).valid).to.equal(true) + }) + + it('accepts ids with mixed casing and dashes (no enforced format in M1)', () => { + expect(validateBvDecision(makeNode({id: 'D-001-AcceptRS256'})).valid).to.equal(true) + }) + + it('accepts ids with numbers', () => { + expect(validateBvDecision(makeNode({id: 'd-2026-04-27'})).valid).to.equal(true) + }) + }) + + describe('invalid', () => { + it('rejects empty id', () => { + expect(validateBvDecision(makeNode({id: ''})).valid).to.equal(false) + }) + + it('rejects wrong tag name', () => { + const result = validateBvDecision(makeNode({}, 'bv-rule')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors.some((e) => e.field === 'tagName')).to.equal(true) + } + }) + }) + + describe('error reporting', () => { + it('returns a populated errors list on failure', () => { + const result = validateBvDecision(makeNode({id: ''})) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors).to.have.lengthOf.greaterThan(0) + } + }) + + it('reports the failing field 
name', () => { + const result = validateBvDecision(makeNode({id: ''})) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors[0].field).to.equal('id') + } + }) + + it('reports a non-empty error message', () => { + const result = validateBvDecision(makeNode({}, 'wrong-tag')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors[0].message).to.include('tagName') + } + }) + }) +}) diff --git a/test/unit/server/infra/render/elements/bv-fix.test.ts b/test/unit/server/infra/render/elements/bv-fix.test.ts new file mode 100644 index 000000000..aa64da620 --- /dev/null +++ b/test/unit/server/infra/render/elements/bv-fix.test.ts @@ -0,0 +1,80 @@ +/** + * bv-fix validator tests. + * + * A fix runbook entry. Optional attributes: + * - `id` — optional; non-empty string if present + */ + +import {expect} from 'chai' + +import type {ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {validateBvFix} from '../../../../../../src/server/infra/render/elements/bv-fix/validator.js' + +function makeNode(attributes: Record, tagName = 'bv-fix'): ElementNode { + return {attributes, children: [], tagName, type: 'element'} +} + +describe('bv-fix validator', () => { + describe('valid', () => { + it('accepts an empty attribute set (all optional)', () => { + expect(validateBvFix(makeNode({})).valid).to.equal(true) + }) + + it('accepts id only', () => { + expect(validateBvFix(makeNode({id: 'fix-jwt-rotation-2026-04-30'})).valid).to.equal(true) + }) + + it('tolerates unknown attributes (warn-only — M1 light validation)', () => { + expect(validateBvFix(makeNode({id: 'f1', someFutureAttr: 'x'})).valid).to.equal(true) + }) + + it('accepts ids with mixed casing and dashes', () => { + expect(validateBvFix(makeNode({id: 'F-001-RotateJWT'})).valid).to.equal(true) + }) + + it('accepts ids with numbers', () => { + expect(validateBvFix(makeNode({id: 'f-2026-04-30'})).valid).to.equal(true) + }) + }) + + 
describe('invalid', () => { + it('rejects empty id', () => { + expect(validateBvFix(makeNode({id: ''})).valid).to.equal(false) + }) + + it('rejects wrong tag name', () => { + const result = validateBvFix(makeNode({}, 'bv-bug')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors.some((e) => e.field === 'tagName')).to.equal(true) + } + }) + }) + + describe('error reporting', () => { + it('returns at least one error on failure', () => { + const result = validateBvFix(makeNode({id: ''})) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors).to.have.lengthOf.greaterThan(0) + } + }) + + it('reports the id field name', () => { + const result = validateBvFix(makeNode({id: ''})) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors[0].field).to.equal('id') + } + }) + + it('reports a non-empty error message for tag mismatch', () => { + const result = validateBvFix(makeNode({}, 'wrong-tag')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors[0].message).to.include('tagName') + } + }) + }) +}) diff --git a/test/unit/server/infra/render/elements/bv-rule.test.ts b/test/unit/server/infra/render/elements/bv-rule.test.ts new file mode 100644 index 000000000..d0a1edcae --- /dev/null +++ b/test/unit/server/infra/render/elements/bv-rule.test.ts @@ -0,0 +1,74 @@ +/** + * bv-rule validator tests. + * + * A rule statement. 
Optional attributes: + * - `severity` — optional; one of {"info","must","should"} + * - `id` — optional; non-empty string if present + */ + +import {expect} from 'chai' + +import type {ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {validateBvRule} from '../../../../../../src/server/infra/render/elements/bv-rule/validator.js' + +function makeNode(attributes: Record, tagName = 'bv-rule'): ElementNode { + return {attributes, children: [], tagName, type: 'element'} +} + +describe('bv-rule validator', () => { + describe('valid', () => { + it('accepts an empty attribute set (all optional)', () => { + expect(validateBvRule(makeNode({})).valid).to.equal(true) + }) + + it('accepts severity="must"', () => { + expect(validateBvRule(makeNode({severity: 'must'})).valid).to.equal(true) + }) + + it('accepts severity="info"', () => { + expect(validateBvRule(makeNode({severity: 'info'})).valid).to.equal(true) + }) + + it('accepts severity="should"', () => { + expect(validateBvRule(makeNode({severity: 'should'})).valid).to.equal(true) + }) + + it('accepts id only', () => { + expect(validateBvRule(makeNode({id: 'r-jwt-401'})).valid).to.equal(true) + }) + + it('accepts severity + id together', () => { + expect(validateBvRule(makeNode({id: 'r-jwt-401', severity: 'must'})).valid).to.equal(true) + }) + + it('tolerates unknown attributes (warn-only — M1 light validation)', () => { + expect(validateBvRule(makeNode({severity: 'must', someFutureAttr: 'x'})).valid).to.equal(true) + }) + }) + + describe('invalid', () => { + it('rejects unknown severity value', () => { + const result = validateBvRule(makeNode({severity: 'critical'})) + expect(result.valid).to.equal(false) + }) + + it('rejects empty id', () => { + const result = validateBvRule(makeNode({id: ''})) + expect(result.valid).to.equal(false) + }) + + it('rejects severity in wrong case (case-sensitive enum)', () => { + const result = validateBvRule(makeNode({severity: 'MUST'})) + 
expect(result.valid).to.equal(false) + }) + + it('rejects wrong tag name', () => { + const result = validateBvRule(makeNode({}, 'bv-decision')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors.some((e) => e.field === 'tagName')).to.equal(true) + } + }) + }) +}) diff --git a/test/unit/server/infra/render/elements/bv-topic.test.ts b/test/unit/server/infra/render/elements/bv-topic.test.ts new file mode 100644 index 000000000..1b8ef29d0 --- /dev/null +++ b/test/unit/server/infra/render/elements/bv-topic.test.ts @@ -0,0 +1,117 @@ +/** + * bv-topic validator tests. + * + * The root container element. Carries file-level metadata as attributes: + * - `path` — required; non-empty string identifying the topic + * - `importance` — optional; integer string "0".."100" + * - `maturity` — optional; one of {"draft","validated","core"} + * - `recency` — optional; numeric string "0".."1" + * - `updatedat` — optional; ISO-8601 datetime + * + * Light validation per M1 (ADR-007 §13 strict validation is M2). + * Unknown attributes are tolerated (warn-only behaviour); test confirms + * tolerance, not absence. 
+ */ + +import {expect} from 'chai' + +import type {ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {validateBvTopic} from '../../../../../../src/server/infra/render/elements/bv-topic/validator.js' + +function makeNode(attributes: Record, tagName = 'bv-topic'): ElementNode { + return {attributes, children: [], tagName, type: 'element'} +} + +describe('bv-topic validator', () => { + describe('valid', () => { + it('accepts the minimum: only `path` set', () => { + const result = validateBvTopic(makeNode({path: 'security/auth'})) + expect(result.valid).to.equal(true) + }) + + it('accepts all optional attributes set together', () => { + const result = validateBvTopic(makeNode({ + importance: '89', + maturity: 'core', + path: 'security/auth', + recency: '0.97', + updatedat: '2026-04-27T08:17:42Z', + })) + expect(result.valid).to.equal(true) + }) + + it('tolerates unknown attributes (warn-only — M1 light validation)', () => { + const result = validateBvTopic(makeNode({path: 'x', someFutureAttr: 'whatever'})) + expect(result.valid).to.equal(true) + }) + + it('accepts importance = "0"', () => { + const result = validateBvTopic(makeNode({importance: '0', path: 'x'})) + expect(result.valid).to.equal(true) + }) + + it('accepts importance = "100"', () => { + const result = validateBvTopic(makeNode({importance: '100', path: 'x'})) + expect(result.valid).to.equal(true) + }) + }) + + describe('invalid', () => { + it('rejects missing `path`', () => { + const result = validateBvTopic(makeNode({})) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors.some((e) => e.field === 'path')).to.equal(true) + } + }) + + it('rejects empty `path`', () => { + const result = validateBvTopic(makeNode({path: ''})) + expect(result.valid).to.equal(false) + }) + + it('rejects non-numeric importance', () => { + const result = validateBvTopic(makeNode({importance: 'high', path: 'x'})) + expect(result.valid).to.equal(false) + 
}) + + it('rejects out-of-range importance (>100)', () => { + const result = validateBvTopic(makeNode({importance: '101', path: 'x'})) + expect(result.valid).to.equal(false) + }) + + it('rejects out-of-range importance (negative)', () => { + const result = validateBvTopic(makeNode({importance: '-1', path: 'x'})) + expect(result.valid).to.equal(false) + }) + + it('rejects unknown maturity tier', () => { + const result = validateBvTopic(makeNode({maturity: 'experimental', path: 'x'})) + expect(result.valid).to.equal(false) + }) + + it('rejects malformed updatedat', () => { + const result = validateBvTopic(makeNode({path: 'x', updatedat: 'yesterday'})) + expect(result.valid).to.equal(false) + }) + + it('rejects non-numeric recency', () => { + const result = validateBvTopic(makeNode({path: 'x', recency: 'high'})) + expect(result.valid).to.equal(false) + }) + + it('rejects recency outside [0, 1]', () => { + const result = validateBvTopic(makeNode({path: 'x', recency: '1.5'})) + expect(result.valid).to.equal(false) + }) + + it('rejects wrong tag name (defensive — registry should never call wrong validator)', () => { + const result = validateBvTopic(makeNode({path: 'x'}, 'bv-rule')) + expect(result.valid).to.equal(false) + if (!result.valid) { + expect(result.errors.some((e) => e.field === 'tagName')).to.equal(true) + } + }) + }) +}) diff --git a/test/unit/server/infra/render/elements/registry.test.ts b/test/unit/server/infra/render/elements/registry.test.ts new file mode 100644 index 000000000..328f2d4ef --- /dev/null +++ b/test/unit/server/infra/render/elements/registry.test.ts @@ -0,0 +1,107 @@ +/** + * Element registry tests. + * + * The registry is the single source of truth for the M1 element + * vocabulary. Every consumer (curate writer, query reader, prompt + * template generator) walks the registry generically. M2 vocabulary + * expansion is purely additive — new entries only. 
+ */ + +import {expect} from 'chai' + +import type {ElementName, ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {ELEMENT_NAMES} from '../../../../../../src/server/core/domain/render/element-types.js' +import {ELEMENT_REGISTRY} from '../../../../../../src/server/infra/render/elements/registry.js' + +function makeNode(tagName: string, attributes: Record = {}): ElementNode { + return {attributes, children: [], tagName, type: 'element'} +} + +describe('ELEMENT_REGISTRY', () => { + describe('shape', () => { + it('contains exactly 5 entries (M1 vocabulary)', () => { + expect(Object.keys(ELEMENT_REGISTRY)).to.have.lengthOf(5) + }) + + it('has one entry per `ElementName` listed in `ELEMENT_NAMES`', () => { + for (const name of ELEMENT_NAMES) { + expect(ELEMENT_REGISTRY[name], `expected entry for ${name}`).to.not.equal(undefined) + } + }) + + it('every entry exposes `name`, `validator`, `description`, `requiredAttributes`, `optionalAttributes`, `allowedChildren`', () => { + for (const name of ELEMENT_NAMES) { + const entry = ELEMENT_REGISTRY[name] + expect(entry.name).to.equal(name) + expect(typeof entry.validator).to.equal('function') + expect(typeof entry.description).to.equal('string') + expect(entry.description.length).to.be.greaterThan(0) + expect(Array.isArray(entry.requiredAttributes)).to.equal(true) + expect(Array.isArray(entry.optionalAttributes)).to.equal(true) + expect(['any', 'block', 'inline', 'none']).to.include(entry.allowedChildren) + } + }) + }) + + describe('validators are wired correctly', () => { + it('bv-topic validator accepts a valid bv-topic node', () => { + const result = ELEMENT_REGISTRY['bv-topic'].validator(makeNode('bv-topic', {path: 'x'})) + expect(result.valid).to.equal(true) + }) + + it('bv-topic validator rejects a wrong-tag node', () => { + const result = ELEMENT_REGISTRY['bv-topic'].validator(makeNode('bv-rule')) + expect(result.valid).to.equal(false) + }) + + it('bv-rule validator accepts an 
empty bv-rule node', () => { + const result = ELEMENT_REGISTRY['bv-rule'].validator(makeNode('bv-rule')) + expect(result.valid).to.equal(true) + }) + + it('bv-decision validator accepts a bv-decision node with id', () => { + const result = ELEMENT_REGISTRY['bv-decision'].validator(makeNode('bv-decision', {id: 'd1'})) + expect(result.valid).to.equal(true) + }) + + it('bv-bug validator accepts a bv-bug node with severity', () => { + const result = ELEMENT_REGISTRY['bv-bug'].validator(makeNode('bv-bug', {severity: 'high'})) + expect(result.valid).to.equal(true) + }) + + it('bv-fix validator accepts a bv-fix node', () => { + const result = ELEMENT_REGISTRY['bv-fix'].validator(makeNode('bv-fix')) + expect(result.valid).to.equal(true) + }) + }) + + describe('metadata for downstream consumers', () => { + it('bv-topic declares `path` as a required attribute', () => { + expect(ELEMENT_REGISTRY['bv-topic'].requiredAttributes).to.include('path') + }) + + it('bv-rule declares `severity` as an optional attribute', () => { + expect(ELEMENT_REGISTRY['bv-rule'].optionalAttributes).to.include('severity') + }) + + it('bv-bug declares `severity` as an optional attribute', () => { + expect(ELEMENT_REGISTRY['bv-bug'].optionalAttributes).to.include('severity') + }) + + it('every element has a non-trivial description for the prompt template generator', () => { + for (const name of ELEMENT_NAMES) { + expect(ELEMENT_REGISTRY[name].description.length).to.be.greaterThan(20) + } + }) + }) + + describe('readonly contract', () => { + it('registry is structurally Readonly>', () => { + // Compile-time guard via the type. Runtime sanity check: keys are exactly ELEMENT_NAMES. 
+ const keys = Object.keys(ELEMENT_REGISTRY).sort() as ElementName[] + const expected = [...ELEMENT_NAMES].sort() + expect(keys).to.deep.equal(expected) + }) + }) +}) diff --git a/test/unit/server/infra/render/reader/html-parser.test.ts b/test/unit/server/infra/render/reader/html-parser.test.ts new file mode 100644 index 000000000..47b4896c4 --- /dev/null +++ b/test/unit/server/infra/render/reader/html-parser.test.ts @@ -0,0 +1,198 @@ +/** + * HTML parser wrapper tests. + * + * The parser produces a normalized AST (`ParsedNode`) independent of any + * specific HTML library. M1 uses parse5 underneath; consumers see only + * `ElementNode` / `TextNode` / `DocumentNode`. + * + * Key invariants: + * - Tag names are lowercased + * - Attributes are a string-only map + * - Whitespace-only text between elements is preserved (consumers + * decide whether to drop it) + * - Malformed input does not throw — parse5's forgiving parser + * returns a best-effort tree + */ + +import {expect} from 'chai' + +import type {ElementNode} from '../../../../../../src/server/core/domain/render/element-types.js' + +import {getInnerText, parseHtml, serializeHtml, walkElements} from '../../../../../../src/server/infra/render/reader/html-parser.js' + +describe('html-parser', () => { +describe('parseHtml', () => { + describe('basic parsing', () => { + it('parses a single bv-topic element', () => { + const html = '' + const result = parseHtml(html) + const elements = walkElements(result) + expect(elements.length).to.be.greaterThan(0) + const topic = elements.find((e) => e.tagName === 'bv-topic') + expect(topic, 'expected bv-topic element').to.not.equal(undefined) + expect(topic!.attributes.path).to.equal('security-auth') + }) + + it('lowercases tag names regardless of input case', () => { + const result = parseHtml('') + const elements = walkElements(result) + expect(elements.find((e) => e.tagName === 'bv-topic')).to.not.equal(undefined) + }) + + it('preserves attribute string values verbatim', () 
=> { + const result = parseHtml('') + const topic = walkElements(result).find((e) => e.tagName === 'bv-topic')! + expect(topic.attributes.path).to.equal('security/auth') + expect(topic.attributes.importance).to.equal('89') + }) + + it('parses nested elements', () => { + const html = ` + + Test rule + + ` + const result = parseHtml(html) + const elements = walkElements(result) + expect(elements.find((e) => e.tagName === 'bv-rule')).to.not.equal(undefined) + }) + + it('parses sibling elements at root level', () => { + const html = 'AB' + const result = parseHtml(html) + const rules = walkElements(result).filter((e) => e.tagName === 'bv-rule') + expect(rules.length).to.equal(2) + }) + + it('handles standard HTML5 tags (h1, p, ul, li) alongside bv-* elements', () => { + const html = ` + +

Title

+

Narrative.

+
  • Item
+
+ ` + const result = parseHtml(html) + const elements = walkElements(result) + const tagNames = elements.map((e) => e.tagName) + expect(tagNames).to.include('h1') + expect(tagNames).to.include('p') + expect(tagNames).to.include('ul') + expect(tagNames).to.include('li') + }) + }) + + describe('malformed input handling', () => { + it('does not throw on empty string', () => { + expect(() => parseHtml('')).to.not.throw() + }) + + it('does not throw on plain text', () => { + expect(() => parseHtml('just some text without tags')).to.not.throw() + }) + + it('does not throw on unclosed tags', () => { + expect(() => parseHtml('unclosed')).to.not.throw() + }) + + it('does not throw on mismatched nesting', () => { + expect(() => parseHtml('')).to.not.throw() + }) + + it('does not throw on broken attribute syntax', () => { + expect(() => parseHtml('...')).to.not.throw() + }) + + it('does not throw on unknown tags', () => { + const result = parseHtml('content') + const elements = walkElements(result) + // parse5 is forgiving — unknown tags are still parsed as elements + expect(elements.find((e) => e.tagName === 'some-future-tag')).to.not.equal(undefined) + }) + }) +}) + +describe('walkElements', () => { + it('returns elements in document order (depth-first)', () => { + const result = parseHtml('') + const elements = walkElements(result) + const names = elements + .filter((e) => e.tagName.startsWith('bv-')) + .map((e) => e.tagName) + expect(names).to.deep.equal(['bv-topic', 'bv-rule', 'bv-decision']) + }) + + it('includes nested elements at any depth', () => { + const html = '
' + const result = parseHtml(html) + const elements = walkElements(result) + expect(elements.find((e) => e.tagName === 'bv-rule')).to.not.equal(undefined) + }) + + it('returns empty array on empty document', () => { + const result = parseHtml('') + expect(walkElements(result)).to.be.an('array') + }) +}) + +describe('getInnerText', () => { + it('extracts text content from a simple element', () => { + const node: ElementNode = { + attributes: {}, + children: [{text: 'Some rule text', type: 'text'}], + tagName: 'bv-rule', + type: 'element', + } + expect(getInnerText(node)).to.equal('Some rule text') + }) + + it('concatenates text from nested elements', () => { + const result = parseHtml('

First.

Second.

') + const topic = walkElements(result).find((e) => e.tagName === 'bv-topic')! + const innerText = getInnerText(topic) + expect(innerText).to.include('First.') + expect(innerText).to.include('Second.') + }) + + it('decodes HTML entities (e.g. & → &)', () => { + const result = parseHtml('Foo & bar') + const rule = walkElements(result).find((e) => e.tagName === 'bv-rule')! + expect(getInnerText(rule)).to.include('Foo & bar') + }) + + it('returns empty string for an element with no text descendants', () => { + const node: ElementNode = {attributes: {}, children: [], tagName: 'bv-rule', type: 'element'} + expect(getInnerText(node)).to.equal('') + }) +}) + +describe('serializeHtml', () => { + it('round-trips a simple bv-topic with attributes', () => { + const html = '' + const tree = parseHtml(html) + const out = serializeHtml(tree) + // Re-parse the output; semantic equivalence is what we test, not + // byte-exactness (whitespace / quoting may normalize) + const reparsed = parseHtml(out) + const topic = walkElements(reparsed).find((e) => e.tagName === 'bv-topic')! + expect(topic.attributes.path).to.equal('security-auth') + expect(topic.attributes.importance).to.equal('89') + }) + + it('round-trips nested elements semantically', () => { + const html = 'Be careful' + const tree = parseHtml(html) + const reparsed = parseHtml(serializeHtml(tree)) + const elements = walkElements(reparsed) + const rule = elements.find((e) => e.tagName === 'bv-rule')! 
+ expect(rule.attributes.severity).to.equal('must') + expect(rule.attributes.id).to.equal('r1') + expect(getInnerText(rule)).to.include('Be careful') + }) + + it('does not throw on serialising a parse result of malformed input', () => { + const tree = parseHtml('unclosed') + expect(() => serializeHtml(tree)).to.not.throw() + }) +}) +}) diff --git a/test/unit/server/infra/render/sample-topic-roundtrip.test.ts b/test/unit/server/infra/render/sample-topic-roundtrip.test.ts new file mode 100644 index 000000000..8132d2c3b --- /dev/null +++ b/test/unit/server/infra/render/sample-topic-roundtrip.test.ts @@ -0,0 +1,132 @@ +/** + * Sample-topic round-trip test. + * + * Verifies that the M1 5-element vocabulary, applied to a realistic + * topic file, parses cleanly, validates per-element, and round-trips + * (parse → walk → re-serialize) without semantic loss. + * + * This is the closest M1 proxy for "could a real curated topic survive + * the pipeline?" — useful before T3 wires the writer to disk. 
+ */ + +import {expect} from 'chai' +import {readFileSync} from 'node:fs' +import {join} from 'node:path' + +import type {ElementName} from '../../../../../src/server/core/domain/render/element-types.js' + +import {ELEMENT_NAMES} from '../../../../../src/server/core/domain/render/element-types.js' +import {ELEMENT_REGISTRY} from '../../../../../src/server/infra/render/elements/registry.js' +import {getInnerText, parseHtml, serializeHtml, walkElements} from '../../../../../src/server/infra/render/reader/html-parser.js' + +const FIXTURE_PATH = join(process.cwd(), 'test/fixtures/render/sample-topic.html') + +function loadFixture(): string { + return readFileSync(FIXTURE_PATH, 'utf8') +} + +function isRegisteredElementName(tag: string): tag is ElementName { + return (ELEMENT_NAMES as readonly string[]).includes(tag) +} + +describe('sample-topic.html round-trip', () => { + describe('parse', () => { + it('parses without errors', () => { + const html = loadFixture() + expect(() => parseHtml(html)).to.not.throw() + }) + + it('contains exactly one bv-topic element', () => { + const elements = walkElements(parseHtml(loadFixture())) + const topics = elements.filter((e) => e.tagName === 'bv-topic') + expect(topics).to.have.lengthOf(1) + }) + + it('contains all 5 M1 element types at least once', () => { + const elements = walkElements(parseHtml(loadFixture())) + const tagSet = new Set(elements.map((e) => e.tagName)) + for (const name of ELEMENT_NAMES) { + expect(tagSet.has(name), `expected at least one ${name}`).to.equal(true) + } + }) + + it('preserves the bv-topic root attributes', () => { + const elements = walkElements(parseHtml(loadFixture())) + const topic = elements.find((e) => e.tagName === 'bv-topic')! 
+ expect(topic.attributes.path).to.equal('security/auth') + expect(topic.attributes.importance).to.equal('89') + expect(topic.attributes.maturity).to.equal('core') + expect(topic.attributes.updatedat).to.equal('2026-04-27T08:17:42Z') + }) + }) + + describe('validate', () => { + it('every bv-* element in the fixture passes its registered validator', () => { + const elements = walkElements(parseHtml(loadFixture())) + for (const el of elements) { + if (!isRegisteredElementName(el.tagName)) continue + const result = ELEMENT_REGISTRY[el.tagName].validator(el) + expect( + result.valid, + `expected ${el.tagName} (id=${el.attributes.id ?? 'n/a'}) to validate; errors: ${JSON.stringify(result.valid ? [] : result.errors)}`, + ).to.equal(true) + } + }) + }) + + describe('round-trip (parse → serialize → re-parse)', () => { + it('produces semantically equivalent output', () => { + const original = parseHtml(loadFixture()) + const out = serializeHtml(original) + const reparsed = parseHtml(out) + + const originalElements = walkElements(original) + const reparsedElements = walkElements(reparsed) + + // Same element count after round-trip + expect(reparsedElements.length).to.equal(originalElements.length) + + // Tag-name sequence preserved + expect(reparsedElements.map((e) => e.tagName)).to.deep.equal( + originalElements.map((e) => e.tagName), + ) + }) + + it('preserves attribute values across round-trip', () => { + const original = parseHtml(loadFixture()) + const reparsed = parseHtml(serializeHtml(original)) + + const originalTopic = walkElements(original).find((e) => e.tagName === 'bv-topic')! + const reparsedTopic = walkElements(reparsed).find((e) => e.tagName === 'bv-topic')! 
+ expect(reparsedTopic.attributes).to.deep.equal(originalTopic.attributes) + }) + + it('preserves innerText (text content) across round-trip', () => { + const original = parseHtml(loadFixture()) + const reparsed = parseHtml(serializeHtml(original)) + + const originalText = getInnerText(original) + const reparsedText = getInnerText(reparsed) + + // Whitespace may normalize, but every word from the original should remain + const wordsOriginal = originalText.split(/\s+/).filter(Boolean) + const reparsedSet = new Set(reparsedText.split(/\s+/).filter(Boolean)) + const missing = wordsOriginal.filter((w) => !reparsedSet.has(w)) + expect(missing, `words lost in round-trip: ${missing.join(', ')}`).to.have.lengthOf(0) + }) + }) + + describe('innerText for BM25', () => { + it('contains expected substrings from each element type', () => { + const elements = walkElements(parseHtml(loadFixture())) + const topic = elements.find((e) => e.tagName === 'bv-topic')! + const innerText = getInnerText(topic) + + // Sample of expected content from each element + expect(innerText).to.include('401 Unauthorized') + expect(innerText).to.include('RS256') + expect(innerText).to.include('refresh') + expect(innerText).to.include('logout') + }) + }) +}) From b321ca02c11a81e6d02bc08719a4901a95e53a3a Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Sun, 10 May 2026 13:23:58 +0700 Subject: [PATCH 002/115] refactor: [ENG-2737] address review-agent feedback on render layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bv-topic schema: tighten recency regex to ^\d+(\.\d+)?$ (rejects '.', '..1', '1..2' which previously slipped past the regex), drop redundant Number.isFinite for style consistency with importance, accept timezone offsets in updatedat via {offset: true} - html-parser: replace `as unknown as Parse5DocumentFragment` cast with parse5 defaultTreeAdapter factories (createDocumentFragment, createElement, createTextNode, appendChild) — no more structural 
casts in the serialize path - html-parser: getInnerText now inserts a separator between sibling element children so compact emit (<p>foo.</p><p>bar.</p>
) does not merge tokens for BM25 - html-parser: document