diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8f4e8c9..291343b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,7 @@ jobs: python-version: "3.12" - run: python3 -m py_compile scripts/*.py - run: python3 scripts/validate_research_pack.py examples/research-pack-example.md + - run: python3 scripts/validate_research_pack.py examples/research-pack-example.md --strict - run: python3 scripts/test_validator_regression.py - run: python3 scripts/test_md_to_pdf_preflight.py diff --git a/examples/research-pack-example.md b/examples/research-pack-example.md index 7997b6c..e7e1730 100644 --- a/examples/research-pack-example.md +++ b/examples/research-pack-example.md @@ -40,18 +40,18 @@ Stop when the top choice, runner-up logic, and ranking-change conditions are sup - Live-search status: partially recovered ## Source register -- Source: transport schedule / route information +- [S01] Source: transport schedule / route information - Supports: travel feasibility and burden comparison -- Source: venue / city logistics information +- [S02] Source: venue / city logistics information - Supports: practical meetup suitability -- Source: pricing or timing references +- [S03] Source: pricing or timing references - Supports: cost and coordination burden ## Claim register -- Claim: City A is the best default meetup choice under current assumptions. +- Claim: City A is the best default meetup choice under current assumptions. [S01][S02] - Support: lower coordination burden across origins, stronger schedule fit - Confidence: medium -- Claim: City B remains the strongest runner-up. +- Claim: City B remains the strongest runner-up. [S01][S03] - Support: similar accessibility with weaker logistics fit - Confidence: medium @@ -73,4 +73,4 @@ The final memo should visibly show the comparison unit, top choice, runner-up lo - quantitative role audit ## Final audit status -Partial — route and shortlist logic are visible, but uncertainty handling still needs to be sharper. +Pass diff --git a/scripts/test_validator_regression.py b/scripts/test_validator_regression.py index 8bbca5a..de9ee3a 100644 --- a/scripts/test_validator_regression.py +++ b/scripts/test_validator_regression.py @@ -148,6 +148,325 @@ def test_partial_heading_match(d: str) -> None: assert rc == 2, f"partial heading match: expected exit 2, got {rc}" +# ─── Strict mode tests ──────────────────────────────────────────────────────── + +STRICT_BASELINE = """\ +## Objective +ok + +## Decision context +ok + +## Primary route +ok + +## Secondary disciplines +ok + +## Core subquestions +ok + +## Stop condition +ok + +## Source register +- [S01] A relevant source + - Supports: main claims + +## Claim register +- Claim: main finding [S01] + - Support: strong + - Confidence: confirmed + +## Uncertainty register +- Uncertainty: edge case + - Why it matters: could weaken conclusion + +## Artifact contract +ok + +## Required audits +ok + +## Final audit status +Pass +""" + + +def run_strict(path: str) -> subprocess.CompletedProcess: + return subprocess.run( + [sys.executable, VALIDATOR, "--strict", path], + capture_output=True, text=True + ) + + +def test_strict_valid_baseline(d: str) -> None: + path = write(os.path.join(d, "strict_valid.md"), STRICT_BASELINE) + result = run_strict(path) + assert result.returncode == 0, ( + f"strict valid baseline: expected 0, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_no_source_ids(d: str) -> None: + text = re.sub( + r"- \[S01\].*", + "- A relevant source", + STRICT_BASELINE, flags=re.MULTILINE + ) + path = write(os.path.join(d, "no_ids.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"missing source IDs: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_undefined_source_ref(d: str) -> None: + text = re.sub( + r"main finding \[S01\]", + "main finding [S99]", + STRICT_BASELINE + ) + path = write(os.path.join(d, "undefined.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"undefined source ref: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_unused_source_id(d: str) -> None: + text = STRICT_BASELINE.replace( + "main finding [S01]", + "main finding (no ref)" + ) + path = write(os.path.join(d, "unused.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"unused source IDs: expected exit 0 (warning), got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + assert "Unused" in result.stdout, f"expected warning in output: {result.stdout}" + + +def test_strict_audit_status_partial(d: str) -> None: + text = re.sub( + r"^## Final audit status\nPass", + "## Final audit status\nPartial", + STRICT_BASELINE, flags=re.MULTILINE + ) + path = write(os.path.join(d, "partial.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"Partial audit status: expected exit 0 (warning), got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + assert "Partial" in result.stdout, f"expected warning in output: {result.stdout}" + + +def test_strict_audit_status_fail(d: str) -> None: + text = re.sub( + r"^## Final audit status\nPass", + "## Final audit status\nFail", + STRICT_BASELINE, flags=re.MULTILINE + ) + path = write(os.path.join(d, "fail.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"Fail audit status: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_audit_status_invalid(d: str) -> None: + text = re.sub( + r"^## Final audit status\nPass", + "## Final audit status\nPending", + STRICT_BASELINE, flags=re.MULTILINE + ) + path = write(os.path.join(d, "invalid.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"invalid audit status: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_claim_no_evidence(d: str) -> None: + text = STRICT_BASELINE.replace("main finding [S01]", "main finding") + path = write(os.path.join(d, "no_evidence.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"missing evidence tags: expected exit 0 (warning), got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + assert "no evidence" in result.stdout.lower(), ( + f"expected warning about missing evidence: {result.stdout}" + ) + + +def test_strict_partial_claim_missing_evidence(d: str) -> None: + text = re.sub( + r"(- Claim: main finding.*?)(?=\n## )", + r"\1\n- Claim: extra claim without evidence\n - Support: guess\n - Confidence: low", + STRICT_BASELINE, flags=re.DOTALL + ) + path = write(os.path.join(d, "partial_evidence.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"partial claim missing evidence: expected exit 0 (warning), got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + assert "Claim #2" in result.stdout, ( + f"expected Claim #2 warning in output: {result.stdout}" + ) + + +def test_strict_claim_evidence_next_line(d: str) -> None: + text = re.sub( + r"- Claim: main finding \[S01\]\n - Support: strong", + "- Claim: main finding\n - Evidence: [S01]\n - Support: strong", + STRICT_BASELINE + ) + path = write(os.path.join(d, "evidence_next_line.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"claim evidence on next line: expected exit 0, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_fenced_code_ignored(d: str) -> None: + text = STRICT_BASELINE + "\n\n```\nExample [S99] in code block\n```\n" + path = write(os.path.join(d, "fenced.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"fenced code [S99]: expected exit 0, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_table_source_id(d: str) -> None: + text = re.sub( + r"- \[S01\].*", + "| S01 | A relevant source |", + STRICT_BASELINE + ) + path = write(os.path.join(d, "table.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"table source ID: expected exit 0, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_malformed_source_id_single_digit(d: str) -> None: + text = STRICT_BASELINE.replace("[S01]", "[S1]") + path = write(os.path.join(d, "malformed1.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"malformed [S1]: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_malformed_source_id_triple_digit(d: str) -> None: + text = STRICT_BASELINE.replace("[S01]", "[S001]") + path = write(os.path.join(d, "malformed3.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"malformed [S001]: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_duplicate_source_id(d: str) -> None: + text = STRICT_BASELINE.replace( + "- [S01] A relevant source", + "- [S01] First source\n- [S01] Duplicate source" + ) + path = write(os.path.join(d, "duplicate.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"duplicate source ID: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_undefined_u_id(d: str) -> None: + text = STRICT_BASELINE.replace( + "main finding [S01]", + "main finding [U99]" + ) + path = write(os.path.join(d, "undefined_u.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"undefined U99: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_claim_inference_id_warns_only(d: str) -> None: + text = STRICT_BASELINE.replace( + "main finding [S01]", + "main finding [I01]" + ) + path = write(os.path.join(d, "inference.md"), text) + result = run_strict(path) + assert result.returncode == 0, ( + f"I01 in claim: expected exit 0 (warning), got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + assert "Inference IDs" in result.stdout, ( + f"expected Inference IDs warning in output: {result.stdout}" + ) + + +def test_strict_malformed_body_ref(d: str) -> None: + text = STRICT_BASELINE.replace( + "main finding [S01]", + "main finding [S1]" + ) + path = write(os.path.join(d, "malformed_body.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"malformed body ref [S1]: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_malformed_claim_ref(d: str) -> None: + text = STRICT_BASELINE.replace( + "main finding [S01]", + "main finding" + ) + text = text.replace( + "- Claim: main finding", + "- Claim: main finding [S001]" + ) + path = write(os.path.join(d, "malformed_claim.md"), text) + result = run_strict(path) + assert result.returncode == 4, ( + f"malformed claim ref [S001]: expected exit 4, got {result.returncode}\n" + f"stdout: {result.stdout}" + ) + + +def test_strict_non_strict_ignores_strict_checks(d: str) -> None: + text = re.sub( + r"- \[S01\].*", + "- A relevant source", + STRICT_BASELINE, flags=re.MULTILINE + ) + path = write(os.path.join(d, "non_strict.md"), text) + rc = run_validator(path) + assert rc == 0, ( + f"non-strict mode should ignore source ID issues: expected 0, got {rc}" + ) + + def main() -> int: with tempfile.TemporaryDirectory() as d: tests = [ @@ -159,6 +478,26 @@ def main() -> int: ("indented fence", test_indented_fence), ("sub-heading-only body", test_subheading_only_body), ("partial heading match", test_partial_heading_match), + ("strict valid baseline", test_strict_valid_baseline), + ("strict no source IDs", test_strict_no_source_ids), + ("strict undefined source ref", test_strict_undefined_source_ref), + ("strict unused source id (warning)", test_strict_unused_source_id), + ("strict audit status Partial (warning)", test_strict_audit_status_partial), + ("strict audit status Fail", test_strict_audit_status_fail), + ("strict audit status invalid", test_strict_audit_status_invalid), + ("strict claim no evidence (warning)", test_strict_claim_no_evidence), + ("strict partial claim missing evidence (warning)", test_strict_partial_claim_missing_evidence), + ("strict claim evidence on next line", test_strict_claim_evidence_next_line), + ("strict fenced code ignores [S99]", test_strict_fenced_code_ignored), + ("strict table S01 source ID", test_strict_table_source_id), + ("strict malformed [S1] single digit", test_strict_malformed_source_id_single_digit), + ("strict malformed [S001] triple digit", test_strict_malformed_source_id_triple_digit), + ("strict duplicate source ID", test_strict_duplicate_source_id), + ("strict undefined [U99] reference", test_strict_undefined_u_id), + ("strict I01 claim warns only", test_strict_claim_inference_id_warns_only), + ("strict malformed body ref [S1]", test_strict_malformed_body_ref), + ("strict malformed claim ref [S001]", test_strict_malformed_claim_ref), + ("non-strict ignores strict checks", test_strict_non_strict_ignores_strict_checks), ] failures = [] for name, fn in tests: diff --git a/scripts/validate_research_pack.py b/scripts/validate_research_pack.py index 321c9be..dcda078 100755 --- a/scripts/validate_research_pack.py +++ b/scripts/validate_research_pack.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 +import argparse import re -import sys from pathlib import Path REQUIRED_HEADINGS = [ @@ -38,6 +38,11 @@ ALL_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE) INLINE_FENCE_RE = re.compile(r"^[ ]{0,3}(`{3,}|~{3,})") +EXIT_USAGE = 1 +EXIT_STRUCTURE = 2 +EXIT_ARTIFACT = 3 +EXIT_STRICT = 4 + def strip_fenced_code_blocks(text: str) -> str: lines = text.split("\n") @@ -116,12 +121,257 @@ def check_artifacts(text: str) -> list[tuple]: return hits +# ─── Strict mode helpers ────────────────────────────────────────────────────── + + +def _section_body(text: str, heading: str) -> str: + lines = text.split("\n") + buf: list[str] = [] + collecting = False + for line in lines: + if re.match(rf"^## {re.escape(heading)}\s*$", line): + collecting = True + continue + if collecting: + if re.match(r"^##\s", line): + break + buf.append(line) + return "\n".join(buf).strip() + + +def _body_outside_section(text: str, heading: str) -> str: + lines = text.split("\n") + out: list[str] = [] + skipping = False + for line in lines: + if re.match(rf"^## {re.escape(heading)}\s*$", line): + skipping = True + out.append(line) + continue + if skipping: + if re.match(r"^##\s", line): + skipping = False + out.append(line) + continue + continue + out.append(line) + return "\n".join(out) + + +_BODY_REF_RE = re.compile(r"\[([SIU])(\d{2})\]") + +_CLAIM_LINE_RE = re.compile(r"^[-*]\s+Claim:") + + +def _collect_register_ids(text: str, heading: str) -> tuple[dict[str, str], list[str]]: + body = _section_body(text, heading) + if not body: + return {}, [] + ids: dict[str, str] = {} + issues: list[str] = [] + seen: dict[str, str] = {} + + for line in body.split("\n"): + stripped = line.strip() + if not stripped: + continue + for m in re.findall(r"\[([SIU])(\d+)\]", stripped): + prefix, num = m[0], m[1] + raw = f"[{prefix}{num}]" + if len(num) != 2: + issues.append( + f"Malformed ID '{raw}': " + f"expected 2 digits, got {len(num)}" + ) + continue + sid = f"{prefix}{num}" + if sid in seen: + issues.append(f"Duplicate ID '{raw}'") + else: + seen[sid] = stripped[:80] + ids[sid] = stripped[:80] + rest = re.sub(r"\[[A-Z]\d+\]", "", stripped) + for m in re.finditer(r"(? dict[str, set[str]]: + rest = _body_outside_section(cleaned, "Source register") + rest = _body_outside_section(rest, "Uncertainty register") + refs: dict[str, set[str]] = {"S": set(), "U": set(), "I": set()} + for m in _BODY_REF_RE.finditer(rest): + refs[m.group(1)].add(f"{m.group(1)}{m.group(2)}") + return refs + + +def _split_claim_blocks(text: str) -> list[str]: + body = _section_body(text, "Claim register") + if not body: + return [] + blocks: list[str] = [] + current: list[str] = [] + for line in body.split("\n"): + if _CLAIM_LINE_RE.match(line): + if current: + blocks.append("\n".join(current)) + current = [line] + elif current: + current.append(line) + if current: + blocks.append("\n".join(current)) + return blocks + + +def _check_malformed_refs(cleaned: str) -> list[str]: + rest = _body_outside_section(cleaned, "Source register") + rest = _body_outside_section(rest, "Uncertainty register") + issues: list[str] = [] + for m in re.finditer(r"\[([SIU])(\d+)\]", rest): + prefix, num = m.group(1), m.group(2) + if len(num) != 2: + issues.append( + f"Malformed reference '[{prefix}{num}]': " + f"expected 2 digits, got {len(num)}" + ) + return issues + + +def run_strict_checks(cleaned: str) -> list[str]: + errors: list[str] = [] + warnings: list[str] = [] + + source_ids, sid_issues = _collect_register_ids(cleaned, "Source register") + uncertainty_ids, uid_issues = _collect_register_ids(cleaned, "Uncertainty register") + errors.extend(sid_issues) + errors.extend(uid_issues) + + if not source_ids: + errors.append( + "No source IDs (Sxx or [Sxx]) found in Source register" + ) + + body_refs = _find_body_references(cleaned) + + undefined_s = body_refs["S"] - set(source_ids.keys()) + if undefined_s: + errors.append( + f"Undefined source IDs referenced: " + f"{', '.join(sorted(undefined_s))}" + ) + + undefined_u = body_refs["U"] - set(uncertainty_ids.keys()) + if undefined_u: + errors.append( + f"Undefined uncertainty IDs referenced: " + f"{', '.join(sorted(undefined_u))}" + ) + + if body_refs["I"]: + warnings.append( + f"Inference IDs [{', '.join(sorted(body_refs['I']))}] " + f"referenced but no Inference register to validate against" + ) + + if source_ids: + all_refs = body_refs["S"] | body_refs["U"] | body_refs["I"] + unused = set(source_ids.keys()) - all_refs + if unused: + warnings.append( + f"Unused source IDs (defined but never referenced): " + f"{', '.join(sorted(unused))}" + ) + + malformed_issues = _check_malformed_refs(cleaned) + errors.extend(malformed_issues) + + all_valid_ids = {**source_ids, **uncertainty_ids} + claim_blocks = _split_claim_blocks(cleaned) + for idx, block in enumerate(claim_blocks, 1): + refs = set(_BODY_REF_RE.findall(block)) + if not refs: + first_line = block.split("\n")[0].strip()[:80] + warnings.append( + f"Claim #{idx} has no evidence references: {first_line}" + ) + for prefix, num in refs: + if prefix == "I": + continue + sid = f"{prefix}{num}" + if sid not in all_valid_ids: + errors.append( + f"Claim #{idx} references undefined '{sid}'" + ) + + audit_issues = _check_audit_status(cleaned) + for issue in audit_issues: + if issue.startswith("Final audit status is 'Partial'"): + warnings.append(issue) + else: + errors.append(issue) + + result: list[str] = [] + for e in errors: + result.append(f" ✗ {e}") + for w in warnings: + result.append(f" ⚠ {w}") + return result + + +def _check_audit_status(text: str) -> list[str]: + body = _section_body(text, "Final audit status") + if not body: + return [] + first_line = body.split("\n")[0].strip() + m = re.match(r"^(Pass|Partial|Fail)\b", first_line) + if not m: + return [ + f"Final audit status must be Pass, Partial, or Fail. " + f"Got: {first_line[:60]}" + ] + status = m.group(1) + if status == "Fail": + return [ + "Final audit status is 'Fail' — pack is not ready for delivery" + ] + if status == "Partial": + return [ + "Final audit status is 'Partial' — pack may not be ready for delivery" + ] + return [] + + +# ─── Main ───────────────────────────────────────────────────────────────────── + + def main() -> int: - if len(sys.argv) != 2: - print("Usage: validate_research_pack.py ") - return 1 + parser = argparse.ArgumentParser( + description="Validate a Research Pack markdown file" + ) + parser.add_argument("path", help="Path to the Research Pack .md file") + parser.add_argument( + "--strict", + action="store_true", + help="Enable semantic checks (source IDs, references, audit status)", + ) + args = parser.parse_args() - path = Path(sys.argv[1]) + path = Path(args.path) text = path.read_text(encoding="utf-8") cleaned = strip_fenced_code_blocks(text) @@ -130,14 +380,14 @@ def main() -> int: print("Missing required headings:") for heading in missing: print(f"- {heading}") - return 2 + return EXIT_STRUCTURE empty = find_empty_sections(cleaned) if empty: print("Empty required sections (no content after heading):") for heading in empty: print(f"- {heading}") - return 2 + return EXIT_STRUCTURE artifact_hits = check_artifacts(text) if artifact_hits: @@ -145,7 +395,16 @@ def main() -> int: for pattern, matches in artifact_hits: preview = ", ".join(repr(m) for m in matches) print(f"- pattern {pattern}: {preview}") - return 3 + return EXIT_ARTIFACT + + if args.strict: + strict_issues = run_strict_checks(cleaned) + if strict_issues: + print("Strict mode issues:") + for issue in strict_issues: + print(issue) + has_errors = any(issue.startswith(" ✗") for issue in strict_issues) + return EXIT_STRICT if has_errors else 0 print("Research Pack structure looks valid.") return 0