Skip to content

Commit f2ccdf0

Browse files
committed
feat(verify): add 'pr' command — all tiers (0-3) for a PR's changed records
One markdown report combining Tier 0 (offline score), Tier 1 (source-URL liveness), Tier 2 (external cross-reference, exact-heading) and Tier 3 (promotion DRY-RUN) over the records changed vs origin/main, plus the full-dataset Tier 0 baseline. Network tiers are capped by --max and never write. Refs #1
1 parent 8d7e9ff commit f2ccdf0

1 file changed

Lines changed: 130 additions & 5 deletions

File tree

app/verify/cli.py

Lines changed: 130 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,127 @@ def cmd_promote(args: argparse.Namespace) -> int:
396396
return 0
397397

398398

399+
def _pr_source_urls(rec) -> list[str]:
    """Return the string entries of ``rec.data["source_urls"]``.

    A missing key or an explicit null value yields an empty list, and
    non-string entries are dropped, so callers can iterate the result safely.
    """
    # `or []` covers a key that is present but null; `.get(key, [])` alone
    # would hand back None and the iteration would raise TypeError.
    return [u for u in (rec.data.get("source_urls") or []) if isinstance(u, str)]


def cmd_pr(args: argparse.Namespace) -> int:
    """All-tiers verification of a PR's changed records, as one markdown report.

    Tier 0 (offline score) + Tier 1 (source-URL liveness) + Tier 2 (external
    cross-reference) + Tier 3 (promotion decision, DRY-RUN — never writes). Network
    tiers run only over the records changed vs origin/main, capped by --max.

    Args:
        args: parsed CLI namespace. Only ``args.max`` is read: the maximum number
            of changed records carried through Tiers 0-3. Negative values are
            treated as 0 and ``None`` as "no cap".

    Returns:
        Always 0. Failures in Tier 1 and Tier 2 are reported inside the markdown
        output instead of aborting the command.
    """
    records = load_all()
    _, _, soc_release = foreign_key_sets(records)
    now_year = offline.now_year_today()

    # NOTE(review): the helper is named `_changed_data_slugs`, yet membership is
    # tested against `rec.path` — confirm it returns paths rather than slugs.
    changed = _changed_data_slugs()
    changed_recs = [
        rec for cat in CATEGORIES for rec in records[cat]
        if rec.slug and rec.path in changed
    ]

    print("## 🔎 Data verification — Tiers 0–3 (on demand)\n")

    if not changed_recs:
        print("_No data records changed in this PR. Showing the full-dataset "
              "Tier 0 baseline only; network tiers (1–3) have nothing to check._\n")
    else:
        # Clamp the cap: a negative slice bound would drop records from the END
        # of the list instead of limiting how many are checked.
        cap = len(changed_recs) if args.max is None else max(0, args.max)
        sub = changed_recs[:cap]
        truncated = len(changed_recs) > cap
        note = f" (showing first {cap} for network tiers)" if truncated else ""
        print(f"**{len(changed_recs)} changed data record(s)**{note}. "
              "Tier 3 is dry-run — no `verified` flags are written.\n")

        # Tier 0 — offline score of the changed records.
        # `scored` feeds every later tier, so the cap bounds this table as well.
        scored = [(r, offline.score_record(r, now_year, soc_release)) for r in sub]
        print("### Tier 0 — offline score (changed)\n")
        print("| Slug | Category | Band | Score | Flags |")
        print("| --- | --- | :--: | ---: | --- |")
        for r, s in scored:
            # Unknown bands fall back to their raw name.
            badge = {"green": "🟢", "yellow": "🟡", "red": "🔴"}.get(s.band, s.band)
            flags = ", ".join(f"`{f}`" for f in s.flags) or "—"
            print(f"| {r.slug} | {r.category} | {badge} | {s.score} | {flags} |")
        print()

        # Tier 1 — source-URL liveness (network).
        # De-duplicate across records so each URL is checked once.
        urls = sorted({u for r, _ in scored for u in _pr_source_urls(r)})
        ts = _now_iso()
        url_cache: dict[str, dict] = {}
        try:
            for res in http_check.check_urls(urls, min_interval=0.5):
                url_cache[res.url] = http_check.result_to_entry(res, ts)
        except Exception as exc:  # network hiccup must not sink the report
            print(f"_Tier 1 skipped: {exc}_\n")
        alive = sum(1 for e in url_cache.values() if e.get("alive"))
        dead = len(url_cache) - alive
        print("### Tier 1 — source-URL liveness (changed)\n")
        print(f"Checked **{len(url_cache)}** unique URL(s): **{alive} alive**, **{dead} dead**.\n")
        # `.get` keeps an entry without a "reason" key from raising KeyError and
        # aborting the report; such entries are tallied as "unknown".
        dead_reasons = Counter(
            e.get("reason", "unknown") for e in url_cache.values() if not e.get("alive")
        )
        if dead_reasons:
            print("| Dead reason | Count |")
            print("| --- | ---: |")
            for reason, n in dead_reasons.most_common(8):
                print(f"| `{reason}` | {n} |")
            print()

        # Tier 2 — external cross-reference (network, exact-heading only).
        fetcher = crossref.WikipediaFetcher()
        xref: dict[str, str] = {}
        decisions: Counter = Counter()
        for r, _ in scored:
            try:
                res = crossref.crossref_record(r.data, fetcher)
                xref[r.slug] = res.decision
                decisions[res.decision] += 1
            except Exception:
                # Best-effort: a failed lookup is counted, and the record simply
                # has no cross-reference decision when Tier 3 looks it up.
                decisions["error"] += 1
        print("### Tier 2 — external cross-reference (changed)\n")
        if decisions:
            print("| Decision | Count |")
            print("| --- | ---: |")
            for d, n in decisions.most_common():
                print(f"| `{d}` | {n} |")
            print()

        # Tier 3 — promotion decision (DRY-RUN).
        promote_rows = []
        hold = 0
        for r, s in scored:
            d = promote.decide(band=s.band, source_urls=_pr_source_urls(r),
                               url_cache=url_cache, crossref_decision=xref.get(r.slug))
            if d.promote:
                promote_rows.append((r, d.reason))
            else:
                hold += 1
        print("### Tier 3 — promotion (dry-run)\n")
        print(f"**{len(promote_rows)}** record(s) would promote to `verified:true`, "
              f"**{hold}** held.\n")
        if promote_rows:
            print("| Slug | Reason |")
            print("| --- | --- |")
            for r, reason in promote_rows:
                print(f"| {r.slug} | `{reason}` |")
            print()

    # Full-dataset Tier 0 baseline (always).
    hist: dict[str, Counter] = defaultdict(Counter)
    hard_flags: Counter = Counter()
    scored_n = 0
    for cat in CATEGORIES:
        for rec in records[cat]:
            if not rec.slug:
                continue
            s = offline.score_record(rec, now_year, soc_release)
            hist[rec.category][s.band] += 1
            scored_n += 1
            for f in s.flags:
                # Only flags prefixed with "!" are tallied as hard flags.
                if f.startswith("!"):
                    hard_flags[f] += 1
    print("### Full-dataset Tier 0 baseline\n")
    _print_markdown(hist, scored_n, hard_flags)
    return 0
518+
519+
399520
def _not_implemented(args: argparse.Namespace) -> int:
400521
print(f"`{args.cmd}` is a later-phase subcommand and is not implemented yet.")
401522
return 2
@@ -433,11 +554,15 @@ def build_parser() -> argparse.ArgumentParser:
433554
cr.add_argument("--recheck", action="store_true", help="ignore crossref cache")
434555
cr.set_defaults(func=cmd_crossref)
435556

436-
pr = sub.add_parser("promote", help="Tier 3: hybrid escalation + verified write-back")
437-
pr.add_argument("--category", nargs="*", choices=CATEGORIES, help="limit to categories")
438-
pr.add_argument("--max", type=int, default=None, help="cap number promoted")
439-
pr.add_argument("--apply", action="store_true", help="actually flip verified (default: dry-run)")
440-
pr.set_defaults(func=cmd_promote)
557+
pm = sub.add_parser("promote", help="Tier 3: hybrid escalation + verified write-back")
558+
pm.add_argument("--category", nargs="*", choices=CATEGORIES, help="limit to categories")
559+
pm.add_argument("--max", type=int, default=None, help="cap number promoted")
560+
pm.add_argument("--apply", action="store_true", help="actually flip verified (default: dry-run)")
561+
pm.set_defaults(func=cmd_promote)
562+
563+
pr = sub.add_parser("pr", help="all-tiers (0-3) markdown report for a PR's changed records")
564+
pr.add_argument("--max", type=int, default=40, help="cap changed records for network tiers")
565+
pr.set_defaults(func=cmd_pr)
441566

442567
return p
443568

0 commit comments

Comments
 (0)