def _verified_pct(verified: int, total: int) -> float:
    """Return `verified` as a percentage of `total`, rounded to 2 places.

    Returns 0.0 when `total` is 0 so an empty category or dataset never divides
    by zero.
    """
    return round(100 * verified / total, 2) if total else 0.0


def cmd_status(args: argparse.Namespace) -> int:
    """Aggregate the verification state into one JSON document.

    For every category in `CATEGORIES`, counts the records that have a slug,
    how many of them are flagged `verified`, and how many fall into each
    Tier 0 band ("green" / "yellow" / "red") according to
    `offline.score_record`. The green count is also reported as `promotable`
    (the promotion candidate pool). Grand totals are the sums of the
    per-category rows.

    Args:
        args: parsed CLI namespace. `args.stdout` selects printing the JSON
            instead of writing a file; `args.output` is an optional output
            path (default: `VERIFY_DIR / "status.json"`).

    Returns:
        0 (process exit code).
    """
    records = load_all()
    _, _, soc_release = foreign_key_sets(records)
    now_year = offline.now_year_today()

    by_category: dict[str, dict] = {}
    for cat in CATEGORIES:
        total = verified = 0
        bands = Counter()
        for rec in records[cat]:
            # Records without a slug are left out of every count.
            if not rec.slug:
                continue
            total += 1
            if rec.verified:
                verified += 1
            bands[offline.score_record(rec, now_year, soc_release).band] += 1
        by_category[cat] = {
            "total": total,
            "verified": verified,
            "verified_pct": _verified_pct(verified, total),
            "green": bands["green"],
            "yellow": bands["yellow"],
            "red": bands["red"],
            # green = high-confidence band; the promotion candidate pool.
            "promotable": bands["green"],
        }

    def _sum(key: str) -> int:
        return sum(row[key] for row in by_category.values())

    tot, ver = _sum("total"), _sum("verified")
    green, yellow, red = _sum("green"), _sum("yellow"), _sum("red")

    status = {
        "generated_at": _now_iso(),
        "schema": 1,
        "totals": {
            "records": tot,
            "verified": ver,
            "verified_pct": _verified_pct(ver, tot),
            "green": green,
            "yellow": yellow,
            "red": red,
            "promotable": green,
        },
        "by_category": by_category,
    }
    blob = json.dumps(status, indent=2, ensure_ascii=False) + "\n"

    if args.stdout:
        print(blob, end="")
        return 0

    out = Path(args.output or (VERIFY_DIR / "status.json"))
    ensure_verify_dirs()
    # A custom --output may point outside the verify directories, so make sure
    # its parent exists before writing.
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(blob, encoding="utf-8")
    # The percentage goes through the zero-safe helper: an empty dataset used
    # to raise ZeroDivisionError here, after the file had been written.
    print(f"wrote verification status: {out} "
          f"({ver}/{tot} verified = {_verified_pct(ver, tot):.2f}%, "
          f"{green} green / {yellow} yellow / {red} red)")
    return 0
name: verify-status

# Keep data/_verify/status.json — the synced aggregate of the verification state
# (how many records are verified + Tier 0 green/yellow/red bands per category) —
# current. Regenerates whenever the dataset changes on main and daily as a
# backstop, then commits directly to main as TechEngineBot. The
# `!data/_verify/**` negation stops the bot's own status commit from
# retriggering the workflow.

on:
  push:
    branches: [main]
    paths:
      - "data/**"
      - "!data/_verify/**"
  schedule:
    - cron: "30 5 * * *"  # daily 05:30 UTC backstop
  workflow_dispatch:

permissions:
  contents: write

# Serialize runs so two regenerations never race each other on the push.
concurrency:
  group: verify-status
  cancel-in-progress: false

jobs:
  status:
    runs-on: ubuntu-latest
    env:
      PYTHONIOENCODING: utf-8
    steps:
      - uses: actions/checkout@v4
        with:
          # Always build on main: the last step pushes HEAD to main, so a manual
          # dispatch from another branch must not publish that branch's commits.
          ref: main
          token: ${{ secrets.TECHAPI_TOKEN || secrets.GITHUB_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Regenerate verification status aggregate
        run: python -m app.verify status --output data/_verify/status.json

      - name: Commit if changed (TechEngineBot)
        run: |
          # Stage first and compare the index against HEAD: a plain `git diff`
          # ignores untracked files, so the very first status.json would
          # otherwise be reported as "unchanged" and never committed.
          git add data/_verify/status.json
          if git diff --cached --quiet -- data/_verify/status.json; then
            echo "verification status unchanged"; exit 0
          fi
          # NOTE(review): status.json embeds a `generated_at` timestamp, so a
          # regenerated file presumably always differs from the committed one
          # and this job commits on every run — confirm that is intended.
          # Attribute the commit to the TechEngineBot account (id 289859915).
          git config user.name "TechEngineBot"
          git config user.email "289859915+TechEngineBot@users.noreply.github.com"
          git commit -m "chore(verify): refresh verification status aggregate"
          git push origin HEAD:main