Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/verify-status.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: verify-status

# Keep data/_verify/status.json — the synced aggregate of the verification state
# (how many records are verified + Tier 0 green/yellow/red bands per category) —
# current. Regenerates whenever the dataset changes and daily as a backstop, then
# commits directly as TechEngineBot. The `!data/_verify/**` negation stops its own
# status commit from retriggering the workflow.
on:
  push:
    branches: [main]
    paths:
      - "data/**"
      - "!data/_verify/**"
  schedule:
    - cron: "30 5 * * *" # daily 05:30 UTC backstop
  workflow_dispatch:

permissions:
  contents: write

# Serialize runs so two regenerations never race each other to push; queued
# runs wait instead of being cancelled.
concurrency:
  group: verify-status
  cancel-in-progress: false

jobs:
  status:
    runs-on: ubuntu-latest
    env:
      PYTHONIOENCODING: utf-8
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.TECHAPI_TOKEN || secrets.GITHUB_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Regenerate verification status aggregate
        run: python -m app.verify status --output data/_verify/status.json

      - name: Commit if changed (TechEngineBot)
        run: |
          # Stage first, then compare the index against HEAD. A plain
          # `git diff` ignores untracked files, so it would report "unchanged"
          # the first time status.json is created and never commit it.
          git add data/_verify/status.json
          if git diff --cached --quiet -- data/_verify/status.json; then
            echo "verification status unchanged"; exit 0
          fi
          # NOTE(review): status.json carries a `generated_at` field; if that is
          # a per-run timestamp the file differs on every run, so this check
          # will always commit — confirm that is intended.
          # Attribute the commit to the TechEngineBot account (id 289859915).
          git config user.name "TechEngineBot"
          git config user.email "289859915+TechEngineBot@users.noreply.github.com"
          git commit -m "chore(verify): refresh verification status aggregate"
          git push origin HEAD:main
72 changes: 72 additions & 0 deletions app/verify/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,20 @@
from __future__ import annotations

import argparse
import json
import subprocess
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path

from . import crossref, http_check, ledger, offline, promote
from .common import (
CATEGORIES,
SCORES_PATH,
VERIFY_DIR,
Record,
configure_stdout,
ensure_verify_dirs,
foreign_key_sets,
load_all,
repo_path,
Expand Down Expand Up @@ -230,6 +234,68 @@ def _print_markdown(hist, scored, hard_flags) -> None:
print(f"| {n} | `{name}` |")


def cmd_status(args: argparse.Namespace) -> int:
    """Aggregate the verification state into one JSON document.

    The document is the synced source of truth for "how much is verified":
    per-category ``verified`` counts, Tier 0 green/yellow/red band counts and
    the promotion-candidate count, plus the same figures summed over all
    categories. Records without a slug are skipped entirely.

    Args:
        args: Parsed CLI namespace. ``args.stdout`` prints the JSON instead of
            writing a file; ``args.output`` overrides the destination path
            (default: ``VERIFY_DIR / "status.json"``).

    Returns:
        Always ``0`` (process exit code).
    """
    records = load_all()
    _, _, soc_release = foreign_key_sets(records)
    now_year = offline.now_year_today()

    by_category: dict[str, dict] = {}
    tot = ver = g = y = r = 0
    for cat in CATEGORIES:
        ct = cv = cg = cy = cr = 0
        for rec in records[cat]:
            if not rec.slug:
                continue
            ct += 1
            if rec.verified:
                cv += 1
            band = offline.score_record(rec, now_year, soc_release).band
            # bool is an int subclass, so each comparison adds 0 or 1.
            cg += band == "green"
            cy += band == "yellow"
            cr += band == "red"
        by_category[cat] = {
            "total": ct,
            "verified": cv,
            "verified_pct": round(100 * cv / ct, 2) if ct else 0.0,
            "green": cg,
            "yellow": cy,
            "red": cr,
            # green = high-confidence band; the promotion candidate pool.
            "promotable": cg,
        }
        tot += ct
        ver += cv
        g += cg
        y += cy
        r += cr

    status = {
        "generated_at": _now_iso(),
        "schema": 1,
        "totals": {
            "records": tot,
            "verified": ver,
            "verified_pct": round(100 * ver / tot, 2) if tot else 0.0,
            "green": g,
            "yellow": y,
            "red": r,
            "promotable": g,
        },
        "by_category": by_category,
    }
    blob = json.dumps(status, indent=2, ensure_ascii=False) + "\n"

    if args.stdout:
        print(blob, end="")
    else:
        out = args.output or (VERIFY_DIR / "status.json")
        ensure_verify_dirs()
        # A custom --output may point outside the verify directory; make sure
        # its parent exists so write_text does not fail on a fresh checkout.
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(blob, encoding="utf-8")
        # Guard the division: with no counted records `tot` is 0 and the
        # summary line must not raise after the file was already written.
        overall_pct = 100 * ver / tot if tot else 0.0
        print(f"wrote verification status: {out} "
              f"({ver}/{tot} verified = {overall_pct:.2f}%, "
              f"{g} green / {y} yellow / {r} red)")
    return 0


def cmd_report(args: argparse.Namespace) -> int:
if not SCORES_PATH.exists():
print("no scores cache — run `python -m app.verify score` first")
Expand Down Expand Up @@ -583,6 +649,12 @@ def build_parser() -> argparse.ArgumentParser:
rp = sub.add_parser("report", help="summarize latest ledger state")
rp.set_defaults(func=cmd_report)

st = sub.add_parser("status", help="write the aggregated verification status JSON")
st.add_argument("--output", type=Path, default=None,
help="output path (default: data/_verify/status.json)")
st.add_argument("--stdout", action="store_true", help="print JSON instead of writing a file")
st.set_defaults(func=cmd_status)

cu = sub.add_parser("check-urls", help="Tier 1: source_urls HTTP liveness")
cu.add_argument("--category", nargs="*", choices=CATEGORIES, help="limit to categories")
cu.add_argument("--max", type=int, default=500, help="number of frontier records to target")
Expand Down
Loading