techapi-pr-validate #106
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: techapi-pr-validation-comment | |
| # TechAPI data PRs can ask this repository to validate their head commit and | |
| # leave a curator-facing comment back on the PR. This keeps the PR owned by the | |
| # human contributor while TechEngineBot reports the engine verdict. | |
| on: | |
| # Entry point 1: a repository_dispatch event of type techapi-pr-validate; the job reads its values from client_payload. | |
| repository_dispatch: | |
| types: [techapi-pr-validate] | |
| # Entry point 2: a manual run that supplies the same values as inputs. | |
| workflow_dispatch: | |
| inputs: | |
| pr_number: | |
| description: "TechAPI PR number to comment on" | |
| type: string | |
| required: true | |
| head_sha: | |
| description: "TechAPI commit SHA to validate" | |
| type: string | |
| required: true | |
| pr_url: | |
| description: "TechAPI PR URL" | |
| type: string | |
| required: false | |
| default: "" | |
| # The workflow token is limited to read access on repository contents. | |
| permissions: | |
| contents: read | |
| # Runs are grouped per PR number; a newer run cancels one still in progress for the same PR. | |
| concurrency: | |
| group: techapi-pr-validation-${{ github.event.client_payload.pr_number || inputs.pr_number }} | |
| cancel-in-progress: true | |
| jobs: | |
| validate: | |
| runs-on: ubuntu-latest | |
| # Each value prefers the repository_dispatch client_payload and falls back to the workflow_dispatch input or a fixed default. | |
| # NOTE(review): job-level env is visible to every step below, including the step that builds the PR head checkout - confirm the token exposure is intended. | |
| env: | |
| TECHAPI_COMMENT_TOKEN: ${{ secrets.TECHENGINEBOT_TOKEN || secrets.TECHAPI_TOKEN }} | |
| TECHAPI_PR_NUMBER: ${{ github.event.client_payload.pr_number || inputs.pr_number }} | |
| TECHAPI_HEAD_SHA: ${{ github.event.client_payload.head_sha || inputs.head_sha }} | |
| TECHAPI_HEAD_REF: ${{ github.event.client_payload.head_ref || '' }} | |
| TECHAPI_PR_URL: ${{ github.event.client_payload.pr_url || inputs.pr_url }} | |
| REQUESTED_BY: ${{ github.event.client_payload.requested_by || github.actor }} | |
| TECHAPI_DATA_DIR: ${{ github.workspace }}/TechAPI/data | |
| TECHAPI_SITE_DIR: ${{ github.workspace }}/TechAPI/site | |
| steps: | |
| - name: Checkout TechEngine | |
| uses: actions/checkout@v4 | |
| # The commit under review is checked out into ./TechAPI. | |
| - name: Checkout TechAPI PR head | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: GetTechAPI/TechAPI | |
| ref: ${{ env.TECHAPI_HEAD_SHA }} | |
| path: TechAPI | |
| # A second checkout of main in ./TechAPI-main is the baseline tree the comparison scripts read. | |
| - name: Checkout TechAPI main | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: GetTechAPI/TechAPI | |
| ref: main | |
| path: TechAPI-main | |
| - uses: actions/setup-python@v5 | |
| with: | |
| python-version: "3.12" | |
| cache: pip | |
| - name: Detect TechAPI homepage changes | |
| id: site_changes | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| # The script's stdout is appended to GITHUB_OUTPUT, so every line it prints must be a key=value step output. | |
| python - <<'PY' >> "$GITHUB_OUTPUT" | |
| from __future__ import annotations | |
| import hashlib | |
| import re | |
| from pathlib import Path | |
def digest(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is hashed in fixed-size chunks so a large site asset is never
    held in memory in one piece.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
def rel_site_files(root: Path) -> dict[str, Path]:
    """Map tracked homepage files under ``root/site`` to their paths.

    Keys are POSIX-style paths relative to *root*. A file is tracked when it
    lives under ``site/src/`` or ``site/public/``, is ``site/package.json``
    or ``site/package-lock.json``, or is named ``site/astro.config.<ext>``
    with ``<ext>`` one of js/ts/cjs/cts/mjs/mts. Files under
    ``site/public/v1/`` and ``site/public/openapi.json`` are never tracked.

    Returns:
        An empty dict when *root* has no ``site`` entry, otherwise the
        mapping in sorted path order.
    """
    site = root / "site"
    if not site.exists():
        return {}
    wanted_files = {"site/package.json", "site/package-lock.json"}
    tracked_prefixes = ("site/src/", "site/public/")
    files: dict[str, Path] = {}
    for path in sorted(site.rglob("*")):
        if not path.is_file():
            continue
        # as_posix() normalises separators without rewriting a literal
        # backslash that is part of a POSIX file name.
        rel = path.relative_to(root).as_posix()
        if rel.startswith("site/public/v1/") or rel == "site/public/openapi.json":
            continue
        if (
            rel.startswith(tracked_prefixes)
            or rel in wanted_files
            or re.fullmatch(r"site/astro\.config\.[cm]?[jt]s", rel)
        ):
            files[rel] = path
    return files
| # Compare the PR head checkout with the main checkout: a path present on one side only is added/deleted, | |
| # and a path present on both sides counts as modified when the SHA-256 digests differ. | |
| head = rel_site_files(Path("TechAPI")) | |
| base = rel_site_files(Path("TechAPI-main")) | |
| added = sorted(set(head) - set(base)) | |
| deleted = sorted(set(base) - set(head)) | |
| modified = sorted(key for key in set(head) & set(base) if digest(head[key]) != digest(base[key])) | |
| has_changes = bool(added or modified or deleted) | |
| # The enclosing step redirects stdout to GITHUB_OUTPUT, so each line printed here becomes a step output. | |
| print(f"changed={'true' if has_changes else 'false'}") | |
| print(f"added={len(added)}") | |
| print(f"modified={len(modified)}") | |
| print(f"deleted={len(deleted)}") | |
| PY | |
| # Node is only set up when the site-change detector reported changes. | |
| - uses: actions/setup-node@v4 | |
| if: steps.site_changes.outputs.changed == 'true' | |
| with: | |
| node-version: "22" | |
| cache: npm | |
| cache-dependency-path: TechAPI/site/package-lock.json | |
| - name: Install TechEngine | |
| run: pip install -e . | |
| - name: Validate TechAPI data | |
| id: validate | |
| shell: bash | |
| run: | | |
| # set +e: both checks run and are recorded even when the first one fails; the step itself does not fail here. | |
| set +e | |
| # Each check writes its exit code into the log as a marker line, which is read back with grep below. | |
| { | |
| echo "## app.validate" | |
| python -m app.validate | |
| echo "app_validate_status=$?" | |
| } > validation.log 2>&1 | |
| app_status=$(grep "app_validate_status=" validation.log | tail -n 1 | cut -d= -f2) | |
| { | |
| echo | |
| echo "## integrity_check.py --strict" | |
| python integrity_check.py TechAPI/data --strict | |
| echo "integrity_status=$?" | |
| } >> validation.log 2>&1 | |
| integrity_status=$(grep "integrity_status=" validation.log | tail -n 1 | cut -d= -f2) | |
| # Strip the marker lines again so they do not show up in the published log (this drops any line containing "_status="). | |
| sed -i '/_status=/d' validation.log | |
| status="success" | |
| # A status that could not be read back is treated as failure via the :-1 default. | |
| if [ "${app_status:-1}" != "0" ] || [ "${integrity_status:-1}" != "0" ]; then | |
| status="failure" | |
| fi | |
| echo "status=$status" >> "$GITHUB_OUTPUT" | |
| echo "app_status=${app_status:-1}" >> "$GITHUB_OUTPUT" | |
| echo "integrity_status=${integrity_status:-1}" >> "$GITHUB_OUTPUT" | |
- name: Build TechAPI homepage
  if: steps.site_changes.outputs.changed == 'true'
  id: site_build
  shell: bash
  env:
    # npm lifecycle scripts and the site build execute code from the PR head
    # checkout, so the job-level comment token is blanked for this step only.
    TECHAPI_COMMENT_TOKEN: ""
  run: |
    # set +e: a failed build must still be published as a step output rather
    # than aborting the step; later steps turn the status into PASS/FAIL.
    set +e
    site_build_log="${GITHUB_WORKSPACE}/site-build.log"
    {
      echo "## TechAPI homepage build"
      # Chain with && so a failed cd or npm ci is reported instead of being
      # masked by the exit status of a later command.
      cd "${TECHAPI_SITE_DIR}" && npm ci && npm run build
      # The brace group runs in the current shell, so the variable survives it.
      site_build_status=$?
    } > "${site_build_log}" 2>&1
    echo "status=${site_build_status:-1}" >> "$GITHUB_OUTPUT"
| - name: Build data quality summary | |
| shell: bash | |
| run: | | |
| # Fetch main into the PR head checkout so FETCH_HEAD can be diffed against HEAD by the script below. | |
| git -C TechAPI fetch --no-tags --depth=1 origin main | |
| python - <<'PY' | |
| from __future__ import annotations | |
| import hashlib | |
| import json | |
| import re | |
| import subprocess | |
| from collections import Counter | |
| from pathlib import Path | |
| from typing import Any | |
| # Data roots of the PR head checkout and of the main checkout. | |
| HEAD = Path("TechAPI/data") | |
| BASE = Path("TechAPI-main/data") | |
| # Top-level data directories that are summarised; anything else is ignored by this script. | |
| CATEGORIES = ( | |
| "brand", | |
| "soc", | |
| "smartphone", | |
| "tablet", | |
| "watch", | |
| "pda", | |
| "gpu", | |
| "cpu", | |
| ) | |
| # Upper bound on the number of heuristic warnings listed in the comment. | |
| MAX_WARNINGS = 20 | |
def load_json(path: Path) -> dict[str, Any]:
    """Load the JSON object stored at *path*, tolerating a UTF-8 BOM.

    This script only builds an advisory summary, so one unreadable record
    must not abort it: an empty dict is returned when the file is missing or
    unreadable, is not valid UTF-8 or JSON, or holds a top-level value that
    is not an object. Callers then see a record without any fields.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8-sig"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        return {}
    return data if isinstance(data, dict) else {}
def rel_jsons(root: Path, category: str) -> dict[str, Path]:
    """Map every ``*.json`` file under ``root/category`` to its path.

    Keys are POSIX-style paths relative to *root*, inserted in sorted path
    order. Directories whose name happens to end in ``.json`` are skipped.

    Returns:
        An empty dict when the category directory does not exist.
    """
    category_dir = root / category
    if not category_dir.exists():
        return {}
    return {
        # as_posix() normalises separators without rewriting a literal
        # backslash that is part of a POSIX file name.
        path.relative_to(root).as_posix(): path
        for path in sorted(category_dir.rglob("*.json"))
        if path.is_file()
    }
def digest(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is hashed in fixed-size chunks so a large site asset is never
    held in memory in one piece.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
def changed_data_from_git() -> dict[str, dict[str, list[str]]]:
    """Classify changed data files between ``FETCH_HEAD`` and ``HEAD``.

    Runs ``git diff --name-status --no-renames -z`` inside the ``TechAPI``
    checkout, limited to the ``data`` directory. Only ``*.json`` paths whose
    first directory below ``data/`` is one of ``CATEGORIES`` are kept.

    Returns:
        ``{category: {"added": [...], "modified": [...], "deleted": [...]}}``
        with sorted paths relative to ``data/``; every category is present
        even when it has no changes.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    changes: dict[str, dict[str, list[str]]] = {
        category: {"added": [], "modified": [], "deleted": []}
        for category in CATEGORIES
    }
    output = subprocess.check_output(
        [
            "git",
            "-C",
            "TechAPI",
            "diff",
            "--name-status",
            "--no-renames",
            # -z: paths are emitted verbatim and NUL-terminated. Without it
            # git C-quotes names containing non-ASCII or special characters,
            # and such files would fail the prefix test below and be dropped.
            "-z",
            "FETCH_HEAD",
            "HEAD",
            "--",
            "data",
        ],
        encoding="utf-8",
    )
    status_map = {"A": "added", "M": "modified", "D": "deleted"}
    # With --no-renames every entry is exactly "<status>NUL<path>NUL", so the
    # fields alternate; zip() drops the empty field after the final NUL.
    fields = output.split("\0")
    for status, path in zip(fields[0::2], fields[1::2]):
        bucket = status_map.get(status[:1])
        if bucket is None or not path.startswith("data/") or not path.endswith(".json"):
            continue
        rel = path.removeprefix("data/")
        category = rel.split("/", 1)[0]
        if category in changes:
            changes[category][bucket].append(rel)
    for category_changes in changes.values():
        for paths in category_changes.values():
            paths.sort()
    return changes
def rel_site_files(root: Path) -> dict[str, Path]:
    """Map tracked homepage files under ``root/site`` to their paths.

    Keys are POSIX-style paths relative to *root*. A file is tracked when it
    lives under ``site/src/`` or ``site/public/``, is ``site/package.json``
    or ``site/package-lock.json``, or is named ``site/astro.config.<ext>``
    with ``<ext>`` one of js/ts/cjs/cts/mjs/mts. Files under
    ``site/public/v1/`` and ``site/public/openapi.json`` are never tracked.

    Returns:
        An empty dict when *root* has no ``site`` entry, otherwise the
        mapping in sorted path order.
    """
    site = root / "site"
    if not site.exists():
        return {}
    wanted_files = {"site/package.json", "site/package-lock.json"}
    tracked_prefixes = ("site/src/", "site/public/")
    files: dict[str, Path] = {}
    for path in sorted(site.rglob("*")):
        if not path.is_file():
            continue
        # as_posix() normalises separators without rewriting a literal
        # backslash that is part of a POSIX file name.
        rel = path.relative_to(root).as_posix()
        if rel.startswith("site/public/v1/") or rel == "site/public/openapi.json":
            continue
        if (
            rel.startswith(tracked_prefixes)
            or rel in wanted_files
            or re.fullmatch(r"site/astro\.config\.[cm]?[jt]s", rel)
        ):
            files[rel] = path
    return files
def verified_value(record: dict[str, Any]) -> bool | None:
    """Return the record's ``verified`` flag when it is a real boolean.

    Any other value, including a missing key, yields ``None``.
    """
    flag = record.get("verified")
    if isinstance(flag, bool):
        return flag
    return None
def has_kaggle_source(record: dict[str, Any]) -> bool:
    """Report whether any of the record's ``source_urls`` mentions kaggle.com.

    The match is a case-insensitive substring test. ``source_urls`` is
    normally a list; a single string is treated as a one-element list, and a
    missing, ``null`` or otherwise non-sequence value counts as "no source"
    instead of raising. Non-string entries are ignored.
    """
    urls = record.get("source_urls")
    if isinstance(urls, str):
        # Iterating a bare string would test single characters only.
        urls = [urls]
    elif not isinstance(urls, (list, tuple)):
        return False
    return any(isinstance(url, str) and "kaggle.com" in url.lower() for url in urls)
def name_warnings(category: str, rel: str, record: dict[str, Any]) -> list[str]:
    """Return heuristic warnings about the record's ``name`` field.

    Args:
        category: Category label used as the warning prefix.
        rel: Record path used as the second part of the warning prefix.
        record: Parsed record; only its ``name`` key is read.

    Returns:
        One ``"<category>: <rel>: <warning>"`` string per finding, in check
        order. Empty when ``name`` is not a string or nothing is flagged.
    """
    name = record.get("name")
    if not isinstance(name, str):
        return []
    warnings: list[str] = []
    if name != name.strip():
        warnings.append("leading/trailing whitespace in name")
    # Two consecutive spaces; testing for a single space would flag every
    # multi-word name.
    if "  " in name:
        warnings.append("double spaces in name")
    # U+FFFD is what a decoder substitutes for bytes it could not decode.
    if "\ufffd" in name:
        warnings.append("replacement character in name")
    if name.count("(") != name.count(")"):
        warnings.append("unbalanced parentheses in name")
    words = re.findall(r"[A-Za-z0-9]+", name.lower())
    # Single-character repeats are not reported.
    if any(a == b and len(a) > 1 for a, b in zip(words, words[1:])):
        warnings.append("repeated adjacent word in name")
    if re.search(r"\b(unknown|unk|n/a|tbd|null)\b", name, re.I):
        warnings.append("placeholder-like token in name")
    return [f"{category}: {rel}: {warning}" for warning in warnings]
def value_warnings(category: str, rel: str, record: dict[str, Any]) -> list[str]:
    """Return heuristic warnings about numeric and architecture fields.

    Only the ``cpu`` and ``gpu`` categories are inspected; every other
    category yields an empty list. Each finding is formatted as
    ``"<category>: <rel>: <warning>"``.
    """

    def boost_below_base(base_key: str, boost_key: str) -> bool:
        # Both clocks must be positive numbers before they are compared.
        base_clock = record.get(base_key)
        boost_clock = record.get(boost_key)
        if not isinstance(base_clock, (int, float)):
            return False
        if not isinstance(boost_clock, (int, float)):
            return False
        return boost_clock > 0 and base_clock > 0 and boost_clock < base_clock

    findings: list[str] = []
    if category == "cpu":
        core_count = record.get("cores")
        thread_count = record.get("threads")
        if (
            isinstance(core_count, int)
            and isinstance(thread_count, int)
            and thread_count < core_count
        ):
            findings.append("threads < cores")
        if boost_below_base("base_clock_ghz", "boost_clock_ghz"):
            findings.append("boost clock below base clock")
        architecture = record.get("architecture")
        if isinstance(architecture, str) and re.search(
            r"\b(unknown|n/a|tbd|null)\b", architecture, re.I
        ):
            findings.append("placeholder-like architecture")
    elif category == "gpu":
        if boost_below_base("base_clock_mhz", "boost_clock_mhz"):
            findings.append("boost clock below base clock")
    return [f"{category}: {rel}: {finding}" for finding in findings]
| # Verified share (in percent of tracked records) below which a category is called out in the warning block. | |
| LOW_VERIFIED_WARNING_PCT = 50.0 | |
| # --- Section 1: per-category verification statistics over the whole PR head data tree. --- | |
| stats_lines: list[str] = [] | |
| stats_lines.append("## Data summary") | |
| stats_lines.append("") | |
| stats_lines.append("| Category | Total | Verified | Unverified | Missing verified | Tracked | Verified % of tracked |") | |
| stats_lines.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |") | |
| total_all = verified_all = unverified_all = missing_verified_all = 0 | |
| by_category: dict[str, dict[str, int]] = {} | |
| low_verified_categories: list[tuple[str, float, int, int]] = [] | |
| for category in CATEGORIES: | |
| paths = rel_jsons(HEAD, category) | |
| verified = unverified = missing_verified = 0 | |
| for path in paths.values(): | |
| value = verified_value(load_json(path)) | |
| if value is True: | |
| verified += 1 | |
| elif value is False: | |
| unverified += 1 | |
| else: | |
| missing_verified += 1 | |
| total = len(paths) | |
| # "Tracked" means the record carries a boolean verified flag; the percentage ignores records without one. | |
| tracked = verified + unverified | |
| pct_value = verified / tracked * 100 if tracked else None | |
| pct = f"{pct_value:.1f}%" if pct_value is not None else "n/a" | |
| if pct_value is not None and pct_value < LOW_VERIFIED_WARNING_PCT: | |
| low_verified_categories.append((category, pct_value, verified, tracked)) | |
| by_category[category] = { | |
| "total": total, | |
| "verified": verified, | |
| "unverified": unverified, | |
| "missing_verified": missing_verified, | |
| } | |
| total_all += total | |
| verified_all += verified | |
| unverified_all += unverified | |
| missing_verified_all += missing_verified | |
| stats_lines.append( | |
| f"| {category} | {total} | {verified} | {unverified} | {missing_verified} | {tracked} | {pct} |" | |
| ) | |
| tracked_all = verified_all + unverified_all | |
| pct_all_value = verified_all / tracked_all * 100 if tracked_all else None | |
| pct_all = f"{pct_all_value:.1f}%" if pct_all_value is not None else "n/a" | |
| stats_lines.append( | |
| f"| **all** | **{total_all}** | **{verified_all}** | **{unverified_all}** | " | |
| f"**{missing_verified_all}** | **{tracked_all}** | **{pct_all}** |" | |
| ) | |
| if pct_all_value is not None and pct_all_value < LOW_VERIFIED_WARNING_PCT: | |
| low_verified_categories.append(("all", pct_all_value, verified_all, tracked_all)) | |
| # Advisory block: lowest coverage first, at most eight entries spelled out. | |
| if low_verified_categories: | |
| low_verified_categories.sort(key=lambda item: item[1]) | |
| coverage_list = ", ".join( | |
| f"{category} {pct:.1f}% ({verified}/{tracked})" | |
| for category, pct, verified, tracked in low_verified_categories[:8] | |
| ) | |
| if len(low_verified_categories) > 8: | |
| coverage_list += f", and {len(low_verified_categories) - 8} more" | |
| stats_lines.append("") | |
| stats_lines.append("> [!WARNING]") | |
| stats_lines.append( | |
| f"> Tracked verified coverage is below {LOW_VERIFIED_WARNING_PCT:.0f}% for {coverage_list}." | |
| ) | |
| stats_lines.append( | |
| "> Tracked coverage excludes records missing the `verified` field; see the `Missing verified` column for those records." | |
| ) | |
| stats_lines.append( | |
| "> This does not fail validation. Keep imported records `verified: false` until " | |
| "manual audit, but treat this as follow-up verification work before relying on " | |
| "the affected categories as curated data." | |
| ) | |
| # --- Section 2: per-category counts of files changed by the PR, taken from the git diff. --- | |
| change_lines: list[str] = [] | |
| change_lines.append("## Changed data") | |
| change_lines.append("") | |
| change_lines.append("| Category | Added | Modified | Deleted | Added verified | Added unverified | Added Kaggle-sourced |") | |
| change_lines.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |") | |
| # Added records are collected here so the heuristic review further down can revisit them. | |
| all_added: list[tuple[str, str, Path]] = [] | |
| changed_by_category = changed_data_from_git() | |
| for category in CATEGORIES: | |
| added_keys = changed_by_category[category]["added"] | |
| modified_keys = changed_by_category[category]["modified"] | |
| deleted_keys = changed_by_category[category]["deleted"] | |
| added_verified = added_unverified = added_kaggle = 0 | |
| for key in added_keys: | |
| path = HEAD / key | |
| record = load_json(path) | |
| all_added.append((category, key, path)) | |
| if verified_value(record) is True: | |
| added_verified += 1 | |
| elif verified_value(record) is False: | |
| added_unverified += 1 | |
| if has_kaggle_source(record): | |
| added_kaggle += 1 | |
| change_lines.append( | |
| f"| {category} | {len(added_keys)} | {len(modified_keys)} | {len(deleted_keys)} | " | |
| f"{added_verified} | {added_unverified} | {added_kaggle} |" | |
| ) | |
def display_record(root: Path, rel: str) -> str:
    """Format one record as a markdown snippet: the path in backticks, then a label.

    The label is the record's ``name`` when that is a non-empty string and
    the relative path otherwise.
    """
    name = load_json(root / rel).get("name")
    has_name = isinstance(name, str) and bool(name)
    label = name if has_name else rel
    return f"`{rel}` - {label}"
def append_examples(title: str, root: Path, keys: list[str], limit: int = 15) -> None:
    """Append a titled bullet list of example records to ``change_lines``.

    Nothing is appended when *keys* is empty. At most *limit* records are
    listed; the remainder is summarised in one trailing "... N more" bullet.
    """
    if not keys:
        return
    change_lines.extend(["", f"### {title}"])
    change_lines.extend(f"- {display_record(root, rel)}" for rel in keys[:limit])
    overflow = len(keys) - limit
    if overflow > 0:
        change_lines.append(f"- ... {overflow} more")
| # --- Section 3: example listings per category; deleted records are read from the main checkout. --- | |
| change_lines.append("") | |
| change_lines.append("## Changed record examples") | |
| for category, changes in changed_by_category.items(): | |
| append_examples(f"{category} added", HEAD, changes["added"]) | |
| append_examples(f"{category} modified", HEAD, changes["modified"]) | |
| append_examples(f"{category} deleted", BASE, changes["deleted"]) | |
| if not any( | |
| changes["added"] or changes["modified"] or changes["deleted"] | |
| for changes in changed_by_category.values() | |
| ): | |
| change_lines.append("") | |
| change_lines.append("- No data file changes detected.") | |
| # --- Section 4: homepage/site file changes, found by comparing the two checkouts by path and SHA-256 digest. --- | |
| site_lines: list[str] = [] | |
| site_lines.append("## Changed site") | |
| site_lines.append("") | |
| head_site = rel_site_files(Path("TechAPI")) | |
| base_site = rel_site_files(Path("TechAPI-main")) | |
| site_added = sorted(set(head_site) - set(base_site)) | |
| site_deleted = sorted(set(base_site) - set(head_site)) | |
| site_modified = sorted( | |
| key for key in set(head_site) & set(base_site) if digest(head_site[key]) != digest(base_site[key]) | |
| ) | |
| site_lines.append("| Area | Added | Modified | Deleted |") | |
| site_lines.append("| --- | ---: | ---: | ---: |") | |
| site_lines.append(f"| homepage/site | {len(site_added)} | {len(site_modified)} | {len(site_deleted)} |") | |
| if site_added or site_modified or site_deleted: | |
| # Appends a titled bullet list of at most `limit` paths to site_lines, plus a "... N more" bullet for the rest. | |
| def append_site_examples(title: str, keys: list[str], limit: int = 20) -> None: | |
| if not keys: | |
| return | |
| site_lines.append("") | |
| site_lines.append(f"### {title}") | |
| for rel in keys[:limit]: | |
| site_lines.append(f"- `{rel}`") | |
| if len(keys) > limit: | |
| site_lines.append(f"- ... {len(keys) - limit} more") | |
| append_site_examples("Added site files", site_added) | |
| append_site_examples("Modified site files", site_modified) | |
| append_site_examples("Deleted site files", site_deleted) | |
| else: | |
| site_lines.append("") | |
| site_lines.append("- No homepage/site file changes detected.") | |
| # --- Section 5: heuristic review of the added records only. --- | |
| change_lines.append("") | |
| change_lines.append("## Heuristic review") | |
| change_lines.append("") | |
| warnings: list[str] = [] | |
| manufacturer_counter: Counter[str] = Counter() | |
| source_counter: Counter[str] = Counter() | |
| for category, rel, path in all_added: | |
| record = load_json(path) | |
| # "manufacturer" wins over "brand" when both are present and truthy. | |
| manufacturer = record.get("manufacturer") or record.get("brand") | |
| if isinstance(manufacturer, str): | |
| manufacturer_counter[manufacturer] += 1 | |
| if has_kaggle_source(record): | |
| source_counter["kaggle"] += 1 | |
| else: | |
| source_counter["other"] += 1 | |
| warnings.extend(name_warnings(category, rel, record)) | |
| warnings.extend(value_warnings(category, rel, record)) | |
| if manufacturer_counter: | |
| top = ", ".join(f"{name}: {count}" for name, count in manufacturer_counter.most_common(8)) | |
| change_lines.append(f"- Added records by manufacturer/brand: {top}") | |
| if source_counter: | |
| top = ", ".join(f"{name}: {count}" for name, count in source_counter.most_common()) | |
| change_lines.append(f"- Added records by source class: {top}") | |
| if warnings: | |
| change_lines.append(f"- Heuristic warnings: {len(warnings)} total; showing first {min(MAX_WARNINGS, len(warnings))}.") | |
| change_lines.append("") | |
| # NOTE(review): a one-space indent may not nest these bullets under the item above in Markdown - confirm the intended indentation. | |
| for warning in warnings[:MAX_WARNINGS]: | |
| change_lines.append(f" - {warning}") | |
| else: | |
| change_lines.append("- Heuristic warnings: none found.") | |
| # The three fragments are written to the working directory for the comment-building step to assemble. | |
| Path("change-review.md").write_text("\n".join(change_lines) + "\n", encoding="utf-8") | |
| Path("site-change-review.md").write_text("\n".join(site_lines) + "\n", encoding="utf-8") | |
| Path("data-stats.md").write_text("\n".join(stats_lines) + "\n", encoding="utf-8") | |
| PY | |
| - name: Build PR comment | |
| shell: bash | |
| run: | | |
| short_sha="${TECHAPI_HEAD_SHA:0:7}" | |
| result="PASS" | |
| site_changed="${{ steps.site_changes.outputs.changed }}" | |
| site_build_status="${{ steps.site_build.outputs.status }}" | |
| # Overall FAIL when data validation failed, or when the site changed and its build did not report status 0. | |
| if [ "${{ steps.validate.outputs.status }}" != "success" ] || { [ "${site_changed}" = "true" ] && [ "${site_build_status}" != "0" ]; }; then | |
| result="FAIL" | |
| fi | |
| # The step outputs are handed to the inline script as environment variables for this one command. | |
| VALIDATION_STATUS="${{ steps.validate.outputs.status }}" SITE_CHANGED="${{ steps.site_changes.outputs.changed }}" SITE_BUILD_STATUS="${{ steps.site_build.outputs.status }}" python - <<'PY' | |
| from __future__ import annotations | |
| import os | |
| import re | |
| from pathlib import Path | |
| log_path = Path("validation.log") | |
| log = log_path.read_text(encoding="utf-8", errors="replace") | |
| lines = log.splitlines() | |
| failed = os.environ.get("VALIDATION_STATUS") != "success" | |
| site_changed = os.environ.get("SITE_CHANGED") == "true" | |
| site_failed = site_changed and os.environ.get("SITE_BUILD_STATUS") != "0" | |
| section_counts: dict[str, int] = {} | |
| current_section: str | None = None | |
| key_lines: list[str] = [] | |
| hard_lines: list[str] = [] | |
| # Single pass over the validation log: collect headline lines, count lines under each "### " section, | |
| # and pick out lines containing one of the tokens treated as potentially blocking. | |
| for line in lines: | |
| if line.startswith("## ") or line.startswith("loaded ") or "integrity gate:" in line: | |
| key_lines.append(line) | |
| if line.startswith("### "): | |
| current_section = line.removeprefix("### ").strip() | |
| section_counts[current_section] = 0 | |
| continue | |
| # NOTE(review): only lines starting with a space are counted - presumably the checker indents its findings; confirm the prefix. | |
| if current_section and line.startswith(" "): | |
| section_counts[current_section] += 1 | |
| if any(token in line for token in ("DUP ", "slug!=file", " > ")): | |
| hard_lines.append(line) | |
| out: list[str] = [] | |
| out.append("## Validation notes") | |
| out.append("") | |
| out.append("- Full advisory outlier listings are suppressed on successful runs because they are dataset-wide and mostly stable between PRs.") | |
| out.append("- Failure runs still include a detailed log excerpt for debugging.") | |
| if key_lines: | |
| out.append("") | |
| out.append("Key output:") | |
| out.append("") | |
| out.append("```text") | |
| out.extend(key_lines) | |
| out.append("```") | |
| if section_counts: | |
| out.append("") | |
| out.append("| Integrity section | Flagged lines |") | |
| out.append("| --- | ---: |") | |
| for name, count in section_counts.items(): | |
| out.append(f"| {name} | {count} |") | |
| if hard_lines: | |
| out.append("") | |
| out.append("Potential blocking lines:") | |
| out.append("") | |
| out.append("```text") | |
| out.extend(hard_lines[:80]) | |
| if len(hard_lines) > 80: | |
| out.append(f"... {len(hard_lines) - 80} more") | |
| out.append("```") | |
| # On failure, attach the last 12000 characters of the validation log in a collapsed block. | |
| if failed: | |
| out.append("") | |
| out.append("<details><summary>Detailed validation log excerpt</summary>") | |
| out.append("") | |
| out.append("```text") | |
| excerpt = log[-12000:] if len(log) > 12000 else log | |
| out.append(excerpt.rstrip()) | |
| out.append("```") | |
| out.append("") | |
| out.append("</details>") | |
| if site_changed: | |
| site_log_path = Path("site-build.log") | |
| site_log = site_log_path.read_text(encoding="utf-8", errors="replace") if site_log_path.exists() else "" | |
| # Remove ANSI colour escape sequences so the excerpt is readable in a comment. | |
| site_log = re.sub(r"\x1b\[[0-9;]*m", "", site_log) | |
| if site_failed: | |
| out.append("") | |
| out.append("<details><summary>Detailed homepage build log excerpt</summary>") | |
| out.append("") | |
| out.append("```text") | |
| excerpt = site_log[-12000:] if len(site_log) > 12000 else site_log | |
| out.append(excerpt.rstrip()) | |
| out.append("```") | |
| out.append("") | |
| out.append("</details>") | |
| elif site_log: | |
| # On a successful build only the last few summary lines of the log are quoted. | |
| summary = [line for line in site_log.splitlines() if "Complete!" in line or "page(s) built" in line] | |
| if summary: | |
| out.append("") | |
| out.append("Homepage build:") | |
| out.append("") | |
| out.append("```text") | |
| out.extend(summary[-4:]) | |
| out.append("```") | |
| Path("validation-notes.md").write_text("\n".join(out) + "\n", encoding="utf-8") | |
| PY | |
| # Comment 1 (change review): header, check table, changed-data review, and the site review when the site changed. | |
| # The leading HTML comment is the marker the posting step searches for to update an existing comment. | |
| { | |
| echo "<!-- techengine-pr-validation -->" | |
| echo "## TechEngine change review: ${result}" | |
| echo | |
| echo "- PR: #${TECHAPI_PR_NUMBER}" | |
| echo "- Ref: \`${TECHAPI_HEAD_REF:-detached}\`" | |
| echo "- Commit: \`${short_sha}\`" | |
| echo "- Requested by: @${REQUESTED_BY}" | |
| echo "- Run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" | |
| echo | |
| echo "| Check | Result |" | |
| echo "| --- | --- |" | |
| echo "| \`python -m app.validate\` | $([ "${{ steps.validate.outputs.app_status }}" = "0" ] && echo PASS || echo FAIL) |" | |
| echo "| \`python integrity_check.py TechAPI/data --strict\` | $([ "${{ steps.validate.outputs.integrity_status }}" = "0" ] && echo PASS || echo FAIL) |" | |
| if [ "${site_changed}" = "true" ]; then | |
| echo "| \`cd TechAPI/site && npm ci && npm run build\` | $([ "${site_build_status}" = "0" ] && echo PASS || echo FAIL) |" | |
| fi | |
| echo | |
| cat change-review.md | |
| if [ "${site_changed}" = "true" ]; then | |
| echo | |
| cat site-change-review.md | |
| fi | |
| } > change-comment.md | |
| # Comment 2 (stats): header, dataset statistics, and the validation notes; it carries its own marker. | |
| { | |
| echo "<!-- techengine-pr-validation-stats -->" | |
| echo "## TechEngine validation stats: ${result}" | |
| echo | |
| echo "- PR: #${TECHAPI_PR_NUMBER}" | |
| echo "- Ref: \`${TECHAPI_HEAD_REF:-detached}\`" | |
| echo "- Commit: \`${short_sha}\`" | |
| echo "- Run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" | |
| echo | |
| cat data-stats.md | |
| echo | |
| cat validation-notes.md | |
| } > stats-comment.md | |
| # Posting is skipped when neither token secret resolved to a value; the step after it emits a warning instead. | |
| - name: Comment on TechAPI PR | |
| if: env.TECHAPI_COMMENT_TOKEN != '' | |
| env: | |
| GH_TOKEN: ${{ env.TECHAPI_COMMENT_TOKEN }} | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| # upsert_comment MARKER BODY_FILE PAYLOAD_FILE: | |
| # looks up the last comment on the PR whose body contains MARKER, then PATCHes it with BODY_FILE | |
| # or, when none exists, POSTs a new comment. PAYLOAD_FILE receives the JSON request body. | |
| upsert_comment() { | |
| local marker="$1" | |
| local body_file="$2" | |
| local payload_file="$3" | |
| local comment_id | |
| comment_id="$(gh api "repos/GetTechAPI/TechAPI/issues/${TECHAPI_PR_NUMBER}/comments" --paginate \ | |
| --jq ".[] | select(.body | contains(\"${marker}\")) | .id" | tail -n 1)" | |
| # jq builds the payload so the markdown body is JSON-escaped correctly. | |
| jq -n --rawfile body "$body_file" '{body: $body}' > "$payload_file" | |
| if [ -n "$comment_id" ]; then | |
| gh api "repos/GetTechAPI/TechAPI/issues/comments/${comment_id}" \ | |
| --method PATCH \ | |
| --input "$payload_file" | |
| else | |
| gh api "repos/GetTechAPI/TechAPI/issues/${TECHAPI_PR_NUMBER}/comments" \ | |
| --method POST \ | |
| --input "$payload_file" | |
| fi | |
| } | |
| upsert_comment "<!-- techengine-pr-validation -->" change-comment.md change-comment.json | |
| upsert_comment "<!-- techengine-pr-validation-stats -->" stats-comment.md stats-comment.json | |
| - name: Warn when comment token is unset | |
| if: env.TECHAPI_COMMENT_TOKEN == '' | |
| run: echo "::warning::TECHENGINEBOT_TOKEN/TECHAPI_TOKEN is not configured; validation ran but no PR comment was posted." | |
| # The job is failed only here, after the comment steps, so the verdict is posted before the run turns red. | |
| - name: Fail on validation errors | |
| if: steps.validate.outputs.status != 'success' || (steps.site_changes.outputs.changed == 'true' && steps.site_build.outputs.status != '0') | |
| run: exit 1 | |