# weekly-ingest #10
# NOTE: the file viewer this was copied from warned that the file may contain
# hidden or bidirectional Unicode text that can be interpreted differently
# from how it is displayed. Open it in an editor that reveals hidden Unicode
# characters and confirm none are present.
name: weekly-ingest

# Once a week: scrape the upstream catalogs, draft any missing SKUs into a
# TechAPI worktree, and open a PR for curator review.
on:
  schedule:
    # Mondays 06:29 UTC (weekly to keep curator review load sane).
    - cron: '29 6 * * 1'
  # Manual trigger; each input below may override the defaults for one run.
  workflow_dispatch:
    inputs:
      category:
        description: 'Category to ingest'
        type: choice
        options:
          - cpu
          - gpu
          - smartphone
        default: cpu
      limit:
        description: 'Max candidates per source'
        type: string
        default: '50'
      include_drafts:
        description: 'Write incomplete records too (PR marked as draft)'
        type: boolean
        default: false

# The default GITHUB_TOKEN only gets read access to repository contents.
permissions:
  contents: read
jobs:
  # Single job: run the ingest, optionally enrich CPU benchmarks, publish the
  # summaries as an artifact, and open a PR against TechAPI when data changed.
  ingest:
    runs-on: ubuntu-latest
    env:
      # The `|| '...'` fallbacks apply when the run supplies no dispatch input.
      CATEGORY: ${{ inputs.category || 'cpu' }}
      LIMIT: ${{ inputs.limit || '50' }}
      INCLUDE_DRAFTS: ${{ inputs.include_drafts || 'false' }}
      TECHAPI_TOKEN: ${{ secrets.TECHAPI_TOKEN }}
      # Token used to open the PR: the bot PAT when set, otherwise TECHAPI_TOKEN.
      # Empty when neither secret exists; the PR step checks for that.
      TECHAPI_WRITE_TOKEN: ${{ secrets.TECHENGINEBOT_TOKEN || secrets.TECHAPI_TOKEN }}
    steps:
      # This repository, checked out at the workspace root.
      - uses: actions/checkout@v4

      # Use the PAT when present so we can push to TechAPI later;
      # fall back to the default token for read-only test runs.
      - uses: actions/checkout@v4
        with:
          repository: GetTechAPI/TechAPI
          path: TechAPI
          token: ${{ secrets.TECHENGINEBOT_TOKEN || secrets.TECHAPI_TOKEN || secrets.GITHUB_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      - name: Install
        run: pip install -e .

      - name: Run ingest
        env:
          TECHAPI_DATA_DIR: ${{ github.workspace }}/TechAPI/data
        run: |
          # Build the argument list as an array so the optional flag is
          # simply absent (not an empty word) when drafts are disabled.
          ingest_args=(
            --category "$CATEGORY"
            --limit "$LIMIT"
            --data-root TechAPI/data
            --summary ingest-summary.md
          )
          if [ "$INCLUDE_DRAFTS" = "true" ]; then
            ingest_args+=(--include-drafts)
          fi
          python -m app.ingest "${ingest_args[@]}"

      # Variant-safe benchmark backfill on existing CPU records (PassMark).
      # CPU-only; never overwrites, only fills nulls on exact heading matches.
      # Non-fatal: a scrape hiccup must not sink the weekly ingest PR.
      - name: Enrich benchmarks (PassMark, cpu only)
        if: env.CATEGORY == 'cpu'
        continue-on-error: true
        env:
          TECHAPI_DATA_DIR: ${{ github.workspace }}/TechAPI/data
        run: |
          python -m app.ingest.enrich \
            --data-root TechAPI/data \
            --limit "$LIMIT" \
            --min-year 2008 \
            --sleep 0.5 \
            --summary enrich-summary.md

      # The PR body is the ingest summary, followed by the enrich summary
      # (after a horizontal rule) when the enrich step produced one.
      - name: Combine summaries for PR body
        run: |
          cp ingest-summary.md pr-body.md
          if [ -f enrich-summary.md ]; then
            printf '\n\n---\n\n' >> pr-body.md
            cat enrich-summary.md >> pr-body.md
          fi

      - name: Upload summary artifact
        uses: actions/upload-artifact@v4
        with:
          name: ingest-summary
          # enrich-summary.md is absent when the enrich step did not run.
          path: |
            ingest-summary.md
            enrich-summary.md
            pr-body.md

      - name: Check whether ingest produced any additions
        id: changes
        run: |
          # The PR step stages only data/, so only changes under data/ count.
          # Checking the whole worktree would report changes that the later
          # `git add data/` never stages, making `git commit` fail.
          if [ -n "$(git -C TechAPI status --porcelain -- data/)" ]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Open PR against TechAPI
        if: steps.changes.outputs.has_changes == 'true'
        env:
          GH_TOKEN: ${{ env.TECHAPI_WRITE_TOKEN }}
        run: |
          set -euo pipefail
          # Without a write token there is nothing to push with; succeed and
          # leave the summary artifact as the only output.
          if [ -z "${GH_TOKEN:-}" ]; then
            echo "::warning::Ingest produced additions but neither TECHENGINEBOT_TOKEN nor TECHAPI_TOKEN is set; skipping PR. Summary attached as artifact."
            exit 0
          fi
          cd TechAPI
          BRANCH="ingest/${CATEGORY}-$(date -u +%Y%m%d-%H%M%S)"
          git config user.name "TechEngineBot"
          git config user.email "289859915+TechEngineBot@users.noreply.github.com"
          git add data/
          # Defensive: never attempt an empty commit.
          if git diff --cached --quiet; then
            echo "::notice::Nothing staged under data/; skipping PR."
            exit 0
          fi
          git checkout -b "$BRANCH"
          git commit -m "feat(data/${CATEGORY}): weekly ingest"
          git push -u origin "$BRANCH"
          pr_args=(
            --title "feat(data/${CATEGORY}): weekly ingest"
            --body-file ../pr-body.md
            --base main
            --head "$BRANCH"
          )
          # Runs that include incomplete records open the PR as a draft.
          if [ "$INCLUDE_DRAFTS" = "true" ]; then
            pr_args+=(--draft)
          fi
          gh pr create "${pr_args[@]}"