Skip to content

weekly-ingest

weekly-ingest #4

Workflow file for this run

name: weekly-ingest

# Once a week: scrape the upstream catalogs, draft any missing SKUs into a
# TechAPI worktree, and open a PR for curator review.
on:
  schedule:
    # Mondays 06:29 UTC, after coverage-report (06:23)
    - cron: '29 6 * * 1'
  workflow_dispatch:
    inputs:
      category:
        description: 'Category to ingest'
        type: choice
        options:
          - cpu
          - gpu
          - smartphone
        default: cpu
      limit:
        description: 'Max candidates per source'
        type: string
        default: '50'
      include_drafts:
        description: 'Write incomplete records too (PR marked as draft)'
        type: boolean
        default: false

# The default GITHUB_TOKEN is limited to reading repository contents.
permissions:
  contents: read
jobs:
  # Single job: check out this repo and TechAPI, run the ingest module against
  # TechAPI/data, upload the summary, and open a PR if data/ changed.
  ingest:
    runs-on: ubuntu-latest
    env:
      # Manual-dispatch inputs are empty on scheduled runs, so each value falls
      # back to the same default the workflow_dispatch form declares.
      CATEGORY: ${{ inputs.category || 'cpu' }}
      LIMIT: ${{ inputs.limit || '50' }}
      INCLUDE_DRAFTS: ${{ inputs.include_drafts || 'false' }}
      # NOTE(review): job-level env exposes this PAT to every step, including
      # `pip install` and the ingest run. No step in this workflow reads it
      # under this name (the PR step sets its own GH_TOKEN); confirm whether
      # app.ingest needs it, and drop it from here if not.
      TECHAPI_PR_TOKEN: ${{ secrets.TECHAPI_PR_TOKEN }}
    steps:
      # This repository, at the workspace root; `pip install -e .` below
      # installs from here.
      - uses: actions/checkout@v4

      # Use the PAT when present so we can push to TechAPI later;
      # fall back to the default token for read-only test runs.
      - uses: actions/checkout@v4
        with:
          repository: Seungpyo1007/TechAPI
          path: TechAPI
          token: ${{ secrets.TECHAPI_PR_TOKEN || secrets.GITHUB_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      - name: Install
        run: pip install -e .

      - name: Run ingest
        env:
          TECHAPI_DATA_DIR: ${{ github.workspace }}/TechAPI/data
        run: |
          # Build the argument list as an array so the optional flag is added
          # as its own word instead of relying on an unquoted expansion.
          ingest_args=(
            --category "$CATEGORY"
            --limit "$LIMIT"
            --data-root TechAPI/data
            --summary ingest-summary.md
          )
          if [ "$INCLUDE_DRAFTS" = "true" ]; then
            ingest_args+=(--include-drafts)
          fi
          python -m app.ingest "${ingest_args[@]}"

      - name: Upload summary artifact
        uses: actions/upload-artifact@v4
        with:
          name: ingest-summary
          path: ingest-summary.md

      - name: Check whether ingest produced any additions
        id: changes
        run: |
          cd TechAPI
          # Only look under data/: the PR step stages nothing else, so a change
          # elsewhere in the worktree would otherwise report has_changes=true
          # and then fail at `git commit` with nothing staged.
          if [ -n "$(git status --porcelain -- data/)" ]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Open PR against TechAPI
        if: steps.changes.outputs.has_changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.TECHAPI_PR_TOKEN }}
        run: |
          set -euo pipefail
          # Without the PAT there is nothing to push or open a PR with; warn
          # and succeed so token-less runs stay green.
          if [ -z "${GH_TOKEN:-}" ]; then
            echo "::warning::Ingest produced additions but TECHAPI_PR_TOKEN is unset; skipping PR. Summary attached as artifact."
            exit 0
          fi
          cd TechAPI
          BRANCH="ingest/${CATEGORY}-$(date -u +%Y%m%d-%H%M%S)"
          # Commit subject and PR title are intentionally the same string.
          TITLE="feat(data/${CATEGORY}): weekly ingest"
          git config user.name "techengine-bot"
          git config user.email "techengine-bot@users.noreply.github.com"
          git checkout -b "$BRANCH"
          git add data/
          git commit -m "$TITLE"
          git push -u origin "$BRANCH"
          # The summary file was written at the workspace root, one level up.
          pr_args=(
            --title "$TITLE"
            --body-file ../ingest-summary.md
            --base main
            --head "$BRANCH"
          )
          if [ "$INCLUDE_DRAFTS" = "true" ]; then
            pr_args+=(--draft)
          fi
          gh pr create "${pr_args[@]}"