Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions .github/workflows/monthly-vision-eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ on:

permissions:
contents: write
pull-requests: write # open the auto-merge report PR

jobs:
vision-eval:
Expand All @@ -63,6 +64,11 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v6
with:
# RELEASE_PAT (not GITHUB_TOKEN) so the report PR triggers
# ci.yml's `test` check; GITHUB_TOKEN-opened PRs don't start
# workflow runs, leaving the required check pending forever.
token: ${{ secrets.RELEASE_PAT }}

- name: Setup Node (pinned)
uses: actions/setup-node@v6
Expand Down Expand Up @@ -158,23 +164,36 @@ jobs:
--critic anthropic \
--report "docs/evals/monthly/${{ env.REPORT_DATE }}-image.md"

- name: Commit all three reports
- name: Open report PR (auto-merge)
# Smoke runs prove integration; their numbers are not
# publishable (n=2 is well under any reportable threshold) so
# the smoke path skips the commit entirely. The reports still
# the smoke path skips the PR entirely. The reports still
# exist in the workflow's working tree if you need to inspect
# them via the Actions UI.
#
# main is protected (requires the `test` check), so push the
# reports to a dated branch and open a PR that auto-merges
# once ci.yml goes green.
if: inputs.smoke_mode != true
env:
GH_TOKEN: ${{ secrets.RELEASE_PAT }}
run: |
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git config user.name "github-actions[bot]"
git add docs/evals/monthly/
if git diff --cached --quiet; then
echo "No report changes to commit."
else
git commit -m "docs(evals): monthly source + vision + image-gen run ${{ env.REPORT_DATE }}" -m "Automated monthly eval across all three verticals: text source linter, text rendered-pixel vision critic, and image-generation vision critic. See the three files for per-cell numbers."
git push
exit 0
fi
BRANCH="bot/monthly-vision-eval-${REPORT_DATE}"
git checkout -b "$BRANCH"
git commit -m "docs(evals): monthly source + vision + image-gen run ${REPORT_DATE}" -m "Automated monthly eval across all three verticals: text source linter, text rendered-pixel vision critic, and image-generation vision critic. See the three files for per-cell numbers."
git push -u origin "$BRANCH" --force-with-lease
gh pr create --base main --head "$BRANCH" \
--title "docs(evals): monthly run ${REPORT_DATE}" \
--body "Automated monthly eval report (source + vision + image-gen). Merges automatically once CI passes." \
|| echo "PR already exists for $BRANCH"
gh pr merge "$BRANCH" --auto --squash --delete-branch

- name: Smoke-mode summary
if: inputs.smoke_mode == true
Expand Down
29 changes: 24 additions & 5 deletions .github/workflows/weekly-eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ on:
default: ""

permissions:
contents: write # commit the report
contents: write # push the report branch
issues: write # open a drift issue if the run moves outside tolerance
pull-requests: write # open the auto-merge report PR

jobs:
eval:
Expand All @@ -50,6 +51,12 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v6
with:
# RELEASE_PAT (not GITHUB_TOKEN) so the report PR triggers
# ci.yml's `test` check. PRs opened by GITHUB_TOKEN don't
# start workflow runs, which would leave the required check
# forever pending and auto-merge stuck.
token: ${{ secrets.RELEASE_PAT }}

- name: Setup Node
uses: actions/setup-node@v6
Expand Down Expand Up @@ -125,17 +132,29 @@ jobs:
echo "drift=stable" >> "$GITHUB_OUTPUT"
fi

- name: Commit weekly report
- name: Open report PR (auto-merge)
env:
GH_TOKEN: ${{ secrets.RELEASE_PAT }}
run: |
# main is protected (requires the `test` check), so the bot
# can't push reports directly. Instead push to a dated branch
# and open a PR that auto-merges once ci.yml goes green.
git config user.email "noreply@adastracomputing.com"
git config user.name "ahd-weekly-eval"
git add docs/evals/weekly/
if git diff --cached --quiet; then
echo "No report changes to commit."
else
git commit -m "docs(evals): weekly run ${{ env.REPORT_DATE }}" -m "Automated weekly eval against Cloudflare Workers AI OSS roster. See the report for per-cell numbers."
git push
exit 0
fi
BRANCH="bot/weekly-eval-${REPORT_DATE}"
git checkout -b "$BRANCH"
git commit -m "docs(evals): weekly run ${REPORT_DATE}" -m "Automated weekly eval against Cloudflare Workers AI OSS roster. See the report for per-cell numbers."
git push -u origin "$BRANCH" --force-with-lease
gh pr create --base main --head "$BRANCH" \
--title "docs(evals): weekly run ${REPORT_DATE}" \
--body "Automated weekly eval report. Merges automatically once CI passes." \
|| echo "PR already exists for $BRANCH"
gh pr merge "$BRANCH" --auto --squash --delete-branch

- name: Open drift issue if flagged
if: steps.drift.outputs.drift == 'flagged'
Expand Down