diff --git a/.github/workflows/github-actions-on-label-create.yml b/.github/workflows/github-actions-on-label-create.yml
index 6b809642b11..f52d1b69608 100644
--- a/.github/workflows/github-actions-on-label-create.yml
+++ b/.github/workflows/github-actions-on-label-create.yml
@@ -88,3 +88,99 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ github.token }}
           UPSTREAM_PR: ${{ steps.send_pr.outputs.pr }}
+
+      - name: Request CODEOWNERS reviewers on upstream PR
+        if: steps.send_pr.outputs.pr != ''
+        env:
+          GH_TOKEN: ${{ steps.resolve_token.outputs.token }}
+          PR: ${{ steps.send_pr.outputs.pr }}
+          UPSTREAM: ${{ env.UPSTREAM_OWNER }}/${{ env.UPSTREAM_REPO }}
+        run: |
+          set -euo pipefail
+          # System Python on the runner is PEP 668 externally-managed; use a venv.
+          python3 -m venv /tmp/codeowners-venv
+          /tmp/codeowners-venv/bin/pip install --quiet pathspec
+          /tmp/codeowners-venv/bin/python <<'PY'
+          # Request review from the CODEOWNERS of every file the upstream PR
+          # touches, using only REST calls made through the gh CLI.
+          import base64, json, os, subprocess, sys
+          from pathspec import GitIgnoreSpec
+
+          pr = os.environ["PR"]
+          upstream = os.environ["UPSTREAM"]
+
+          pr_json = subprocess.check_output(
+              ["gh", "api", f"repos/{upstream}/pulls/{pr}"], text=True)
+          pr_data = json.loads(pr_json)
+          author = pr_data["user"]["login"]
+          base_ref = pr_data["base"]["ref"]
+
+          # Authoritative CODEOWNERS is the one on the PR base branch.
+          raw = subprocess.check_output(
+              ["gh", "api", "--method", "GET",
+               f"repos/{upstream}/contents/.github/CODEOWNERS",
+               "-f", f"ref={base_ref}", "--jq", ".content"], text=True).strip()
+          codeowners = base64.b64decode(raw).decode()
+
+          # Parse each rule into (matcher, owners); text after "#" is a comment.
+          rules = []
+          for line in codeowners.splitlines():
+              line = line.split("#", 1)[0].strip()
+              if not line:
+                  continue
+              pattern, *rule_owners = line.split()
+              rules.append((GitIgnoreSpec.from_lines([pattern]), rule_owners))
+
+          # GitHub caps /pulls/{n}/files at 3000 even with --paginate; truly
+          # enormous PRs will under-request owners for the overflow.
+          files = subprocess.check_output(
+              ["gh", "api", f"repos/{upstream}/pulls/{pr}/files", "--paginate",
+               "--jq", ".[].filename"], text=True).splitlines()
+
+          # The last matching rule wins, so scan from the bottom of the file
+          # and stop at the first hit. A rule without owners matches too,
+          # which leaves the path unowned.
+          owners = set()
+          for path in files:
+              for spec, rule_owners in reversed(rules):
+                  if spec.match_file(path):
+                      owners.update(o.lstrip("@") for o in rule_owners)
+                      break
+
+          # CODEOWNERS may name an owner by e-mail address. An address is not
+          # a login the reviewers endpoint can use, and a rejected request
+          # would fail this step (check=True), so skip such entries.
+          emails = sorted(o for o in owners if "@" in o)
+          if emails:
+              print("Skipping e-mail CODEOWNERS entries:", emails)
+              owners.difference_update(emails)
+
+          # CODEOWNERS lists teams as "org/slug"; the REST endpoint wants the
+          # bare slug in team_reviewers.
+          team_slugs = sorted(t.split("/", 1)[1] for t in owners if "/" in t)
+          users = sorted(o for o in owners
+                         if "/" not in o and o.lower() != author.lower())
+
+          if not (team_slugs or users):
+              print("No CODEOWNERS-matched reviewers.")
+              sys.exit(0)
+
+          # Use the REST POST endpoint directly: `gh pr edit --add-reviewer`
+          # runs a GraphQL query that needs read:org, which our tokens don't
+          # have. POST /pulls/{n}/requested_reviewers only writes, so the
+          # existing repo / pull-requests:write scope is enough. Each array
+          # is capped at 15 per call.
+          def request(body):
+              """POST one reviewer-request payload; raise if the call fails."""
+              print("Requesting:", body)
+              subprocess.run(
+                  ["gh", "api", "--method", "POST",
+                   f"repos/{upstream}/pulls/{pr}/requested_reviewers",
+                   "--input", "-"],
+                  input=json.dumps(body), text=True, check=True)
+
+          for i in range(0, len(team_slugs), 15):
+              request({"team_reviewers": team_slugs[i:i + 15]})
+          for i in range(0, len(users), 15):
+              request({"reviewers": users[i:i + 15]})
+          PY