#!/usr/bin/env bash
# scripts/pre-pr-check.sh
#
# Local secrecy linter for PR body / title / branch name before `gh pr create`
# or `gh pr edit --body-file`. Mirrors .github/workflows/secrecy-check.yml so
# failures surface locally instead of being caught by CI after the PR opens.
#
# Usage:
#   scripts/pre-pr-check.sh --body-file /tmp/body.md [--title "..."] [--branch "feat/..."]
#   scripts/pre-pr-check.sh --body "inline body text"
#   scripts/pre-pr-check.sh --stdin < body.md
#   scripts/pre-pr-check.sh --scan-sources   # same scan CI does for firstdata/sources
#
# If several body sources are given, --stdin wins over --body-file, which wins
# over --body. Without --branch, the current git branch (if any) is checked.
#
# Exit code: 0 = clean, 1 = confidential term found, 2 = usage error / help.
#
# Keep the BANNED_TERMS list in sync with .github/workflows/secrecy-check.yml.
set -euo pipefail

# Terms that must not appear in PR metadata or scanned sources. Matching is
# case-insensitive and by literal substring.
BANNED_TERMS=(
  "langfuse"
  "insight pipeline"
  "gitlab"
  "code.mlamp.cn"
  "codex.mlamp.cn"
  "glab"
  "im.deepminer"
  "im-test.xming"
)
readonly BANNED_TERMS

# Inputs, filled in by parse_args / load_body / detect_branch.
BODY=""
BODY_FILE=""
TITLE=""
BRANCH=""
SCAN_SOURCES=0
READ_STDIN=0

# Set to 1 by check_field / scan_sources as soon as any banned term is seen.
found=0

#######################################
# Print the comment header of this script as help text, then exit 2.
# The header is every '#' line between the shebang and the first non-comment
# line, so the help text follows the header without a hard-coded line range.
# Outputs: help text on stdout
#######################################
usage() {
  awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
  exit 2
}

#######################################
# Abort with a usage error when an option that needs a value is the last word.
# Arguments: $1 - option name, $2 - number of words left including the option
#######################################
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Missing value for $1" >&2
    usage
  fi
}

#######################################
# Parse command-line options into the input globals.
# Globals:   BODY, BODY_FILE, TITLE, BRANCH, SCAN_SOURCES, READ_STDIN (written)
# Arguments: the script's command-line words
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --body)
        require_value "$1" "$#"
        BODY="$2"
        shift 2
        ;;
      --body-file)
        require_value "$1" "$#"
        BODY_FILE="$2"
        shift 2
        ;;
      --title)
        require_value "$1" "$#"
        TITLE="$2"
        shift 2
        ;;
      --branch)
        require_value "$1" "$#"
        BRANCH="$2"
        shift 2
        ;;
      --stdin)
        READ_STDIN=1
        shift
        ;;
      --scan-sources)
        SCAN_SOURCES=1
        shift
        ;;
      -h | --help)
        usage
        ;;
      *)
        echo "Unknown arg: $1" >&2
        usage
        ;;
    esac
  done
}

#######################################
# Resolve the PR body: --body-file replaces --body, and --stdin replaces both.
# Exits 2 when the body file does not exist.
# Globals: BODY_FILE, READ_STDIN (read), BODY (written)
#######################################
load_body() {
  if [[ -n "$BODY_FILE" ]]; then
    if [[ ! -f "$BODY_FILE" ]]; then
      echo "::error::body file not found: $BODY_FILE" >&2
      exit 2
    fi
    # $(<file) reads the file without passing its name to an external command,
    # so a name starting with '-' cannot be mistaken for an option.
    BODY="$(<"$BODY_FILE")"
  fi

  if [[ "$READ_STDIN" -eq 1 ]]; then
    BODY="$(cat)"
  fi
}

#######################################
# Default BRANCH to the current git branch when none was supplied.
# Best-effort: a missing git binary or a non-repository leaves BRANCH empty.
# Globals: BRANCH (read, written)
#######################################
detect_branch() {
  if [[ -z "$BRANCH" ]] && command -v git >/dev/null 2>&1; then
    BRANCH="$(git rev-parse --abbrev-ref HEAD 2>/dev/null || true)"
  fi
}

#######################################
# Lower-case a string.
# Arguments: $1 - text
# Outputs:   the lower-cased text on stdout
#######################################
to_lower() {
  printf '%s' "$1" | tr '[:upper:]' '[:lower:]'
}

#######################################
# Report every banned term that occurs in one piece of PR metadata.
# Globals:   BANNED_TERMS (read), found (set to 1 on a hit)
# Arguments: $1 - label used in the message, $2 - text to check
# Outputs:   one line per hit on stderr
#######################################
check_field() {
  local label="$1"
  local text="$2"
  local term lower_text lower_term

  if [[ -z "$text" ]]; then
    return 0
  fi

  lower_text=$(to_lower "$text")
  for term in "${BANNED_TERMS[@]}"; do
    lower_term=$(to_lower "$term")
    # The quoted right-hand side is matched literally, not as a glob.
    if [[ "$lower_text" == *"$lower_term"* ]]; then
      echo "🔴 BLOCKED: '$term' found in $label" >&2
      found=1
    fi
  done
}

#######################################
# Report files under the sources directory that contain a banned term.
# Globals:   BANNED_TERMS (read), found (set to 1 on a hit)
# Arguments: $1 - directory to scan (optional; defaults to firstdata/sources
#                 under the git top-level, or under $PWD outside a repository)
# Outputs:   hits, and a warning when the directory is missing, on stderr
#######################################
scan_sources() {
  local src_dir="${1:-}"
  local repo_root term matches path

  if [[ -z "$src_dir" ]]; then
    repo_root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
    src_dir="$repo_root/firstdata/sources"
  fi

  if [[ ! -d "$src_dir" ]]; then
    # Still a skip rather than a failure, but no longer a silent one.
    echo "warning: $src_dir not found; source scan skipped" >&2
    return 0
  fi

  for term in "${BANNED_TERMS[@]}"; do
    # -F: terms are literal strings, so the dots in "code.mlamp.cn" must not
    # act as regex wildcards. grep exits 1 on "no match", hence `|| true`.
    matches=$(grep -rilF -- "$term" "$src_dir" 2>/dev/null || true)
    if [[ -n "$matches" ]]; then
      echo "🔴 '$term' found in source files:" >&2
      # One path per line; reading line-wise keeps paths with spaces intact.
      while IFS= read -r path; do
        printf ' %s\n' "$path" >&2
      done <<<"$matches"
      found=1
    fi
  done
}

#######################################
# Entry point: gather inputs, run all checks, report the verdict.
# Arguments: the script's command-line words
# Returns:   0 when clean; exits 1 when a banned term was found
#######################################
main() {
  parse_args "$@"
  load_body
  detect_branch

  check_field "branch name" "$BRANCH"
  check_field "PR title" "$TITLE"
  check_field "PR description" "$BODY"

  if [[ "$SCAN_SOURCES" -eq 1 ]]; then
    scan_sources
  fi

  if [[ "$found" -eq 1 ]]; then
    echo "::error::PR metadata or sources contain confidential term(s). Rewrite before opening / updating the PR." >&2
    exit 1
  fi

  echo "✅ Pre-PR secrecy check passed."
}

main "$@"