Skip to content

Commit 67887b9

Browse files
committed
chore(scripts): add local pre-PR secrecy linter
Mirror .github/workflows/secrecy-check.yml so banned terms are caught locally before opening/updating a PR instead of after CI fails. This addresses a miss that recurred four times (PRs #188/#203/#207/#220), where the same manual fix had to be applied after CI blocked the PR.
1 parent 9385a9b commit 67887b9

1 file changed

Lines changed: 113 additions & 0 deletions

File tree

scripts/pre-pr-check.sh

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env bash
2+
# scripts/pre-pr-check.sh
3+
#
4+
# Local secrecy linter for PR body / title / branch name before `gh pr create`
5+
# or `gh pr edit --body-file`. Mirrors .github/workflows/secrecy-check.yml so
6+
# failures surface locally instead of being caught by CI after the PR opens.
7+
#
8+
# Usage:
9+
# scripts/pre-pr-check.sh --body-file /tmp/body.md [--title "..."] [--branch "feat/..."]
10+
# scripts/pre-pr-check.sh --body "inline body text"
11+
# scripts/pre-pr-check.sh --stdin < body.md
12+
# scripts/pre-pr-check.sh --scan-sources # same scan CI does for firstdata/sources
13+
#
14+
# Exit code: 0 = clean, 1 = confidential term found, 2 = usage error or body file not found.
15+
#
16+
# Keep the BANNED_TERMS list in sync with .github/workflows/secrecy-check.yml.
17+
set -euo pipefail
18+
19+
# Confidential terms that must not appear in PR metadata or scanned sources.
# check_field compares them case-insensitively as plain substrings.
# Keep this list in sync with .github/workflows/secrecy-check.yml.
BANNED_TERMS=(
  'langfuse'
  'insight pipeline'
  'gitlab'
  'code.mlamp.cn'
  'codex.mlamp.cn'
  'glab'
  'im.deepminer'
  'im-test.xming'
)
29+
30+
# Option state, filled in by the command-line parser further down.
# Text inputs start empty; the two mode switches start disabled (0).
BODY=''
BODY_FILE=''
TITLE=''
BRANCH=''
SCAN_SOURCES=0
READ_STDIN=0
36+
37+
#######################################
# Print this script's leading comment header as usage text, then exit.
#
# The header is the contiguous run of '#' lines that follows the shebang.
# Printing stops at the first non-comment line, so the output never leaks
# code (a fixed line range would also emit `set -euo pipefail`) and stays
# correct when the header grows or shrinks.
#
# Globals:   none
# Arguments: none
# Outputs:   header comment lines on stdout
# Returns:   does not return; exits with status 2
#######################################
usage() {
  # Line 1 is the shebang: skip it, print '#' lines, stop at anything else.
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
  exit 2
}
41+
42+
#######################################
# Parse command-line options into the option-state globals.
#
# Value-taking flags (--body, --body-file, --title, --branch) are checked
# for a following argument before "$2" is read. Under `set -u` a trailing
# flag with no value would otherwise abort with an "unbound variable"
# error instead of a usable message.
#
# Globals:   BODY, BODY_FILE, TITLE, BRANCH, READ_STDIN, SCAN_SOURCES (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages on stderr
# Returns:   0 when all arguments are consumed; exits 2 on a missing value;
#            calls usage on -h/--help or an unknown argument
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --body | --body-file | --title | --branch)
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $1" >&2
          exit 2
        fi
        case "$1" in
          --body) BODY="$2" ;;
          --body-file) BODY_FILE="$2" ;;
          --title) TITLE="$2" ;;
          --branch) BRANCH="$2" ;;
        esac
        shift 2
        ;;
      --stdin)
        READ_STDIN=1
        shift
        ;;
      --scan-sources)
        SCAN_SOURCES=1
        shift
        ;;
      -h | --help)
        usage
        ;;
      *)
        echo "Unknown arg: $1" >&2
        usage
        ;;
    esac
  done
}

parse_args "$@"
54+
55+
# --body-file: load the PR body from disk. A path that is not a regular
# file is reported and the script stops with status 2.
if [[ -n "$BODY_FILE" ]]; then
  [[ -f "$BODY_FILE" ]] || {
    echo "::error::body file not found: $BODY_FILE" >&2
    exit 2
  }
  BODY="$(cat "$BODY_FILE")"
fi

# --stdin: the body comes from standard input and replaces any body set above.
if ((READ_STDIN == 1)); then
  BODY="$(cat)"
fi

# No --branch given: fall back to the current git branch when git is
# available. A failing git call leaves BRANCH empty instead of aborting.
if [[ -z "$BRANCH" ]]; then
  if command -v git >/dev/null 2>&1; then
    BRANCH="$(git rev-parse --abbrev-ref HEAD 2>/dev/null || true)"
  fi
fi

# Flipped to 1 by any check that finds a banned term.
found=0
72+
73+
#######################################
# Report every banned term that occurs in one piece of PR metadata.
#
# Matching is a case-insensitive plain-substring test: both the text and
# each term are lowercased with tr, and the term is quoted inside the
# [[ ]] pattern so it is compared literally rather than as a glob.
#
# Globals:   BANNED_TERMS (read), found (set to 1 on any hit)
# Arguments: $1 - human-readable label used in the message
#            $2 - text to inspect; empty text is skipped
# Outputs:   one "BLOCKED" line per matching term on stderr
# Returns:   0
#######################################
check_field() {
  local label="$1"
  local text="$2"
  # Declared local so the loop variable does not leak into the global
  # scope, where the top-level source scan uses the same name.
  local term lower_text lower_term

  if [[ -z "$text" ]]; then
    return 0
  fi

  lower_text=$(printf '%s' "$text" | tr '[:upper:]' '[:lower:]')
  for term in "${BANNED_TERMS[@]}"; do
    lower_term=$(printf '%s' "$term" | tr '[:upper:]' '[:lower:]')
    if [[ "$lower_text" == *"$lower_term"* ]]; then
      echo "🔴 BLOCKED: '$term' found in $label" >&2
      found=1
    fi
  done
}

# Inspect each piece of PR metadata; empty fields are skipped by check_field.
check_field "branch name" "$BRANCH"
check_field "PR title" "$TITLE"
check_field "PR description" "$BODY"
92+
93+
#######################################
# Recursively scan a directory for banned terms and list offending files.
#
# Each matching path is printed on its own line. The grep output is read
# line by line rather than expanded unquoted, so paths containing spaces
# or glob characters are reported intact.
#
# NOTE(review): grep interprets each term as a regular expression, so the
# '.' in the host-name terms matches any character. That is a superset of
# the literal match; switch to `grep -F` only if the CI workflow does too.
#
# Globals:   BANNED_TERMS (read), found (set to 1 on any hit)
# Arguments: $1 - directory to scan; a missing directory is skipped
# Outputs:   per-term header and matching file paths on stderr
# Returns:   0
#######################################
scan_source_dir() {
  local src_dir="$1"
  local term matches path

  if [[ ! -d "$src_dir" ]]; then
    return 0
  fi

  for term in "${BANNED_TERMS[@]}"; do
    # grep exits non-zero when nothing matches; that is the clean case here.
    matches=$(grep -ril -- "$term" "$src_dir" 2>/dev/null || true)
    if [[ -n "$matches" ]]; then
      echo "🔴 '$term' found in source files:" >&2
      while IFS= read -r path; do
        printf ' %s\n' "$path" >&2
      done <<<"$matches"
      found=1
    fi
  done
}

if [[ "$SCAN_SOURCES" -eq 1 ]]; then
  # Outside a git work tree, fall back to the current directory.
  repo_root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
  src_dir="$repo_root/firstdata/sources"
  scan_source_dir "$src_dir"
fi
107+
108+
# Final verdict: announce success on stdout when nothing was flagged;
# otherwise emit an error annotation on stderr and exit with status 1.
if [[ "$found" -ne 1 ]]; then
  echo "✅ Pre-PR secrecy check passed."
else
  echo "::error::PR metadata or sources contain confidential term(s). Rewrite before opening / updating the PR." >&2
  exit 1
fi

0 commit comments

Comments
 (0)