Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitleaks.repo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ paths = [
'''verify\.sh$''',
'''repomix-output\..*$''',
'''tests/test_npmrc_validation\.sh$''', # Contains test fixtures with fake tokens
'''tests/test_arn_detection\.sh$''', # Contains test ARNs with fake account IDs
]

regexes = [
Expand All @@ -18,4 +19,9 @@ regexes = [
# fake npm tokens used in test_npmrc_validation.sh
'''npm_abc123456789''',
'''npm_secret123''',
# fake account IDs used in test_arn_detection.sh (non-documentation examples)
'''987654321098''',
'''876543210987''',
'''765432109876''',
'''555566667777''',
]
33 changes: 33 additions & 0 deletions docs/blocked-files.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,42 @@ Gitleaks detects 100+ secret patterns including:
- Database connection strings
- Private keys embedded in code
- Generic high-entropy strings
- **AWS ARNs with hardcoded account IDs** (custom rule)

**This runs on ALL files**, not just the ones listed above.

### AWS ARN Detection

**Why detected:** AWS ARNs containing hardcoded 12-digit account IDs reveal sensitive infrastructure information useful for reconnaissance attacks.

| Blocked | Allowed |
|---------|---------|
| `arn:aws:iam::987654321098:role/prod` | `arn:aws:iam::${ACCOUNT_ID}:role/prod` |
| `arn:aws-us-gov:iam::876543210987:user/admin` | `arn:aws:s3:::my-bucket` (no account ID) |
| `arn:aws-cn:lambda:cn-north-1:555566667777:function:app` | `arn:aws:iam::*:role/ServiceRole` (wildcard) |

**Allowed patterns:**

- **Variable interpolation:** `${ACCOUNT_ID}`, `${AWS_ACCOUNT_ID}`, `$ACCOUNT`
- **S3 bucket ARNs:** No account ID in S3 ARN format
- **Wildcard accounts:** `arn:aws:iam::*:role/ServiceRole`
- **Documentation examples:** AWS standard example IDs (`123456789012`, `111122223333`, etc.)

**All AWS partitions detected:**
- `aws` (commercial)
- `aws-us-gov` (GovCloud)
- `aws-cn` (China)

**Alternative:** Use environment variables or parameter store for account IDs:

```bash
# Instead of:
aws sts assume-role --role-arn arn:aws:iam::987654321098:role/prod --role-session-name deploy

# Use:
aws sts assume-role --role-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:role/prod" --role-session-name deploy
```

### Example Detection

Even if you name a file innocuously, gitleaks will detect secrets:
Expand Down
29 changes: 29 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,35 @@ title = "Global Developer Default (gitleaks)"
[extend]
useDefault = true

# AWS ARN detection - warns on hardcoded 12-digit account IDs
# Detects: arn:aws:iam::<real 12-digit account ID>:user/admin
# Allows: arn:aws:iam::123456789012:user/admin (AWS documentation example ID)
# Allows: arn:aws:iam::${ACCOUNT_ID}:user/admin (variable interpolation)
# Allows: arn:aws:s3:::bucket-name (S3 buckets have no account ID field)
# See: https://github.com/cloud-gov/caulking/issues/111
[[rules]]
id = "aws-arn-account-id"
description = "AWS ARN with hardcoded account ID (reconnaissance risk)"
# The partition suffix is a non-capturing group on purpose. This rule sets no
# secretGroup, and a capturing group here can end up being reported as the
# secret (for example "-us-gov") instead of the full ARN, which would also
# keep the account-ID allowlist entries below from matching.
regex = '''arn:aws(?:-us-gov|-cn)?:[a-z0-9-]+:[a-z0-9-]*:[0-9]{12}:[a-zA-Z0-9/_=+.@:-]+'''
keywords = ["arn:aws"]
tags = ["aws", "arn", "account-id"]

[rules.allowlist]
description = "Safe ARN patterns (variable interpolation, documentation)"
regexes = [
# Variable interpolation patterns
'''\$\{[^}]*ACCOUNT[^}]*\}''',
'''\$\{[^}]*AWS[^}]*\}''',
'''\$ACCOUNT''',
'''\$AWS_ACCOUNT''',
# Documentation example account IDs (AWS docs use these)
'''123456789012''',
'''111122223333''',
'''444455556666''',
'''999999999999''',
# Wildcard patterns
''':\*:''',
]

[allowlist]
description = "Global allowlist for known-safe patterns (keep small)."

Expand Down
218 changes: 218 additions & 0 deletions tests/test_arn_detection.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#!/usr/bin/env bash
# test_arn_detection.sh
#
# Test AWS ARN detection - warns on hardcoded account IDs
# Issue: https://github.com/cloud-gov/caulking/issues/111
#
# Each case writes a fixture file into a throwaway git repository, stages it,
# runs a copy of the hook wrapper, and checks the exit status: non-zero means
# the commit would be blocked, zero means it would be allowed.

set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
WRAPPER_SRC="$ROOT/hooks/hook-wrapper.sh"

tmp="$(mktemp -d "${TMPDIR:-/tmp}/caulking.arn.XXXXXX")"
cleanup() { rm -rf "$tmp" || true; }
trap cleanup EXIT

command -v gitleaks > /dev/null 2>&1 || {
  echo "SKIP: gitleaks not installed"
  exit 0
}

# Point HOME and XDG_CONFIG_HOME at the scratch directory so the test does not
# read or modify the invoking user's configuration.
export HOME="$tmp/home"
export XDG_CONFIG_HOME="$tmp/xdg"
mkdir -p "$HOME" "$XDG_CONFIG_HOME"

repo="$tmp/repo"
mkdir -p "$repo"
cd "$repo"
git init -q
git config user.name "test"
git config user.email "test@gsa.gov"

# Create gitleaks config with ARN detection rule.
# The partition suffix uses a non-capturing group so that the full ARN, not
# just "-us-gov" / "-cn", is what gets reported and checked against the
# allowlist.
mkdir -p "$XDG_CONFIG_HOME/gitleaks"
cat > "$XDG_CONFIG_HOME/gitleaks/config.toml" << 'EOF'
title = "ARN Detection Test Config"

[extend]
useDefault = true

[[rules]]
id = "aws-arn-account-id"
description = "AWS ARN with hardcoded account ID"
regex = '''arn:aws(?:-us-gov|-cn)?:[a-z0-9-]+:[a-z0-9-]*:[0-9]{12}:[a-zA-Z0-9/_=+.@:-]+'''
keywords = ["arn:aws"]

[rules.allowlist]
regexes = [
'''\$\{[^}]*ACCOUNT[^}]*\}''',
'''\$\{[^}]*AWS[^}]*\}''',
'''\$ACCOUNT''',
'''\$AWS_ACCOUNT''',
'''123456789012''',
'''111122223333''',
'''444455556666''',
'''999999999999''',
''':\*:''',
]
EOF

hook="$tmp/pre-commit"
cp -f "$WRAPPER_SRC" "$hook"
chmod +x "$hook"

fixture="arn_test.txt"
case_no=0

# run_case EXPECT TITLE LABEL CONTENT
#   EXPECT   "blocked" (hook must exit non-zero) or "allowed" (hook must exit 0)
#   TITLE    text shown in the "=== Test N: ... ===" banner
#   LABEL    subject used in the PASS/FAIL messages
#   CONTENT  fixture file body (a trailing newline is appended)
# Exits the whole script with status 1 on the first failed expectation.
run_case() {
  local expect="$1" title="$2" label="$3" content="$4"
  local rc

  case_no=$((case_no + 1))
  if [[ "$case_no" -gt 1 ]]; then
    echo ""
  fi
  echo "=== Test ${case_no}: ${title} ==="

  printf '%s\n' "$content" > "$fixture"
  git add "$fixture"

  # Capture the exit status without tripping `set -e`.
  "$hook" > /dev/null 2>&1 && rc=0 || rc=$?

  case "$expect" in
    blocked)
      if [[ "$rc" -eq 0 ]]; then
        echo "FAIL: ${label} should have been blocked"
        exit 1
      fi
      ;;
    allowed)
      if [[ "$rc" -ne 0 ]]; then
        echo "FAIL: ${label} should have been allowed"
        exit 1
      fi
      ;;
    *)
      echo "FAIL: unknown expectation '${expect}'"
      exit 1
      ;;
  esac
  echo "PASS: ${label} ${expect}"

  # This repository never gets a first commit, so unstage through the index
  # directly instead of resetting against HEAD. Best-effort, like the rest of
  # the cleanup.
  git rm -q -f --cached --ignore-unmatch -- "$fixture" > /dev/null 2>&1 || true
  rm -f "$fixture"
}

run_case blocked "Hardcoded IAM ARN should be BLOCKED" "Hardcoded IAM ARN" \
  '# Production IAM role
arn:aws:iam::987654321098:role/prod-admin-role'

run_case blocked "GovCloud ARN should be BLOCKED" "GovCloud ARN" \
  '# GovCloud resource
arn:aws-us-gov:iam::876543210987:user/gov-admin'

run_case blocked "China region ARN should be BLOCKED" "China ARN" \
  '# China region resource
arn:aws-cn:iam::765432109876:role/cn-role'

run_case allowed "Variable interpolation ARN should be ALLOWED" "Variable interpolation ARN" \
  '# Safe - uses variable interpolation
arn:aws:iam::${ACCOUNT_ID}:role/service-role
arn:aws:lambda:us-east-1:${AWS_ACCOUNT_ID}:function:processor'

run_case allowed "AWS documentation example account IDs should be ALLOWED" "Documentation example ARNs" \
  '# Documentation examples (AWS uses these in docs)
arn:aws:iam::123456789012:user/ExampleUser
arn:aws:iam::111122223333:role/ExampleRole'

run_case allowed "S3 ARN (no account ID) should be ALLOWED" "S3 ARN" \
  '# S3 bucket ARNs don'"'"'t have account IDs
arn:aws:s3:::my-production-bucket
arn:aws:s3:::my-bucket/*'

run_case allowed "Wildcard account ARN should be ALLOWED" "Wildcard ARN" \
  '# IAM policy with wildcard
arn:aws:iam::*:role/ServiceRole'

run_case blocked "Multiple services with hardcoded IDs should be BLOCKED" "Multiple hardcoded ARNs" \
  '# Multiple sensitive ARNs
arn:aws:lambda:us-east-1:555566667777:function:process-secrets
arn:aws:secretsmanager:us-west-2:555566667777:secret:api-key
arn:aws:ec2:us-east-1:555566667777:instance/i-abc123'

# Guards the partition group in the rule regex: the documentation-ID allowlist
# must apply to non-commercial partitions too.
run_case allowed "GovCloud documentation example ARN should be ALLOWED" "GovCloud documentation example ARN" \
  '# Documentation example in a non-commercial partition
arn:aws-us-gov:iam::123456789012:user/ExampleUser'

echo ""
echo "All ARN detection tests passed!"
Loading