Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions pkg/secretsscan/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,75 @@ var rules = sync.OnceValue(func() []rule {
expression: `(?i)(dckr_pat_[-0-9a-zA-Z]{27})`,
keywords: []string{"dckr_pat"},
},

// --- Patterns added on top of the upstream Trivy / mcp-gateway
// catalogue. Each one targets a credential format whose prefix
// is unique enough to keep the keyword pre-filter cheap and the
// regex's false-positive rate low.

{
// openai-api-key. Every modern OpenAI key (project keys
// `sk-proj-…`, service-account keys `sk-svcacct-…`, admin
// keys `sk-admin-…`, and the original `sk-…` keys reissued
// after May 2024) embeds the literal substring "T3BlbkFJ"
// (base64 for "OpenAI") between two long alphanumeric runs.
// That marker keeps both the keyword filter and the regex
// extremely specific.
expression: `sk-[A-Za-z0-9_-]{20,}T3BlbkFJ[A-Za-z0-9_-]{20,}`,
keywords: []string{"T3BlbkFJ"},
},
{
// anthropic-api-key. Claude keys follow
// `sk-ant-(api|sid)NN-<base64url>` and are ~108 chars long;
// the trailing "AA" is a fixed literal suffix on issued keys,
// not base64 padding (which would be `=`).
expression: `sk-ant-(api|sid)\d{2}-[A-Za-z0-9_-]{93}AA`,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM] Anthropic key regex hard-codes exact body length — brittle to format variations

The regex sk-ant-(api|sid)\d{2}-[A-Za-z0-9_-]{93}AA requires the payload section to be exactly 93 characters followed by a literal AA suffix. While the accompanying comment says keys are "~108 chars long" and AA is standard base64 padding, using a fixed length means any key that doesn't precisely match this layout will silently escape detection.

Real Anthropic API keys come in at least two observed formats (api03-… and sid01-…), and Anthropic could change the key length in a future rotation without notice. A more resilient pattern would use a minimum bound:

// Current (brittle — misses keys of any other length)
expression: `sk-ant-(api|sid)\d{2}-[A-Za-z0-9_-]{93}AA`

// More resilient alternative
expression: `sk-ant-(api|sid)\d{2}-[A-Za-z0-9_-]{90,100}AA`

If the exact 93-char length is a deliberate high-precision choice to minimise false positives, it would help to document that reasoning in the comment so future maintainers don't wonder why it's a fixed count.

keywords: []string{"sk-ant-"},
},
{
// google-api-key. Used by Maps, Cloud, Firebase, Gemini and
// most other Google REST APIs. The `AIza` prefix is fixed.
expression: `AIza[0-9A-Za-z_-]{35}`,
keywords: []string{"AIza"},
},
{
// google-oauth-client-secret. Issued in the Google Cloud
// Console for OAuth 2.0 clients; always 35 chars total.
expression: `GOCSPX-[A-Za-z0-9_-]{28}`,
keywords: []string{"GOCSPX-"},
},
{
// digitalocean-pat. v1 personal-access tokens are 71 chars
// total: `dop_v1_` + 64 lowercase hex.
expression: `dop_v1_[a-f0-9]{64}`,
keywords: []string{"dop_v1_"},
},
{
// stripe-webhook-signing-secret. Used to verify incoming
// webhook payloads; leakage lets attackers forge events.
expression: `whsec_[A-Za-z0-9]{32,}`,
keywords: []string{"whsec_"},
},
{
// jfrog-artifactory-api-key. Distinct from access tokens;
// the `AKCp` prefix is documented and the body is between
// 69 and 73 alphanumeric characters depending on when the
// key was issued.
expression: `AKCp[A-Za-z0-9]{69,73}`,
keywords: []string{"AKCp"},
},
{
// tencent-cloud-secret-id. Tencent's analogue of an AWS
// access-key-id, used by the COS / CVM / etc. APIs.
expression: `AKID[A-Za-z0-9]{32}`,
keywords: []string{"AKID"},
},
{
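// sentry-user-auth-token. The `sntrys_` prefix is followed
// by a base64url-encoded JWT-style payload that always
// starts with `eyJ` (the base64 of `{"`).
// NOTE(review): Sentry documents `sntrys_` as the prefix of
// organization auth tokens (user auth tokens use `sntryu_`);
// confirm the rule label before relying on it.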
expression: `sntrys_eyJ[A-Za-z0-9+/=_-]{40,}`,
keywords: []string{"sntrys_"},
},
}
})

Expand Down
17 changes: 17 additions & 0 deletions pkg/secretsscan/secrets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,28 @@ func TestContainsSecretsRecognisesKnownTokens(t *testing.T) {
}{
{"github_pat", "ghp_cxLeRrvbJfmYdUtr70xnNE3Q7Gvli43s19PD"},
{"docker_pat", "dckr_pat_" + "AAAAAAAAAAAAAAAAAAAAAAAAAAA"},
// Patterns added on top of the upstream catalogue. Each value
// is split across string concatenation so the verbatim token
// never appears on a single source line in case downstream
// tooling scans the test file itself.
{"openai_project_key", "sk-proj-" + strings.Repeat("A", 25) + "T3Blbk" + "FJ" + strings.Repeat("B", 25)},
{"anthropic_api_key", "sk-ant-" + "api03-" + strings.Repeat("X", 93) + "AA"},
{"google_api_key", "AIza" + strings.Repeat("a", 35)},
{"google_oauth_client_secret", "GOCSPX-" + strings.Repeat("a", 28)},
{"digitalocean_pat", "dop_v1_" + strings.Repeat("a", 64)},
{"stripe_webhook_signing_secret", "whsec_" + strings.Repeat("a", 40)},
{"jfrog_api_key", "AKCp" + strings.Repeat("a", 73)},
{"tencent_cloud_secret_id", "AKID" + strings.Repeat("a", 32)},
{"sentry_user_auth_token", "sntrys_" + "eyJ" + strings.Repeat("a", 60)},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
assert.Truef(t, secretsscan.ContainsSecrets(tc.text), "must detect %s", tc.name)
out := secretsscan.Redact(tc.text)
assert.NotContainsf(t, out, tc.text, "raw secret must be gone after Redact: %q", out)
assert.Containsf(t, out, secretsscan.RedactionMarker,
"redaction marker must appear in %q", out)
})
}
}
Expand Down
Loading