diff --git a/internal/topsrv/nginx/log.go b/internal/topsrv/nginx/log.go index e37d244..ae9df57 100644 --- a/internal/topsrv/nginx/log.go +++ b/internal/topsrv/nginx/log.go @@ -635,8 +635,13 @@ func (c *LogCollector) recordLine(p *ParsedLine) { //nolint:gocognit,nestif func normalizeURI(request string) string { parts := strings.SplitN(request, " ", 3) + // A well-formed $request is "METHOD URI HTTP/x.y" — at least one space. + // Binary handshakes (TLS ClientHello hitting an HTTP port, SSH probes, etc.) + // arrive as a single space-less blob; without this guard they bypassed + // normalizePath entirely and pushed raw bytes into the uri label, blowing + // past Prometheus' 256-byte cap. if len(parts) < 2 { - return request + return invalidMarker } path := parts[1] if i := strings.IndexByte(path, '?'); i >= 0 { diff --git a/internal/topsrv/nginx/nginx_test.go b/internal/topsrv/nginx/nginx_test.go index 8c9df03..bcb5da4 100644 --- a/internal/topsrv/nginx/nginx_test.go +++ b/internal/topsrv/nginx/nginx_test.go @@ -551,9 +551,16 @@ func TestNormalizeURI(t *testing.T) { {"GET /people/tommy-brewster-6401345/ HTTP/1.1", "/people/:slug/"}, // hex hashes in media URLs — truncated by depth {"GET /media/roles/e/ef/d763d0a80cfe86a9fcc378db4d5935bf.jpg HTTP/1.1", "/media/roles/:rest"}, + // Garbage $request with no spaces (TLS handshake hitting plain HTTP port, + // raw SSH probe, etc.) must NOT bypass normalization and pollute the uri + // label — pre-0.1.3 it was returned verbatim and blew past the 256-byte + // Prometheus label cap downstream. + {"\x06\x00\x00\x003_xgDcA\x00\x00\x00\x00\x00\x00\x00\xB0(\xC7\xEF~", invalidMarker}, + {"", invalidMarker}, + {"GET", invalidMarker}, } for _, tt := range tests { - assert.Equal(t, tt.want, normalizeURI(tt.request), tt.request) + assert.Equal(t, tt.want, normalizeURI(tt.request), "%q", tt.request) } }