From 0815bfe8ad86a953378b367f5395148fa2588bf4 Mon Sep 17 00:00:00 2001 From: Sergey Bykov Date: Tue, 19 May 2026 23:56:52 +0300 Subject: [PATCH] Fix normalizeURI bypass for binary $request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Return invalidMarker when $request has no spaces — previously the SplitN early-return handed the raw blob back to the caller, skipping every normalizePath guard (length cap, utf8 check, control-byte and \x escape checks) and letting binary land in the uri label - This surfaced in prod after 0.1.2: gatesrv kept logging label_value_too_long for topsrv_nginx_4xx_requests_total on riderhelp with samples like "\x06\x00\x00\x003_xgDcA\x00..." even though the agent reported version=0.1.2; the giveaway was no "/:invalid" bucket ever appearing in VM for that instance - Add regression cases (the riderhelp TLS-handshake sample, empty string, single token) to TestNormalizeURI so the early-return cannot regress unnoticed --- internal/topsrv/nginx/log.go | 7 ++++++- internal/topsrv/nginx/nginx_test.go | 9 ++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/internal/topsrv/nginx/log.go b/internal/topsrv/nginx/log.go index e37d244..ae9df57 100644 --- a/internal/topsrv/nginx/log.go +++ b/internal/topsrv/nginx/log.go @@ -635,8 +635,13 @@ func (c *LogCollector) recordLine(p *ParsedLine) { //nolint:gocognit,nestif func normalizeURI(request string) string { parts := strings.SplitN(request, " ", 3) + // A well-formed $request is "METHOD URI HTTP/x.y" — at least one space. + // Binary handshakes (TLS ClientHello hitting an HTTP port, SSH probes, etc.) + // arrive as a single space-less blob; without this guard they bypassed + // normalizePath entirely and pushed raw bytes into the uri label, blowing + // past Prometheus' 256-byte cap. if len(parts) < 2 { - return request + return invalidMarker } path := parts[1] if i := strings.IndexByte(path, '?'); i >= 0 { diff --git a/internal/topsrv/nginx/nginx_test.go b/internal/topsrv/nginx/nginx_test.go index 8c9df03..bcb5da4 100644 --- a/internal/topsrv/nginx/nginx_test.go +++ b/internal/topsrv/nginx/nginx_test.go @@ -551,9 +551,16 @@ func TestNormalizeURI(t *testing.T) { {"GET /people/tommy-brewster-6401345/ HTTP/1.1", "/people/:slug/"}, // hex hashes in media URLs — truncated by depth {"GET /media/roles/e/ef/d763d0a80cfe86a9fcc378db4d5935bf.jpg HTTP/1.1", "/media/roles/:rest"}, + // Garbage $request with no spaces (TLS handshake hitting plain HTTP port, + // raw SSH probe, etc.) must NOT bypass normalization and pollute the uri + // label — pre-0.1.3 it was returned verbatim and blew past the 256-byte + // Prometheus label cap downstream. + {"\x06\x00\x00\x003_xgDcA\x00\x00\x00\x00\x00\x00\x00\xB0(\xC7\xEF~", invalidMarker}, + {"", invalidMarker}, + {"GET", invalidMarker}, } for _, tt := range tests { - assert.Equal(t, tt.want, normalizeURI(tt.request), tt.request) + assert.Equal(t, tt.want, normalizeURI(tt.request), "%q", tt.request) } }