diff --git a/internal/engine/engine.go b/internal/engine/engine.go
index 64dc8b20a..7a127fccd 100644
--- a/internal/engine/engine.go
+++ b/internal/engine/engine.go
@@ -1229,9 +1229,15 @@ Docs: https://stockyard.dev/docs
 	// Gzip compression (outermost — compresses all HTML, JSON, text responses)
 	srv.WrapHandler(gzipMiddleware)
 
-	// Panic recovery (outermost — catches panics from any handler, returns clean 500)
+	// Panic recovery (catches panics from any handler, returns clean 500)
 	srv.WrapHandler(recoveryMiddleware)
 
+	// Scanner block (outermost — runs first on every request).
+	// Short-circuits known credential/exploit scanner paths (.env, phpinfo,
+	// wp-admin, etc.) with a 403 before any handler, template, or DB work.
+	// See internal/engine/scanner_block.go for pattern list and rationale.
+	srv.WrapHandler(scannerBlockMiddleware)
+
 	// Register marketing website (/, /docs/, /pricing/, etc.)
 	site.Register(srv.Mux(), db.Conn())
 
diff --git a/internal/engine/scanner_block.go b/internal/engine/scanner_block.go
new file mode 100644
index 000000000..d4c60d1bf
--- /dev/null
+++ b/internal/engine/scanner_block.go
@@ -0,0 +1,48 @@
+package engine
+
+import (
+	"net/http"
+	"strings"
+)
+
+// scannerBlockPatterns are lowercase URL path substrings that only
+// credential/exploit scanners ever request. The marketing site has no real
+// route that matches any of these, so blocking is safe and stops bots from
+// probing for leaked .env files, phpinfo dumps, WordPress installs, etc.
+//
+// Matching is by substring against the lowercased request path (see
+// scannerBlockMiddleware), so every entry MUST be lowercase and must not be
+// a substring of any real route (".env" would also match "/foo.environment").
+//
+// Observed in the wild against stockyard.dev (Cloudflare analytics, 2026-04):
+// ~60 unique scanner paths/day driving ~75% of all 404s. None hit real routes.
+var scannerBlockPatterns = []string{
+	".env",       // catches /.env, /.env.bak, /.env.production, /backend/.env, etc.
+	"phpinfo",    // catches /phpinfo.php, /old_phpinfo.php, /hosting/phpinfo.php, etc.
+	"wp-admin",   // catches /wp-admin/install.php and any wp-admin probe
+	"wp-login",   // catches /wp-login.php
+	"/debug.php", // catches /debug.php at any depth (substring, not exact match)
+	"/php.php",   // catches /php.php at any depth (substring, not exact match)
+}
+
+// scannerBlockMiddleware short-circuits requests for known credential/exploit
+// scanner paths with a 403, before any handler, template, or DB work runs.
+//
+// The path is lowercased before matching so case variants such as /.ENV or
+// /WP-Login.php are blocked as well.
+//
+// Wrap this OUTERMOST (last WrapHandler call) so it runs first on every
+// request. Cost per request: one strings.ToLower plus a handful of
+// strings.Contains calls.
+func scannerBlockMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		path := strings.ToLower(r.URL.Path)
+		for _, pat := range scannerBlockPatterns {
+			if strings.Contains(path, pat) {
+				http.Error(w, "Forbidden", http.StatusForbidden)
+				return
+			}
+		}
+		next.ServeHTTP(w, r)
+	})
+}
diff --git a/internal/engine/scanner_block_test.go b/internal/engine/scanner_block_test.go
new file mode 100644
index 000000000..9957442fa
--- /dev/null
+++ b/internal/engine/scanner_block_test.go
@@ -0,0 +1,70 @@
+package engine
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestScannerBlockMiddleware(t *testing.T) {
+	// Inner handler always returns 200 — middleware should short-circuit
+	// before this runs for blocked paths.
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte("ok"))
+	})
+	h := scannerBlockMiddleware(inner)
+
+	tests := []struct {
+		name     string
+		path     string
+		wantCode int
+	}{
+		// Should be blocked (real scanner paths observed in production)
+		{"dotenv root", "/.env", http.StatusForbidden},
+		{"dotenv backup", "/.env.bak", http.StatusForbidden},
+		{"dotenv production", "/.env.production", http.StatusForbidden},
+		{"dotenv test", "/.env.test", http.StatusForbidden},
+		{"dotenv nested backend", "/backend/.env", http.StatusForbidden},
+		{"dotenv nested storage", "/storage/.env", http.StatusForbidden},
+		{"phpinfo plain", "/phpinfo.php", http.StatusForbidden},
+		{"phpinfo old", "/old_phpinfo.php", http.StatusForbidden},
+		{"phpinfo nested", "/hosting/phpinfo.php", http.StatusForbidden},
+		{"wp-admin install", "/wp-admin/install.php", http.StatusForbidden},
+		{"wp-login", "/wp-login.php", http.StatusForbidden},
+		{"debug.php", "/debug.php", http.StatusForbidden},
+		{"php.php", "/php.php", http.StatusForbidden},
+		// Case variants must be blocked too (matching is case-insensitive)
+		{"dotenv uppercase", "/.ENV", http.StatusForbidden},
+		{"wp-login mixed case", "/WP-Login.php", http.StatusForbidden},
+
+		// Should pass through (real site paths must NEVER be blocked)
+		{"homepage", "/", http.StatusOK},
+		{"pricing", "/pricing/", http.StatusOK},
+		{"desktop", "/desktop/", http.StatusOK},
+		{"installer macos", "/api/installer/macos", http.StatusOK},
+		{"installer linux", "/api/installer/linux", http.StatusOK},
+		{"installer windows", "/api/installer/windows", http.StatusOK},
+		{"recommend", "/api/recommend", http.StatusOK},
+		{"toolkit count", "/api/toolkit-count", http.StatusOK},
+		{"robots", "/robots.txt", http.StatusOK},
+		{"sitemap", "/sitemap.xml", http.StatusOK},
+		{"favicon", "/favicon.ico", http.StatusOK},
+		{"site asset svg", "/site-assets/assets/screenshots/tool-assay.svg", http.StatusOK},
+		{"for solo developers", "/for/solo-developers/", http.StatusOK},
+		{"tools listing", "/tools/", http.StatusOK},
+		{"playground", "/playground", http.StatusOK},
+		{"v1 chat completions", "/v1/chat/completions", http.StatusOK},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			req := httptest.NewRequest(http.MethodGet, tt.path, nil)
+			w := httptest.NewRecorder()
+			h.ServeHTTP(w, req)
+			if w.Code != tt.wantCode {
+				t.Errorf("path %s: got status %d, want %d", tt.path, w.Code, tt.wantCode)
+			}
+		})
+	}
+}