Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ Channel = "stable" # stable / beta
# UATruncate = 1024 # truncate user-agent at this length
# URITruncate = 2048 # truncate request URI at this length
# ExtraUAPatterns = ["MyCustomCrawler/"] # local additions to the bot list
# Field aliases — only set when discovery cannot infer the right name
# (e.g. operator-defined `set $custom $http_referer;`). Empty falls back.
# [BotLogs.FieldAliases]
# Referer = "ref"
```

| Parameter | Default | Description |
Expand Down Expand Up @@ -190,6 +194,7 @@ Channel = "stable" # stable / beta
| `BotLogs.UATruncate` | `1024` | Max UA length per event |
| `BotLogs.URITruncate` | `2048` | Max URI length per event |
| `BotLogs.ExtraUAPatterns` | `[]` | Local additions to known-bots UA patterns |
| `BotLogs.FieldAliases.*` | auto | Per-format field-name overrides (UserAgent/Host/ServerName/RemoteAddr/Referer). Discovery auto-detects from `log_format`; only set when operator uses non-standard variables nginx config |

### Environment variables

Expand Down Expand Up @@ -380,11 +385,19 @@ Enabling `[BotLogs]` extends nginx auto-discovery to log_formats *without*
timing histograms are simply skipped per-line for those. Installs without
`[BotLogs]` keep the original behavior.

**Required log_format variables**: `$http_user_agent`, `$server_name`,
`$remote_addr`, `$http_referer`. Any user-supplied `[Nginx].ExtraLabels`
are dropped while bot-logs is enabled (ParsedLine has only four label slots)
— this is logged on startup. Missing variables surface as empty strings;
events still ship.
**Required log_format variables**: `$http_user_agent`, `$host` /
`$server_name`, `$remote_addr`, `$http_referer`. Field names are
auto-detected from each tailed `log_format` — common alternates work
out of the box (`http_host`, `realip_remote_addr`, `http_x_real_ip`,
`http_x_forwarded_for`, `http_referrer` typo, custom JSON keys). The
resolution and its provenance (override / detected / default) are
emitted as a `botlog: resolved field aliases` log line at startup.
When discovery cannot infer the right name (e.g. operator-defined
`set $custom $http_referer;`), override via `[BotLogs.FieldAliases]`.
If a format genuinely lacks UA, a `botlog_no_ua_field` warning is
raised and `topsrv_collector_config_warnings_total{kind=...}` ticks.
Operator-supplied `[Nginx].ExtraLabels` are kept on metrics labels;
required botlog fields are unioned into `ExtractFields` for parsing.

Metrics: `topsrv_botlog_events_total{state=enqueued|sent|spooled|dropped}`,
`topsrv_botlog_match_total{family}`, `topsrv_botlog_send_errors_total{kind}`,
Expand Down
6 changes: 6 additions & 0 deletions cfg/local.toml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ Channel = "stable" # stable / beta
# UATruncate = 1024 # truncate user-agent at this length
# URITruncate = 2048 # truncate request URI at this length
# ExtraUAPatterns = ["MyCustomCrawler/"] # local additions to the bot list
# Field aliases — override only if discovery cannot infer the right name
# (e.g. nginx `set $custom $http_referer;` puts $custom in your log_format).
# Empty entries fall back to auto-detected, then to default nginx variable.
# [BotLogs.FieldAliases]
# Referer = "ref"
# UserAgent = ""
2 changes: 1 addition & 1 deletion docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -306,4 +306,4 @@ Per-collector instrumentation. Any collector registered via `addCollector` is wr
|--------|------|--------|-------------|
| `topsrv_collector_scrape_duration_seconds` | gauge | collector | Last scrape duration. Alert: `> 5s` = monitoring is adding overhead to the target |
| `topsrv_collector_scrape_panics_total` | counter | collector | Panics recovered during Collect. Any non-zero rate = bug, page immediately |
| `topsrv_collector_config_warnings_total` | counter | kind | Operator-config warnings raised at startup. `kind` ∈ {`high_card_label` (denylisted variable in ExtraLabels), `missing_extract` (ExtraLabels references variable absent from ExtractFields → empty label values), `truncated_extract` (ExtractFields > MaxExtras=8 → tail dropped), `botlog_no_ua_field` (BotLogs enabled but no log_format carries http_user_agent)}. Any non-zero value points at a fixable config — see the matching WARN in stdout/journald for the offending names |
| `topsrv_collector_config_warnings_total` | counter | kind | Operator-config warnings raised at startup. `kind` ∈ {`high_card_label` (denylisted variable in ExtraLabels), `missing_extract` (ExtraLabels references variable absent from ExtractFields → empty label values), `truncated_extract` (ExtractFields > MaxExtras=8 → tail dropped), `botlog_no_ua_field` (BotLogs enabled but no log_format carries http_user_agent — override via `[BotLogs.FieldAliases].UserAgent`), `botlog_alias_mismatch` (tailed paths resolve to non-identical botlog aliases; first path's resolution is used)}. Any non-zero value points at a fixable config — see the matching WARN in stdout/journald for the offending names |
108 changes: 96 additions & 12 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ type App struct {
// Observer's positional field-index resolution.
extractFields []string

// Per-format field aliases resolved from discovered log_format strings.
// registerLogCollector picks one canonical set (warning on mismatch) and
// registerBotLogs hands it to NewObserver.
botlogAliases botlog.FieldAliases

// Tracks pusher/log collector/smart/updater so Shutdown can wait for
// their drain paths before the process exits.
bg sync.WaitGroup
Expand Down Expand Up @@ -132,7 +137,7 @@ func New(appName, version string, logger embedlog.Logger, cfg Config) *App {
}, []string{"collector"}),
configWarnings: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "topsrv_collector_config_warnings_total",
Help: "Operator-config warnings raised at startup. kind=high_card_label|missing_extract|truncated_extract|botlog_no_ua_field.",
Help: "Operator-config warnings raised at startup. kind=high_card_label|missing_extract|truncated_extract|botlog_no_ua_field|botlog_alias_mismatch.",
}, []string{"kind"}),
}
a.registry.MustRegister(a.scrapeDuration, a.scrapePanics, a.configWarnings)
Expand Down Expand Up @@ -356,10 +361,11 @@ func (a *App) registerLogCollector(ctx context.Context, cfg nginx.LogConfig) {

cfg.ExtractFields = cfg.ExtraLabels
if a.cfg.BotLogs != nil && a.cfg.BotLogs.Enabled {
cfg.ExtractFields = mergeUnique(cfg.ExtraLabels, botlog.RequiredFields())
if !logHasUserAgent(cfg) {
a.botlogAliases = a.resolveBotlogAliases(ctx, cfg)
cfg.ExtractFields = mergeUnique(cfg.ExtraLabels, botlog.RequiredFields(a.botlogAliases))
if a.botlogAliases.UserAgent == "" {
a.warnConfig(ctx, "botlog_no_ua_field",
"BotLogs enabled but no tailed log_format contains http_user_agent; events will never match — check nginx/angie log_format directives",
"BotLogs enabled but no tailed log_format contains http_user_agent; events will never match — check nginx/angie log_format directives, or set [BotLogs.FieldAliases].UserAgent to the custom field name",
"log_paths", cfg.LogPaths)
}
}
Expand Down Expand Up @@ -417,16 +423,94 @@ func truncateList(s []string, n int) []string {
return append(append([]string(nil), s[:n]...), "…+more")
}

func logHasUserAgent(cfg nginx.LogConfig) bool {
if strings.Contains(cfg.LogFormat, "http_user_agent") {
return true
// resolveBotlogAliases inspects every log_format paired with cfg's LogPaths
// (default plus per-path overrides) and returns one canonical alias set for
// the Observer. The TOML override from BotLogs.FieldAliases wins, then the
// detected aliases, with DefaultAliases as the last-resort layer.
//
// Multi-format setups: if paths resolve to non-identical alias sets we keep
// the first path's resolution and emit a config warning — the v2 plan is
// per-path Observer wiring once an operator hits this in production.
func (a *App) resolveBotlogAliases(ctx context.Context, cfg nginx.LogConfig) botlog.FieldAliases {
override := botlog.FieldAliases{}
if a.cfg.BotLogs != nil {
override = a.cfg.BotLogs.FieldAliases
}

formatFor := func(p string) (string, bool) {
if f, ok := cfg.LogFormats[p]; ok {
return f, cfg.JSONPaths[p]
}
return cfg.LogFormat, false
}
for _, f := range cfg.LogFormats {
if strings.Contains(f, "http_user_agent") {
return true

// Memoise per (format, isJSON) — N paths sharing one log_format incur
// one DetectAliases call instead of N.
type formatKey struct {
format string
isJSON bool
}
detectCache := make(map[formatKey]botlog.FieldAliases)

var canonical, canonicalDetected botlog.FieldAliases
var canonicalPath string
mismatched := make([]string, 0)
for _, p := range cfg.LogPaths {
format, isJSON := formatFor(p)
key := formatKey{format, isJSON}
detected, ok := detectCache[key]
if !ok {
detected = botlog.DetectAliases(format, isJSON)
detectCache[key] = detected
}
resolved := override.WithFallback(detected).WithFallback(botlog.DefaultAliases())
if canonicalPath == "" {
canonical = resolved
canonicalDetected = detected
canonicalPath = p
continue
}
if resolved != canonical {
mismatched = append(mismatched, p)
}
}
if len(mismatched) > 0 {
a.warnConfig(ctx, "botlog_alias_mismatch",
"log_formats across tailed paths resolve to different botlog aliases; using first path's resolution",
"canonical_path", canonicalPath, "mismatched_paths", truncateList(mismatched, 8))
}
if canonicalPath != "" {
a.Print(ctx, "botlog: resolved field aliases",
"aliases", canonical.String(),
"sources", aliasSources(override, canonicalDetected),
"canonical_path", canonicalPath)
}
return canonical
}

// aliasSources renders per-field provenance: "override" if the operator set
// the alias via [BotLogs.FieldAliases], "detected" if it came from
// log_format inspection, "default" if neither matched and DefaultAliases
// supplied the value. Mirrors FieldAliases.String() shape so log readers can
// align the two lines.
func aliasSources(override, detected botlog.FieldAliases) string {
src := func(o, d string) string {
switch {
case o != "":
return "override"
case d != "":
return "detected"
default:
return "default"
}
}
return false
return fmt.Sprintf("ua=%s host=%s server=%s remote=%s referer=%s",
src(override.UserAgent, detected.UserAgent),
src(override.Host, detected.Host),
src(override.ServerName, detected.ServerName),
src(override.RemoteAddr, detected.RemoteAddr),
src(override.Referer, detected.Referer),
)
}

// highCardLabels returns the intersection of labels with the denylist.
Expand Down Expand Up @@ -516,7 +600,7 @@ func (a *App) registerBotLogs(ctx context.Context) {
return
}
bp := botlog.NewPusher(a.Logger, a.appName, a.version, *a.cfg.BotLogs, a.registry)
obs := botlog.NewObserver(bp, *a.cfg.BotLogs, a.hostname, a.extractFields)
obs := botlog.NewObserver(bp, *a.cfg.BotLogs, a.hostname, a.extractFields, a.botlogAliases)
a.logCollector.AddObserver(obs)
a.goBackground(func() { bp.Run(ctx) })
a.Print(ctx, "botlog: observer attached", "endpoint", a.cfg.BotLogs.Endpoint, "spool", a.cfg.BotLogs.SpoolDir)
Expand Down
88 changes: 66 additions & 22 deletions internal/app/app_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package app

import (
"context"
"testing"

"github.com/vmkteam/topsrv/internal/topsrv/botlog"
"github.com/vmkteam/topsrv/internal/topsrv/nginx"

"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -59,30 +61,72 @@ func TestInstrumentedCollectorRecoversPanic(t *testing.T) {
assert.InDelta(t, 1.0, counterValue(t, panics), 1e-9, "panics counter must be incremented once")
}

func TestLogHasUserAgent(t *testing.T) {
cases := []struct {
name string
cfg nginx.LogConfig
want bool
}{
{"empty", nginx.LogConfig{}, false},
{"single format with UA", nginx.LogConfig{LogFormat: "$remote_addr - $http_user_agent"}, true},
{"single format without UA", nginx.LogConfig{LogFormat: "$remote_addr $request_time"}, false},
{"map with UA", nginx.LogConfig{LogFormats: map[string]string{"/a": "$http_user_agent"}}, true},
{"map without UA", nginx.LogConfig{LogFormats: map[string]string{"/a": "$remote_addr"}}, false},
{"any-of map has UA", nginx.LogConfig{LogFormats: map[string]string{
"/a": "$remote_addr",
"/b": "$http_user_agent",
}}, true},
{"json with UA", nginx.LogConfig{LogFormats: map[string]string{
"/a": `{"ua":"$http_user_agent"}`,
}}, true},
// newAliasTestApp builds the minimal App required to exercise
// resolveBotlogAliases — Logger for warnConfig, configWarnings counter for
// the warning side-effect, and a BotLogs config carrying the override.
func newAliasTestApp(t *testing.T, override botlog.FieldAliases) *App {
t.Helper()
a := &App{
cfg: Config{BotLogs: &botlog.Config{FieldAliases: override}},
configWarnings: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "test_config_warnings_total", Help: "test",
}, []string{"kind"}),
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
assert.Equal(t, tc.want, logHasUserAgent(tc.cfg))
})
return a
}

func TestResolveBotlogAliases_SinglePathStandardFormat(t *testing.T) {
a := newAliasTestApp(t, botlog.FieldAliases{})
cfg := nginx.LogConfig{
LogPaths: []string{"/var/log/nginx/a.log"},
LogFormat: `$remote_addr [$time_local] "$request" $status "$http_referer" "$http_user_agent"`,
}
got := a.resolveBotlogAliases(context.Background(), cfg)
assert.Equal(t, "http_user_agent", got.UserAgent)
assert.Equal(t, "http_referer", got.Referer)
assert.Equal(t, "remote_addr", got.RemoteAddr)
}

func TestResolveBotlogAliases_OverrideWins(t *testing.T) {
a := newAliasTestApp(t, botlog.FieldAliases{Referer: "ref"})
cfg := nginx.LogConfig{
LogPaths: []string{"/var/log/nginx/a.log"},
LogFormat: `$remote_addr "$http_referer" "$http_user_agent"`,
}
got := a.resolveBotlogAliases(context.Background(), cfg)
assert.Equal(t, "ref", got.Referer, "explicit override beats auto-detected")
assert.Equal(t, "http_user_agent", got.UserAgent, "non-overridden fields keep detection")
}

func TestResolveBotlogAliases_MismatchEmitsWarning(t *testing.T) {
a := newAliasTestApp(t, botlog.FieldAliases{})
// Two paths, different formats with diverging detected aliases:
// path A uses $http_referer, path B uses $http_referrer.
cfg := nginx.LogConfig{
LogPaths: []string{"/var/log/a.log", "/var/log/b.log"},
LogFormats: map[string]string{
"/var/log/a.log": `$remote_addr "$http_referer" "$http_user_agent"`,
"/var/log/b.log": `$remote_addr "$http_referrer" "$http_user_agent"`,
},
}
got := a.resolveBotlogAliases(context.Background(), cfg)
assert.Equal(t, "http_referer", got.Referer, "first path's resolution wins")
assert.InDelta(t, 1.0, counterValue(t, a.configWarnings.WithLabelValues("botlog_alias_mismatch")), 1e-9)
}

func TestResolveBotlogAliases_AllPathsAgreeNoWarning(t *testing.T) {
a := newAliasTestApp(t, botlog.FieldAliases{})
// Two paths sharing the same format → no mismatch warning, no DetectAliases
// re-run (memoised by (format,isJSON)).
cfg := nginx.LogConfig{
LogPaths: []string{"/var/log/a.log", "/var/log/b.log"},
LogFormats: map[string]string{
"/var/log/a.log": `$remote_addr "$http_referer" "$http_user_agent"`,
"/var/log/b.log": `$remote_addr "$http_referer" "$http_user_agent"`,
},
}
a.resolveBotlogAliases(context.Background(), cfg)
assert.InDelta(t, 0.0, counterValue(t, a.configWarnings.WithLabelValues("botlog_alias_mismatch")), 1e-9)
}

// TestInstrumentedCollectorNormalCallNoPanicCount verifies the counter stays zero
Expand Down
Loading
Loading