From bed723457b47a37af9974f79d7ae7e12f8b33be1 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 21:52:13 -0500 Subject: [PATCH 01/12] fix: increased logging for failed checks & down action --- internal/cmd.go | 8 +++- internal/conf.go | 32 +++++++++++--- internal/logger/logger.go | 12 +++++ internal/logic/downaction.go | 76 ++++++++++++++++++++++++++------ internal/logic/loop.go | 37 ++++++++++++---- internal/status/changetracker.go | 4 +- internal/status/stat-server.go | 38 +++++++++++++--- 7 files changed, 168 insertions(+), 39 deletions(-) diff --git a/internal/cmd.go b/internal/cmd.go index a3a7f95..2544d08 100644 --- a/internal/cmd.go +++ b/internal/cmd.go @@ -97,7 +97,7 @@ func Run(appCtx context.Context, cmd *cli.Command) error { select { case <-rootCtx.Done(): - logger.L.Info("shutting down", "component", "app") + logger.L.Info("shutting down", logger.LogComponent, logger.LogComponentApp) cancelCurrentWorker() <-done @@ -112,7 +112,11 @@ func Run(appCtx context.Context, cmd *cli.Command) error { <-done case <-sighupCh: - logger.L.Info("SIGHUP received: reloading configuration", "component", "app") + logger.L.Info( + "SIGHUP received: reloading configuration", + logger.LogComponent, + logger.LogComponentApp, + ) cancelCurrentWorker() <-done } diff --git a/internal/conf.go b/internal/conf.go index bb4b3e0..b78d131 100644 --- a/internal/conf.go +++ b/internal/conf.go @@ -120,7 +120,7 @@ type Configuration struct { } func configError(msg string, path string, err error) (*Configuration, error) { - logger.L.Error(msg, "component", "config", "file", path, "error", err) + logger.L.Error(msg, logger.LogComponent, logger.LogComponentConfig, "file", path, "error", err) return nil, fmt.Errorf("%s: %w", msg, err) } @@ -147,7 +147,13 @@ func ReadConf(cfgFile string) (*Configuration, error) { return configError("Could not read config", absPath, err) } - logger.L.Debug("config file used", "component", "config", "file", absPath) + logger.L.Debug( + "config file used", + logger.LogComponent, + logger.LogComponentConfig, + "file", + absPath, + ) content, err = expandEnvVars(content) if err != nil { @@ -215,7 +221,7 @@ func (c Configuration) GetChecksCat(category []string) []*check.Check { parsedURL, err := url.Parse(checkStr) if err != nil { logger.L.Error("could not parse check in config", - "component", "config", "check", checkStr, "error", err) + logger.LogComponent, logger.LogComponentConfig, "check", checkStr, "error", err) continue } @@ -226,14 +232,20 @@ func (c Configuration) GetChecksCat(category []string) []*check.Check { case check.DNS: domain := strings.TrimPrefix(parsedURL.Path, "/") if domain == "" { - logger.L.Error("DNS check missing domain", "component", "config", "check", checkStr) + logger.L.Error( + "DNS check missing domain", + logger.LogComponent, + logger.LogComponentConfig, + "check", + checkStr, + ) continue } if parsedURL.Host == "" { logger.L.Error("DNS check missing resolver host", - "component", "config", "check", checkStr) + logger.LogComponent, logger.LogComponentConfig, "check", checkStr) continue } @@ -252,7 +264,7 @@ func (c Configuration) GetChecksCat(category []string) []*check.Check { probe = check.NewTCPProbe(hostPort) default: logger.L.Error("unknown protocol in config", - "component", "config", "check", checkStr, + logger.LogComponent, logger.LogComponentConfig, "check", checkStr, "protocol", parsedURL.Scheme) continue @@ -323,7 +335,13 @@ func (c Configuration) logSetup() { case logLevelWarn: level = slog.LevelWarn default: - logger.L.Error("unknown loglevel", "component", "config", "loglevel", c.LogLevel) + logger.L.Error( + "unknown loglevel", + logger.LogComponent, + logger.LogComponentConfig, + "loglevel", + c.LogLevel, + ) return } diff --git a/internal/logger/logger.go b/internal/logger/logger.go index 82f0ff5..9b3be08 100644 --- a/internal/logger/logger.go +++ b/internal/logger/logger.go @@ -6,6 +6,18 @@ import ( "os" ) +// LogComponent is the structured log key used for component identification. +const LogComponent = "component" + +// Component values used across the codebase for log attribution. +const ( + LogComponentCheck = "check" + LogComponentDownAction = "downaction" + LogComponentStats = "stats" + LogComponentConfig = "config" + LogComponentApp = "app" +) + // L is the global logger instance for the application. // //nolint:gochecknoglobals,varnamelen // Standard pattern for application-wide logger diff --git a/internal/logic/downaction.go b/internal/logic/downaction.go index 86894d8..c48895b 100644 --- a/internal/logic/downaction.go +++ b/internal/logic/downaction.go @@ -99,11 +99,19 @@ func (dal *DownActionLoop) Execute(ctx context.Context, execString string) error cmdEnv := os.Environ() cmdEnv = append(cmdEnv, fmt.Sprintf("UPD_ITERATION=%d", iteration)) cmd.Env = cmdEnv - logger.L.Info("executing command", "component", "downaction", "exec", cmd.String()) + logger.L.Info( + "executing command", + logger.LogComponent, + logger.LogComponentDownAction, + "exec", + cmd.String(), + "iteration", + iteration, + ) if err = cmd.Start(); err != nil { logger.L.Error("failed to run", - "component", "downaction", "exec", cmd.String(), "error", err) + logger.LogComponent, logger.LogComponentDownAction, "exec", cmd.String(), "error", err) return fmt.Errorf("failed to execute DownAction: %w", err) } @@ -122,9 +130,17 @@ func (dal *DownActionLoop) Execute(ctx context.Context, execString string) error dal.cmdMu.Unlock() if waitErr != nil { - logger.L.Warn("error executing command", - "component", "downaction", "exec", cmd.String(), "error", waitErr, - "stderr", string(bytes.TrimSpace(stderrBuf.Bytes()))) + logger.L.Warn( + "error executing command", + logger.LogComponent, + logger.LogComponentDownAction, + "exec", + cmd.String(), + "error", + waitErr, + "stderr", + string(bytes.TrimSpace(stderrBuf.Bytes())), + ) } }() @@ -155,7 +171,7 @@ func (da *DownAction) NewDownActionLoop(ctx context.Context) (*DownActionLoop, c func (da *DownAction) Start(ctx context.Context) *DownActionLoop { dal, ctx := da.NewDownActionLoop(ctx) - logger.L.Debug("kicking off run loop", "component", "downaction") + logger.L.Debug("kicking off run loop", logger.LogComponent, logger.LogComponentDownAction) go dal.run(ctx) @@ -171,11 +187,17 @@ func (dal *DownActionLoop) Stop(_ context.Context) { //nolint:contextcheck // loopCtx derived from the same hierarchy as ctx err := dal.Execute(dal.loopCtx, dal.da.StopExec) if err != nil && !errors.Is(err, ErrNoCommand) { - logger.L.Warn("failed to execute stop command", "component", "downaction", "error", err) + logger.L.Warn( + "failed to execute stop command", + logger.LogComponent, + logger.LogComponentDownAction, + "error", + err, + ) } } - logger.L.Debug("sending shutdown signal", "component", "downaction") + logger.L.Debug("sending shutdown signal", logger.LogComponent, logger.LogComponentDownAction) dal.cancelFunc() } @@ -189,11 +211,17 @@ func (dal *DownActionLoop) killCurrentCmd() { } logger.L.Warn("killing current command", - "component", "downaction", "pid", dal.currentCmd.Process.Pid) + logger.LogComponent, logger.LogComponentDownAction, "pid", dal.currentCmd.Process.Pid) if dal.currentCmd.Process != nil { if err := dal.currentCmd.Process.Kill(); err != nil { - logger.L.Warn("failed to kill current command", "component", "downaction", "error", err) + logger.L.Warn( + "failed to kill current command", + logger.LogComponent, + logger.LogComponentDownAction, + "error", + err, + ) } } @@ -221,12 +249,14 @@ func (dal *DownActionLoop) iterate() { } logger.L.Debug("iteration details", - "component", "downaction", "iteration", dal.it.iteration, + logger.LogComponent, logger.LogComponentDownAction, "iteration", dal.it.iteration, "sleepTime", dal.it.sleepTime, "limitReached", dal.it.limitReached) } func (dal *DownActionLoop) run(ctx context.Context) { + logger.L.Debug("down action loop started", + logger.LogComponent, logger.LogComponentDownAction) dal.iterate() for { @@ -234,12 +264,19 @@ func (dal *DownActionLoop) run(ctx context.Context) { sleepTime := dal.it.sleepTime dal.mu.RUnlock() + logger.L.Debug( + "sleeping", + logger.LogComponent, + logger.LogComponentDownAction, + "duration", + sleepTime, + ) timer := time.NewTimer(sleepTime) select { case <-ctx.Done(): timer.Stop() - logger.L.Debug("canceled", "component", "downaction") + logger.L.Debug("canceled", logger.LogComponent, logger.LogComponentDownAction) return case <-timer.C: @@ -250,12 +287,25 @@ func (dal *DownActionLoop) run(ctx context.Context) { err := dal.Execute(ctx, dal.da.Exec) if err != nil { - logger.L.Error("failed to execute", "component", "downaction", "error", err) + logger.L.Error( + "failed to execute", + logger.LogComponent, + logger.LogComponentDownAction, + "iteration", + dal.it.iteration, + "error", + err, + ) + } else { + logger.L.Debug("command succeeded", + logger.LogComponent, logger.LogComponentDownAction) } if dal.da.Every > 0 { dal.iterate() } else { + logger.L.Debug("down action loop complete", + logger.LogComponent, logger.LogComponentDownAction) break } } diff --git a/internal/logic/loop.go b/internal/logic/loop.go index fc7be20..bee5de7 100644 --- a/internal/logic/loop.go +++ b/internal/logic/loop.go @@ -98,6 +98,7 @@ type Loop struct { downActionLoop *DownActionLoop statServer *status.StatServer status *status.Status + lastSuccess time.Time } // NewLoop creates a new monitoring loop. @@ -152,7 +153,7 @@ func (l *Loop) ProcessCheck(ctx context.Context, upStatus bool) { return } - logger.L.Info("connection status changed", "component", "loop", "up", l.status.Up) + logger.L.Info("connection status changed", logger.LogComponent, "loop", "up", l.status.Up) if !l.hasDownAction() { return @@ -163,7 +164,7 @@ func (l *Loop) ProcessCheck(ctx context.Context, upStatus bool) { } else { err := l.DownActionStart(ctx) if err != nil { - logger.L.Error("could not start DownAction", "component", "loop", "error", err) + logger.L.Error("could not start DownAction", logger.LogComponent, "loop", "error", err) } } } @@ -179,19 +180,35 @@ func (l *Loop) Run(ctx context.Context, statServerConfig *status.StatServerConfi for { checkStatus, err := check.CheckerRun(ctx, checker, l.checkList.GetIterator()) if err == nil { + l.lastSuccess = time.Now() l.ProcessCheck(ctx, checkStatus) } else { - logger.L.Error("loop error", "component", "loop", "error", err) + attrs := []any{logger.LogComponent, "loop", "error", err} + if !l.lastSuccess.IsZero() { + attrs = append(attrs, "sinceLastSuccess", time.Since(l.lastSuccess)) + } + + logger.L.Error("loop error", attrs...) } sleepTime := l.delays[l.status.Up] - logger.L.Debug("waiting for next loop iteration", "component", "loop", "wait", sleepTime) + logger.L.Debug( + "waiting for next loop iteration", + logger.LogComponent, + "loop", + "wait", + sleepTime, + ) timer := time.NewTimer(sleepTime) select { case <-ctx.Done(): timer.Stop() - logger.L.Debug("context canceled during sleep, exiting Run()", "component", "loop") + logger.L.Debug( + "context canceled during sleep, exiting Run()", + logger.LogComponent, + "loop", + ) return case <-timer.C: @@ -221,7 +238,7 @@ type Checker struct{} // CheckRun logs the start of a check. func (Checker) CheckRun(chk check.Check) { logger.L.Debug("running", - "component", "check", + logger.LogComponent, logger.LogComponentCheck, "probe", chk.Probe, "protocol", @@ -233,10 +250,14 @@ func (Checker) CheckRun(chk check.Check) { // ProbeSuccess logs successful probe results. func (Checker) ProbeSuccess(report *check.Report) { - logger.L.Debug("success", append([]any{"component", "check"}, report.LogAttrs()...)...) + logger.L.Debug( + "success", + append([]any{logger.LogComponent, logger.LogComponentCheck}, report.LogAttrs()...)...) } // ProbeFailure logs failed probe results. func (Checker) ProbeFailure(report *check.Report) { - logger.L.Warn("failed", append([]any{"component", "check"}, report.LogAttrs()...)...) + logger.L.Warn( + "failed", + append([]any{logger.LogComponent, logger.LogComponentCheck}, report.LogAttrs()...)...) } diff --git a/internal/status/changetracker.go b/internal/status/changetracker.go index 848d920..2e1729b 100644 --- a/internal/status/changetracker.go +++ b/internal/status/changetracker.go @@ -130,8 +130,8 @@ func (tracker *StateChangeTracker) GenReports(currentState bool, end time.Time, if err != nil { logger.L.Debug( "invalid range for stat report", - "component", - "stats", + logger.LogComponent, + logger.LogComponentStats, "error", err, "period", diff --git a/internal/status/stat-server.go b/internal/status/stat-server.go index 3e21297..7171a50 100644 --- a/internal/status/stat-server.go +++ b/internal/status/stat-server.go @@ -43,7 +43,7 @@ type StatServer struct { // StartStatServer starts a new statistics server in a goroutine. func StartStatServer(status *Status, config *StatServerConfig) *StatServer { if config.Port == 0 { - logger.L.Debug("no stat server specified", "component", "stats") + logger.L.Debug("no stat server specified", logger.LogComponent, logger.LogComponentStats) return nil } @@ -74,21 +74,33 @@ func (s *StatServer) Shutdown(ctx context.Context) { return } - logger.L.Info("shutting down stats server", "component", "stats") + logger.L.Info("shutting down stats server", logger.LogComponent, logger.LogComponentStats) if err := s.server.Shutdown(ctx); err != nil { - logger.L.Error("error shutting down stats server", "component", "stats", "error", err) + logger.L.Error( + "error shutting down stats server", + logger.LogComponent, + logger.LogComponentStats, + "error", + err, + ) } } func (s *StatServer) listenAndServe() { logger.L.Info("server started", - "component", "stats", + logger.LogComponent, logger.LogComponentStats, "statserver", fmt.Sprintf("http://localhost%s%s", s.server.Addr, StatRoute), ) if err := s.server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { - logger.L.Error("error starting stats server", "component", "stats", "error", err) + logger.L.Error( + "error starting stats server", + logger.LogComponent, + logger.LogComponentStats, + "error", + err, + ) } } @@ -114,7 +126,13 @@ func (h *StatHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) { return } - logger.L.Debug("requested", "component", "stats", "requester", req.RemoteAddr) + logger.L.Debug( + "requested", + logger.LogComponent, + logger.LogComponentStats, + "requester", + req.RemoteAddr, + ) writeJSON(writer, h.GenStatReport()) } @@ -126,7 +144,13 @@ func writeJSON(w http.ResponseWriter, data any) { enc.SetIndent("", JSONIndentSpaces) if err := enc.Encode(data); err != nil { - logger.L.Error("error writing JSON response", "component", "stats", "error", err) + logger.L.Error( + "error writing JSON response", + logger.LogComponent, + logger.LogComponentStats, + "error", + err, + ) } } From 892e3792aa37fc7f2895d6052b9b1396945a5ebd Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:03:44 -0500 Subject: [PATCH 02/12] refactor(downaction): simplified loop --- internal/logic/downaction.go | 78 ++++++++++++------------------- internal/logic/downaction_test.go | 34 +++++++------- 2 files changed, 47 insertions(+), 65 deletions(-) diff --git a/internal/logic/downaction.go b/internal/logic/downaction.go index c48895b..bf732e8 100644 --- a/internal/logic/downaction.go +++ b/internal/logic/downaction.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "sync" + "sync/atomic" "time" "github.com/google/shlex" @@ -24,23 +25,17 @@ type DownAction struct { StopExec string } -// DownActionIteration tracks the current iteration state for exponential backoff. -type DownActionIteration struct { - iteration int - sleepTime time.Duration - limitReached bool -} - // DownActionLoop manages execution of down action commands. type DownActionLoop struct { da *DownAction - it *DownActionIteration cancelFunc context.CancelFunc //nolint:containedctx // loopCtx stored to bind StopExec commands on loop exit - loopCtx context.Context - mu sync.RWMutex - currentCmd *exec.Cmd - cmdMu sync.Mutex + loopCtx context.Context + iteration atomic.Int64 + sleepTime time.Duration + limitReached bool + currentCmd *exec.Cmd + cmdMu sync.Mutex } // BackoffFactor is the multiplier for exponential backoff. @@ -85,9 +80,7 @@ func (dal *DownActionLoop) Execute(ctx context.Context, execString string) error return fmt.Errorf("invalid command: %w", err) } - dal.mu.RLock() - iteration := dal.it.iteration - dal.mu.RUnlock() + iteration := dal.iteration.Load() // #nosec G204 // Command is validated by shlex.Split() and validateCommand() before execution cmd := exec.CommandContext(ctx, command[0], command[1:]...) @@ -147,21 +140,14 @@ func (dal *DownActionLoop) Execute(ctx context.Context, execString string) error return nil } -// NewDownActionIteration creates a new iteration tracker. -func NewDownActionIteration() *DownActionIteration { - return &DownActionIteration{ - iteration: -1, - } -} - // NewDownActionLoop creates a new loop context for the down action. func (da *DownAction) NewDownActionLoop(ctx context.Context) (*DownActionLoop, context.Context) { ctx, cancelFunc := context.WithCancel(ctx) dal := &DownActionLoop{ da: da, - it: NewDownActionIteration(), cancelFunc: cancelFunc, loopCtx: ctx, + sleepTime: da.After, } return dal, ctx @@ -228,50 +214,45 @@ func (dal *DownActionLoop) killCurrentCmd() { dal.currentCmd = nil } -func (dal *DownActionLoop) iterate() { - dal.mu.Lock() - defer dal.mu.Unlock() +func (dal *DownActionLoop) nextSleep() time.Duration { + dal.iteration.Add(1) - dal.it.iteration++ - switch dal.it.iteration { - case 0: - dal.it.sleepTime = dal.da.After + switch dal.iteration.Load() { case 1: - dal.it.sleepTime = dal.da.Every + dal.sleepTime = dal.da.Every default: - if !dal.it.limitReached { - dal.it.sleepTime = time.Duration(BackoffFactor * float64(dal.it.sleepTime)) - if dal.da.BackoffLimit != 0 && dal.it.sleepTime >= dal.da.BackoffLimit { - dal.it.sleepTime = dal.da.BackoffLimit - dal.it.limitReached = true + if !dal.limitReached { + next := time.Duration(BackoffFactor * float64(dal.sleepTime)) + if dal.da.BackoffLimit != 0 && next >= dal.da.BackoffLimit { + next = dal.da.BackoffLimit + dal.limitReached = true } + + dal.sleepTime = next } } logger.L.Debug("iteration details", - logger.LogComponent, logger.LogComponentDownAction, "iteration", dal.it.iteration, - "sleepTime", dal.it.sleepTime, - "limitReached", dal.it.limitReached) + logger.LogComponent, logger.LogComponentDownAction, "iteration", dal.iteration.Load(), + "sleepTime", dal.sleepTime, + "limitReached", dal.limitReached) + + return dal.sleepTime } func (dal *DownActionLoop) run(ctx context.Context) { logger.L.Debug("down action loop started", logger.LogComponent, logger.LogComponentDownAction) - dal.iterate() for { - dal.mu.RLock() - sleepTime := dal.it.sleepTime - dal.mu.RUnlock() - logger.L.Debug( "sleeping", logger.LogComponent, logger.LogComponentDownAction, "duration", - sleepTime, + dal.sleepTime, ) - timer := time.NewTimer(sleepTime) + timer := time.NewTimer(dal.sleepTime) select { case <-ctx.Done(): @@ -292,7 +273,7 @@ func (dal *DownActionLoop) run(ctx context.Context) { logger.LogComponent, logger.LogComponentDownAction, "iteration", - dal.it.iteration, + dal.iteration.Load(), "error", err, ) @@ -302,10 +283,11 @@ func (dal *DownActionLoop) run(ctx context.Context) { } if dal.da.Every > 0 { - dal.iterate() + dal.sleepTime = dal.nextSleep() } else { logger.L.Debug("down action loop complete", logger.LogComponent, logger.LogComponentDownAction) + break } } diff --git a/internal/logic/downaction_test.go b/internal/logic/downaction_test.go index 758ad17..4048b4a 100644 --- a/internal/logic/downaction_test.go +++ b/internal/logic/downaction_test.go @@ -184,28 +184,28 @@ func testBackoff(t *testing.T, hasLimit bool) { assert.InEpsilon(t, 1.5, BackoffFactor, 0.01, "Ensuring we have the right values") dal, _ := da.NewDownActionLoop(context.Background()) - assert.Equal(t, DownActionIteration{iteration: -1, sleepTime: 0}, *dal.it) - dal.iterate() - assert.Equal(t, DownActionIteration{iteration: 0, sleepTime: da.After}, *dal.it) - dal.iterate() - assert.Equal(t, DownActionIteration{iteration: 1, sleepTime: da.Every}, *dal.it) - dal.iterate() + assert.Equal(t, int64(0), dal.iteration.Load()) + assert.Equal(t, da.After, dal.sleepTime) + assert.False(t, dal.limitReached) - current := time.Duration(1.5 * float64(time.Second)) - assert.Equal(t, DownActionIteration{iteration: 2, sleepTime: current}, *dal.it) - dal.iterate() + sleepTime := dal.nextSleep() + assert.Equal(t, int64(1), dal.iteration.Load()) + assert.Equal(t, da.Every, sleepTime) + + sleepTime = dal.nextSleep() + assert.Equal(t, int64(2), dal.iteration.Load()) + assert.Equal(t, time.Duration(1.5*float64(time.Second)), sleepTime) + + sleepTime = dal.nextSleep() + assert.Equal(t, int64(3), dal.iteration.Load()) if hasLimit { - current = da.BackoffLimit + assert.Equal(t, da.BackoffLimit, sleepTime) + assert.True(t, dal.limitReached) } else { - current = time.Duration(2.25 * float64(time.Second)) + assert.Equal(t, time.Duration(2.25*float64(time.Second)), sleepTime) + assert.False(t, dal.limitReached) } - - assert.Equal( - t, - DownActionIteration{iteration: 3, sleepTime: current, limitReached: hasLimit}, - *dal.it, - ) } func Test_BackoffNoLimit(t *testing.T) { From 788f11c501128ec4348af730ce65dbb20408d9ae Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:17:55 -0500 Subject: [PATCH 03/12] refactor(loop): remove no-op timer.Stop and inline hasDownAction --- internal/logic/loop.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/internal/logic/loop.go b/internal/logic/loop.go index bee5de7..6afe269 100644 --- a/internal/logic/loop.go +++ b/internal/logic/loop.go @@ -155,7 +155,7 @@ func (l *Loop) ProcessCheck(ctx context.Context, upStatus bool) { logger.L.Info("connection status changed", logger.LogComponent, "loop", "up", l.status.Up) - if !l.hasDownAction() { + if l.downAction == nil { return } @@ -213,8 +213,6 @@ func (l *Loop) Run(ctx context.Context, statServerConfig *status.StatServerConfi return case <-timer.C: } - - timer.Stop() } } @@ -228,10 +226,6 @@ func (l *Loop) Stop(ctx context.Context) { } } -func (l *Loop) hasDownAction() bool { - return l.downAction != nil -} - // Checker implements check.Checker for logging check lifecycle events. type Checker struct{} From 38ab09744c97f7c5c6a90804e706bc1e09718f85 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:18:35 -0500 Subject: [PATCH 04/12] refactor(downaction): remove no-op timer.Stop and dead ErrInvalidCommand --- internal/logic/downaction.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/internal/logic/downaction.go b/internal/logic/downaction.go index bf732e8..dbc19b4 100644 --- a/internal/logic/downaction.go +++ b/internal/logic/downaction.go @@ -48,8 +48,6 @@ var ( ErrNoCommand = errors.New("no command to execute") // ErrEmptyCommand is returned when the command name is empty. ErrEmptyCommand = errors.New("command name cannot be empty") - // ErrInvalidCommand is returned when the command is invalid. - ErrInvalidCommand = errors.New("invalid command") ) func validateCommand(command []string) error { @@ -261,7 +259,6 @@ func (dal *DownActionLoop) run(ctx context.Context) { return case <-timer.C: - timer.Stop() } dal.killCurrentCmd() From 81ff39938f30c2f3171a4c66b7e68ce03df46fc0 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:19:04 -0500 Subject: [PATCH 05/12] refactor(cmd): remove dead ExitCodeError and ConfigDump --- internal/cmd.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/internal/cmd.go b/internal/cmd.go index 2544d08..577579c 100644 --- a/internal/cmd.go +++ b/internal/cmd.go @@ -22,8 +22,6 @@ const ( AppName = "upd" // AppShort is the application short description. AppShort = "Tool to monitor if the network connection is up." - // ExitCodeError is the exit code for errors. - ExitCodeError = 1 // ErrChanSize is the buffer size for error channels. ErrChanSize = 1 // SighupChanSize is the buffer size for SIGHUP channels. @@ -35,8 +33,6 @@ const ( ConfigConfig string = "config" // ConfigDebug is the debug flag name. ConfigDebug string = "debug" - // ConfigDump is the dump flag name. - ConfigDump string = "dump" ) // SetupLoop initializes the loop with configuration from the given file. From 6e341823352dfd1e12c93ca37e1e89e5142ac32c Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:19:37 -0500 Subject: [PATCH 06/12] refactor(status): remove unused NewStatHandler constructor --- internal/status/report_test.go | 6 +++--- internal/status/stat-server.go | 5 ----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/internal/status/report_test.go b/internal/status/report_test.go index 1c051ef..c5cacf6 100644 --- a/internal/status/report_test.go +++ b/internal/status/report_test.go @@ -32,10 +32,10 @@ func setupTestServer(t *testing.T, opts ...func(*StatServerConfig)) *StatHandler config: config, } - return NewStatHandler(server) + return &StatHandler{statServer: server} } -func TestNewStatHandler(t *testing.T) { +func TestStatHandlerInit(t *testing.T) { status := NewStatus() status.SetRetention(1 * time.Hour) @@ -50,7 +50,7 @@ func TestNewStatHandler(t *testing.T) { config: config, } - handler := NewStatHandler(server) + handler := &StatHandler{statServer: server} require.NotNil(t, handler) assert.Equal(t, server, handler.statServer) } diff --git a/internal/status/stat-server.go b/internal/status/stat-server.go index 7171a50..37bfd52 100644 --- a/internal/status/stat-server.go +++ b/internal/status/stat-server.go @@ -109,11 +109,6 @@ type StatHandler struct { statServer *StatServer } -// NewStatHandler creates a new statistics handler for the given server. -func NewStatHandler(server *StatServer) *StatHandler { - return &StatHandler{statServer: server} -} - // GenStatReport generates a statistics report from the server's status. func (h *StatHandler) GenStatReport() *Report { return h.statServer.status.GenStatReport(h.statServer.config.Reports) From 0445e89e13f964e8236433b690846a5870a87ef3 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:20:25 -0500 Subject: [PATCH 07/12] refactor(conf): simplify redundant var declaration --- internal/conf.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/internal/conf.go b/internal/conf.go index b78d131..3ef49a7 100644 --- a/internal/conf.go +++ b/internal/conf.go @@ -127,8 +127,6 @@ func configError(msg string, path string, err error) (*Configuration, error) { // ReadConf loads and validates configuration from the given file. func ReadConf(cfgFile string) (*Configuration, error) { - var err error - if cfgFile == "" { cfgFile = DefaultConfig } @@ -140,9 +138,7 @@ func ReadConf(cfgFile string) (*Configuration, error) { // Read file - cfgFile has been cleaned and resolved to absolute path // #nosec G304 // Path is sanitized by filepath.Abs() and filepath.Clean(), and only reads admin-configured files - var content []byte - - content, err = os.ReadFile(absPath) // #nosec G304 + content, err := os.ReadFile(absPath) // #nosec G304 if err != nil { return configError("Could not read config", absPath, err) } From f5827fd81d6202e7124014819e0c62bbc9dfec37 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:20:52 -0500 Subject: [PATCH 08/12] refactor(status): inline InvalidRangeMsg const --- internal/status/changetracker.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/internal/status/changetracker.go b/internal/status/changetracker.go index 2e1729b..0b2e9d9 100644 --- a/internal/status/changetracker.go +++ b/internal/status/changetracker.go @@ -7,11 +7,6 @@ import ( "github.com/hugoh/upd/internal/logger" ) -const ( - // InvalidRangeMsg is the error message returned when the requested range exceeds retention. - InvalidRangeMsg = "range greater than the retention period" -) - // StateChange represents a single state transition in the tracker. type StateChange struct { timestamp time.Time @@ -77,8 +72,7 @@ func (tracker *StateChangeTracker) Prune(currentTime time.Time) { } } -// ErrInvalidRange is returned when the requested duration exceeds retention. -var ErrInvalidRange = errors.New(InvalidRangeMsg) +var ErrInvalidRange = errors.New("range greater than the retention period") // CalculateUptime computes availability percentage and downtime for a given period. func (tracker *StateChangeTracker) CalculateUptime(currentState bool, From 180eaa7ad2a13d2357531cd98302eef54c5d57ca Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:22:37 -0500 Subject: [PATCH 09/12] refactor(status): inline InvalidRangeMsg const --- internal/status/changetracker.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/status/changetracker.go b/internal/status/changetracker.go index 0b2e9d9..5e9f349 100644 --- a/internal/status/changetracker.go +++ b/internal/status/changetracker.go @@ -72,6 +72,7 @@ func (tracker *StateChangeTracker) Prune(currentTime time.Time) { } } +// ErrInvalidRange is returned when the requested duration exceeds retention. var ErrInvalidRange = errors.New("range greater than the retention period") // CalculateUptime computes availability percentage and downtime for a given period. From 7fc7426d0dea3844daa552cd06db6db2491b82d3 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:21:55 -0500 Subject: [PATCH 10/12] refactor(check): remove unused Report getter methods --- internal/check/probe_http_test.go | 6 +++--- internal/check/report.go | 30 ------------------------------ internal/check/report_test.go | 16 +++++++--------- 3 files changed, 10 insertions(+), 42 deletions(-) diff --git a/internal/check/probe_http_test.go b/internal/check/probe_http_test.go index 578739c..8cdbc00 100644 --- a/internal/check/probe_http_test.go +++ b/internal/check/probe_http_test.go @@ -43,8 +43,8 @@ func TestHttpProbe_Success(t *testing.T) { } report := probe.Execute(context.Background(), testTimeout) - require.NoError(t, report.Error()) - assert.Equal(t, testOKStatus, report.Response()) + require.NoError(t, report.error) + assert.Equal(t, testOKStatus, report.response) } func TestHttpProbe_UserAgentHeader(t *testing.T) { @@ -70,7 +70,7 @@ func TestHttpProbe_UserAgentHeader(t *testing.T) { } report := probe.Execute(context.Background(), testTimeout) - require.NoError(t, report.Error()) + require.NoError(t, report.error) assert.Equal(t, "upd/dev", gotUA) } diff --git a/internal/check/report.go b/internal/check/report.go index a4f5b3e..26559d6 100644 --- a/internal/check/report.go +++ b/internal/check/report.go @@ -41,33 +41,3 @@ func (r *Report) LogAttrs() []any { return attrs } - -// Protocol returns the protocol used for the probe (e.g., "http", "tcp", "dns"). -func (r *Report) Protocol() string { - return r.protocol -} - -// Response returns the response from the probe. -// For HTTP requests, this is the HTTP status code. -// For TCP probes, this is the local address (IP:port). -// For DNS probes, this is the resolved IP address and DNS resolver. -// Returns empty string if there was an error. -func (r *Report) Response() string { - return r.response -} - -// Elapsed returns the time taken to complete the probe. -func (r *Report) Elapsed() time.Duration { - return r.elapsed -} - -// Error returns any error that occurred during the probe. -// Returns nil if the probe was successful. -func (r *Report) Error() error { - return r.error -} - -// IsError returns true if the probe encountered an error. -func (r *Report) IsError() bool { - return r.error != nil -} diff --git a/internal/check/report_test.go b/internal/check/report_test.go index 0f4ae2e..d4cce14 100644 --- a/internal/check/report_test.go +++ b/internal/check/report_test.go @@ -71,7 +71,7 @@ func TestProtocol(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { report := &Report{protocol: tt.protocol} - assert.Equal(t, tt.protocol, report.Protocol()) + assert.Equal(t, tt.protocol, report.protocol) }) } } @@ -90,7 +90,7 @@ func TestResponse(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { report := &Report{response: tt.response} - assert.Equal(t, tt.response, report.Response()) + assert.Equal(t, tt.response, report.response) }) } } @@ -98,8 +98,7 @@ func TestResponse(t *testing.T) { func TestResponse_WithErrors(t *testing.T) { err := errors.New("network error") report := &Report{error: err} - resp := report.Response() - assert.Empty(t, resp, "Response should be empty when there's an error") + assert.Empty(t, report.response, "Response should be empty when there's an error") } func TestElapsed(t *testing.T) { @@ -115,7 +114,7 @@ func TestElapsed(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { report := &Report{elapsed: tt.elapsed} - assert.Equal(t, tt.elapsed, report.Elapsed()) + assert.Equal(t, tt.elapsed, report.elapsed) }) } } @@ -133,9 +132,8 @@ func TestError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { report := &Report{error: tt.err} - err := report.Error() - assert.Equal(t, tt.err, err) - assert.Equal(t, tt.wantErr, err != nil) + assert.Equal(t, tt.err, report.error) + assert.Equal(t, tt.wantErr, report.error != nil) }) } } @@ -154,7 +152,7 @@ func TestIsError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { report := &Report{error: tt.err} - assert.Equal(t, tt.isError, report.IsError()) + assert.Equal(t, tt.isError, report.error != nil) }) } } From c8407fe1578ee2307bc2ec30ad99ea24a06eba8e Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:21:55 -0500 Subject: [PATCH 11/12] refactor: fixed linting issues --- internal/conf.go | 84 ++++++++++++++++++------------------ internal/logic/downaction.go | 53 +++++++++++------------ 2 files changed, 67 insertions(+), 70 deletions(-) diff --git a/internal/conf.go b/internal/conf.go index 3ef49a7..f6d5814 100644 --- a/internal/conf.go +++ b/internal/conf.go @@ -210,6 +210,47 @@ func (c Configuration) GetChecks() (*check.List, error) { return checkList, nil } +//nolint:ireturn // intentionally returns interface to abstract probe creation +func probeFromURL(parsedURL *url.URL, checkStr string) check.Probe { + switch parsedURL.Scheme { + case check.DNS: + domain := strings.TrimPrefix(parsedURL.Path, "/") + if domain == "" { + logger.L.Error( + "DNS check missing domain", + logger.LogComponent, logger.LogComponentConfig, + "check", checkStr, + ) + + return nil + } + + if parsedURL.Host == "" { + logger.L.Error("DNS check missing resolver host", + logger.LogComponent, logger.LogComponentConfig, "check", checkStr) + + return nil + } + + port := parsedURL.Port() + if port == "" { + port = DefaultDNSPort + } + + return check.NewDNSProbe(parsedURL.Host+":"+port, domain) + case check.HTTP, check.HTTPS: + return check.NewHTTPProbe(parsedURL.String()) + case check.TCP: + return check.NewTCPProbe(net.JoinHostPort(parsedURL.Hostname(), parsedURL.Port())) + default: + logger.L.Error("unknown protocol in config", + logger.LogComponent, logger.LogComponentConfig, "check", checkStr, + "protocol", parsedURL.Scheme) + + return nil + } +} + // GetChecksCat creates checks from a list of check URIs. func (c Configuration) GetChecksCat(category []string) []*check.Check { checks := make([]*check.Check, 0, len(category)) @@ -222,47 +263,8 @@ func (c Configuration) GetChecksCat(category []string) []*check.Check { continue } - var probe check.Probe - - switch parsedURL.Scheme { - case check.DNS: - domain := strings.TrimPrefix(parsedURL.Path, "/") - if domain == "" { - logger.L.Error( - "DNS check missing domain", - logger.LogComponent, - logger.LogComponentConfig, - "check", - checkStr, - ) - - continue - } - - if parsedURL.Host == "" { - logger.L.Error("DNS check missing resolver host", - logger.LogComponent, logger.LogComponentConfig, "check", checkStr) - - continue - } - - port := parsedURL.Port() - if port == "" { - port = DefaultDNSPort - } - - dnsResolver := parsedURL.Host + ":" + port - probe = check.NewDNSProbe(dnsResolver, domain) - case check.HTTP, check.HTTPS: - probe = check.NewHTTPProbe(parsedURL.String()) - case check.TCP: - hostPort := net.JoinHostPort(parsedURL.Hostname(), parsedURL.Port()) - probe = check.NewTCPProbe(hostPort) - default: - logger.L.Error("unknown protocol in config", - logger.LogComponent, logger.LogComponentConfig, "check", checkStr, - "protocol", parsedURL.Scheme) - + probe := probeFromURL(parsedURL, checkStr) + if probe == nil { continue } diff --git a/internal/logic/downaction.go b/internal/logic/downaction.go index dbc19b4..c1f21a3 100644 --- a/internal/logic/downaction.go +++ b/internal/logic/downaction.go @@ -92,12 +92,9 @@ func (dal *DownActionLoop) Execute(ctx context.Context, execString string) error cmd.Env = cmdEnv logger.L.Info( "executing command", - logger.LogComponent, - logger.LogComponentDownAction, - "exec", - cmd.String(), - "iteration", - iteration, + logger.LogComponent, logger.LogComponentDownAction, + "exec", cmd.String(), + "iteration", iteration, ) if err = cmd.Start(); err != nil { @@ -111,29 +108,7 @@ func (dal *DownActionLoop) Execute(ctx context.Context, execString string) error dal.currentCmd = cmd dal.cmdMu.Unlock() - go func() { - waitErr := cmd.Wait() - - dal.cmdMu.Lock() - if dal.currentCmd == cmd { - dal.currentCmd = nil - } - dal.cmdMu.Unlock() - - if waitErr != nil { - logger.L.Warn( - "error executing command", - logger.LogComponent, - logger.LogComponentDownAction, - "exec", - cmd.String(), - "error", - waitErr, - "stderr", - string(bytes.TrimSpace(stderrBuf.Bytes())), - ) - } - }() + go dal.waitForCmd(cmd, &stderrBuf) return nil } @@ -185,6 +160,26 @@ func (dal *DownActionLoop) Stop(_ context.Context) { dal.cancelFunc() } +func (dal *DownActionLoop) waitForCmd(cmd *exec.Cmd, stderrBuf *bytes.Buffer) { + waitErr := cmd.Wait() + + dal.cmdMu.Lock() + if dal.currentCmd == cmd { + dal.currentCmd = nil + } + dal.cmdMu.Unlock() + + if waitErr != nil { + logger.L.Warn( + "error executing command", + logger.LogComponent, logger.LogComponentDownAction, + "exec", cmd.String(), + "error", waitErr, + "stderr", string(bytes.TrimSpace(stderrBuf.Bytes())), + ) + } +} + // killCurrentCmd kills any currently running command and clears the reference. func (dal *DownActionLoop) killCurrentCmd() { dal.cmdMu.Lock() From 1ff7c1d1493b0c4938adf6578c7b2155be09e483 Mon Sep 17 00:00:00 2001 From: Hugo Haas Date: Sun, 7 Jun 2026 22:41:12 -0500 Subject: [PATCH 12/12] fix(dns): probe config parsing --- internal/check/probe_dns.go | 34 +++++++++++++++--- internal/check/probe_dns_test.go | 59 ++++++++++++++++++++++++++++++++ internal/conf.go | 26 +++----------- 3 files changed, 94 insertions(+), 25 deletions(-) diff --git a/internal/check/probe_dns.go b/internal/check/probe_dns.go index 5e3f8cf..51e4864 100644 --- a/internal/check/probe_dns.go +++ b/internal/check/probe_dns.go @@ -2,6 +2,7 @@ package check import ( "context" + "errors" "fmt" "net" "time" @@ -19,12 +20,37 @@ type DNSProbe struct { resolver DNSResolver } -// NewDNSProbe creates a new DNS probe for the given resolver and domain. -func NewDNSProbe(dnsResolver, domain string) *DNSProbe { +// DefaultDNSPort is the default DNS resolver port. +const DefaultDNSPort = "53" + +var ( + // ErrDNSMissingDomain is returned when no domain is specified. + ErrDNSMissingDomain = errors.New("DNS probe missing domain") + // ErrDNSMissingResolver is returned when no resolver is specified. + ErrDNSMissingResolver = errors.New("DNS probe missing resolver") +) + +// NewDNSProbe creates a new DNS probe for the given resolver host (host:port +// or host-only, port defaults to 53) and domain. +func NewDNSProbe(host, domain string) (*DNSProbe, error) { + if domain == "" { + return nil, ErrDNSMissingDomain + } + + hostname, port, err := net.SplitHostPort(host) + if err != nil { + hostname = host + port = DefaultDNSPort + } + + if hostname == "" { + return nil, ErrDNSMissingResolver + } + return &DNSProbe{ - DNSResolver: dnsResolver, + DNSResolver: net.JoinHostPort(hostname, port), Domain: domain, - } + }, nil } // Scheme returns the protocol scheme (dns). diff --git a/internal/check/probe_dns_test.go b/internal/check/probe_dns_test.go index 5b1d3ff..875a243 100644 --- a/internal/check/probe_dns_test.go +++ b/internal/check/probe_dns_test.go @@ -7,6 +7,9 @@ import ( "net" "strings" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type fakeResolver struct { @@ -18,6 +21,62 @@ func (r *fakeResolver) LookupHost(_ context.Context, _ string) ([]string, error) return r.result, r.err } +const testDomain = "example.com" + +func TestNewDNSProbe_Valid(t *testing.T) { + tests := []struct { + name string + host string + domain string + wantAddr string + wantDomain string + }{ + {host: "1.1.1.1", domain: testDomain, wantAddr: "1.1.1.1:53", wantDomain: testDomain}, + { + host: "1.1.1.1:5353", + domain: testDomain, + wantAddr: "1.1.1.1:5353", + wantDomain: testDomain, + }, + { + host: "[::1]:5353", + domain: testDomain, + wantAddr: "[::1]:5353", + wantDomain: testDomain, + }, + } + + for _, tt := range tests { + t.Run(tt.host, func(t *testing.T) { + probe, err := NewDNSProbe(tt.host, tt.domain) + require.NoError(t, err) + assert.Equal(t, tt.wantAddr, probe.DNSResolver) + assert.Equal(t, tt.wantDomain, probe.Domain) + }) + } +} + +func TestNewDNSProbe_Error(t *testing.T) { + tests := []struct { + name string + host string + domain string + wantErr error + }{ + {name: "missing domain", host: "1.1.1.1", domain: "", wantErr: ErrDNSMissingDomain}, + {name: "missing resolver", host: "", domain: testDomain, wantErr: ErrDNSMissingResolver}, + {name: "port only", host: ":53", domain: testDomain, wantErr: ErrDNSMissingResolver}, + {name: "both missing", host: "", domain: "", wantErr: ErrDNSMissingDomain}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := NewDNSProbe(tt.host, tt.domain) + require.ErrorIs(t, err, tt.wantErr) + }) + } +} + func TestDnsProbe(t *testing.T) { t.Run("returns the first resolved IP address if the request is successful", func(t *testing.T) { resolver := &fakeResolver{result: []string{"1.2.3.4"}} diff --git a/internal/conf.go b/internal/conf.go index f6d5814..db15d50 100644 --- a/internal/conf.go +++ b/internal/conf.go @@ -77,8 +77,6 @@ import ( const ( // DefaultConfig is the default configuration file name. DefaultConfig = ".upd.toml" - // DefaultDNSPort is the default DNS port. - DefaultDNSPort = "53" logLevelTrace = "trace" logLevelDebug = "debug" @@ -214,30 +212,16 @@ func (c Configuration) GetChecks() (*check.List, error) { func probeFromURL(parsedURL *url.URL, checkStr string) check.Probe { switch parsedURL.Scheme { case check.DNS: - domain := strings.TrimPrefix(parsedURL.Path, "/") - if domain == "" { - logger.L.Error( - "DNS check missing domain", + probe, err := check.NewDNSProbe(parsedURL.Host, strings.TrimPrefix(parsedURL.Path, "/")) + if err != nil { + logger.L.Error("invalid DNS check", logger.LogComponent, logger.LogComponentConfig, - "check", checkStr, - ) + "check", checkStr, "error", err) return nil } - if parsedURL.Host == "" { - logger.L.Error("DNS check missing resolver host", - logger.LogComponent, logger.LogComponentConfig, "check", checkStr) - - return nil - } - - port := parsedURL.Port() - if port == "" { - port = DefaultDNSPort - } - - return check.NewDNSProbe(parsedURL.Host+":"+port, domain) + return probe case check.HTTP, check.HTTPS: return check.NewHTTPProbe(parsedURL.String()) case check.TCP: