From 807d7d137885a23adf92c72bc3f763f7207da835 Mon Sep 17 00:00:00 2001
From: Max Lund
Date: Mon, 18 May 2026 21:26:24 -0500
Subject: [PATCH] engage: add -r flag to reload config of a running squadron

A second `squadron engage` against a config dir that already has a
running instance now errors out with a hint, instead of attempting a
fork that would have failed later in daemon.Fork's IsRunning check.

Passing `-r` / `--reload` to a second `engage` signals the running
daemon over SIGHUP to re-read and validate its config. The reload runs
through the same wsbridge.Client.ReloadConfig path the command center
uses, so it inherits the validate-then-atomic-swap semantics: a broken
config is rejected and the running daemon keeps its previous config.

If `-r` is passed when nothing is running, the flag is noted and
ignored and engage starts squadron normally.

daemon.Reload only signals a positive PID read from the PID file, so a
corrupted file can never turn the SIGHUP into a process-group signal.

Includes tests for daemon.Reload covering: missing PID file, malformed
or non-positive PID file, stale PID, and actual SIGHUP delivery to a
live process.
---
 cmd/engage.go                  | 75 ++++++++++++++++++++++++++++++++++
 internal/daemon/daemon.go      | 43 +++++++++++++++++++
 internal/daemon/daemon_test.go | 73 +++++++++++++++++++++++++++++++++
 3 files changed, 191 insertions(+)

diff --git a/cmd/engage.go b/cmd/engage.go
index 108be90..c651ca8 100644
--- a/cmd/engage.go
+++ b/cmd/engage.go
@@ -40,6 +40,7 @@ var (
 	engageCCPort     int
 	engageAutoInit   bool
 	engageForeground bool
+	engageReload     bool
 )
 
 const (
@@ -82,6 +83,7 @@ func init() {
 	engageCmd.Flags().IntVar(&engageCCPort, "cc-port", 8080, "Port for the command center")
 	engageCmd.Flags().BoolVar(&engageAutoInit, "init", false, "Auto-initialize Squadron if not already initialized")
 	engageCmd.Flags().BoolVar(&engageForeground, "foreground", false, "Run in foreground (default: run as background service)")
+	engageCmd.Flags().BoolVarP(&engageReload, "reload", "r", false, "Reload the config of an already-running squadron (no-op if not running)")
 }
 
 func runEngage(cmd *cobra.Command, args []string) {
@@ -95,6 +97,19 @@ func runEngage(cmd *cobra.Command, args []string) {
 		os.Exit(1)
 	}
 
+	running, pid := daemon.IsRunning(engageConfigPath)
+	switch {
+	case running && engageReload:
+		reloadRunningSquadron(pid)
+		return
+	case running:
+		fmt.Fprintf(os.Stderr, "Error: squadron is already running (PID %d).\n", pid)
+		fmt.Fprintln(os.Stderr, "Use 'squadron engage -r' to reload the config, or 'squadron disengage' to stop it.")
+		os.Exit(1)
+	case engageReload:
+		fmt.Println("Squadron is not running — ignoring -r and starting it now.")
+	}
+
 	if warning, err := validateConfigDir(engageConfigPath); err != nil {
 		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 		os.Exit(1)
@@ -392,6 +407,28 @@ func runEngage(cmd *cobra.Command, args []string) {
 		client.Close()
 	}()
 
+	// Re-read the config whenever SIGHUP arrives (sent by `squadron engage -r`
+	// through daemon.Reload). A failed reload is logged and reported with
+	// daemon.SignalFailed.
+	// NOTE(review): nothing here reports success — confirm client.ReloadConfig
+	// (or another path) satisfies daemon.WaitReady, otherwise a good reload
+	// leaves `engage -r` waiting for its full 30s timeout.
+	reloads := make(chan os.Signal, 1)
+	signal.Notify(reloads, syscall.SIGHUP)
+	go func() {
+		for {
+			select {
+			case <-shutdown:
+				return
+			case <-reloads:
+				if err := client.ReloadConfig(); err != nil {
+					log.Printf("Config reload failed: %v", err)
+					daemon.SignalFailed(engageConfigPath, err)
+				}
+			}
+		}
+	}()
+
 	// Periodic sweep of expired per-run mission folders. Runs hourly, reads
 	// the live config so new run_folder bases show up after a reload.
 	go runFolderCleanupLoop(shutdown, client.GetConfig)
@@ -503,6 +540,44 @@ func isContainer() bool {
 	return os.Getenv("SQUADRON_CONTAINER") == "1"
 }
 
+// reloadRunningSquadron signals the already-running squadron for
+// engageConfigPath to reload its config and waits up to 30s on
+// daemon.WaitReady for the outcome. pid is the PID reported by
+// daemon.IsRunning and is only used in messages. It exits the process with
+// status 1 if the path cannot be resolved, the signal fails, or the reload
+// is not reported OK.
+func reloadRunningSquadron(pid int) {
+	absConfigPath, err := filepath.Abs(engageConfigPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error resolving config path: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Squadron is already running (PID %d). Reloading config from %s...\n", pid, absConfigPath)
+
+	// Reset the ready state before signaling; presumably this keeps WaitReady
+	// from reporting a result left over from startup or an earlier reload.
+	daemon.ClearReady(absConfigPath)
+
+	if _, err := daemon.Reload(absConfigPath); err != nil {
+		fmt.Fprintf(os.Stderr, "Error signaling squadron (PID %d): %v\n", pid, err)
+		os.Exit(1)
+	}
+
+	sp := startSpinner("Validating and applying")
+	ready := daemon.WaitReady(absConfigPath, 30*time.Second, 500*time.Millisecond)
+	sp.Stop()
+
+	if !ready.OK {
+		fmt.Fprintf(os.Stderr, "Config reload failed: %s\n", ready.Error)
+		// Point at -r: a plain `engage` is refused while squadron is running.
+		fmt.Fprintf(os.Stderr, "Squadron is still running with the previous config (PID %d). Fix the error above and re-run 'squadron engage -r'.\n", pid)
+		os.Exit(1)
+	}
+
+	fmt.Println("Config reloaded successfully.")
+}
+
 func hasHCLFiles(configPath string) bool {
 	info, err := os.Stat(configPath)
 	if err != nil {
diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go
index 4277b94..3c41462 100644
--- a/internal/daemon/daemon.go
+++ b/internal/daemon/daemon.go
@@ -180,6 +180,49 @@ func Fork(configPath string, extraFlags []string) (int, error) {
 	return pid, nil
 }
 
+// Reload sends SIGHUP to the daemon recorded in the PID file for configPath,
+// asking it to re-read its config. It returns the PID that was signaled.
+//
+// An error is returned when the PID file is missing or unreadable, does not
+// hold a positive integer, or the process cannot be signaled (for example
+// because the recorded PID is stale).
+func Reload(configPath string) (int, error) {
+	absConfig, err := filepath.Abs(configPath)
+	if err != nil {
+		return 0, fmt.Errorf("could not resolve config path: %w", err)
+	}
+
+	pidPath := PidFilePath(absConfig)
+	data, err := os.ReadFile(pidPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return 0, fmt.Errorf("no PID file found — squadron may not be running")
+		}
+		return 0, fmt.Errorf("could not read PID file %s: %w", pidPath, err)
+	}
+
+	pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
+	if err != nil {
+		return 0, fmt.Errorf("invalid PID file %s: %w", pidPath, err)
+	}
+	// kill(2) treats 0 and negative PIDs as process-group targets; never
+	// forward one from a corrupted PID file.
+	if pid <= 0 {
+		return 0, fmt.Errorf("invalid PID file %s: non-positive PID %d", pidPath, pid)
+	}
+
+	process, err := os.FindProcess(pid)
+	if err != nil {
+		return 0, fmt.Errorf("process %d not found: %w", pid, err)
+	}
+
+	if err := process.Signal(syscall.SIGHUP); err != nil {
+		return 0, fmt.Errorf("could not signal process %d: %w", pid, err)
+	}
+
+	return pid, nil
+}
+
 // Stop reads the PID file and gracefully stops the background process.
 func Stop(configPath string) error {
 	absConfig, err := filepath.Abs(configPath)
diff --git a/internal/daemon/daemon_test.go b/internal/daemon/daemon_test.go
index 8f6590a..703bebf 100644
--- a/internal/daemon/daemon_test.go
+++ b/internal/daemon/daemon_test.go
@@ -3,7 +3,9 @@ package daemon
 import (
 	"fmt"
 	"os"
+	"os/signal"
 	"path/filepath"
+	"syscall"
 	"testing"
 	"time"
 )
@@ -220,6 +222,77 @@ func TestIsRunning_LiveProcess(t *testing.T) {
 	}
 }
 
+// writeReloadPidFile creates the PID file for configDir with the given raw
+// contents, creating its parent directory first.
+func writeReloadPidFile(t *testing.T, configDir, contents string) {
+	t.Helper()
+	pidPath := PidFilePath(configDir)
+	if err := os.MkdirAll(filepath.Dir(pidPath), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(pidPath, []byte(contents), 0644); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestReload_NoPidFile(t *testing.T) {
+	dir := t.TempDir()
+	if _, err := Reload(dir); err == nil {
+		t.Fatal("Reload should error when no PID file exists")
+	}
+}
+
+func TestReload_InvalidPidFile(t *testing.T) {
+	// Non-numeric contents, plus 0 and negative values that kill(2) would
+	// interpret as process-group targets rather than a single process.
+	for _, contents := range []string{"not-a-pid", "", "0", "-1"} {
+		dir := t.TempDir()
+		writeReloadPidFile(t, dir, contents)
+		if _, err := Reload(dir); err == nil {
+			t.Errorf("Reload should error for PID file contents %q", contents)
+		}
+	}
+}
+
+func TestReload_StaleProcess(t *testing.T) {
+	dir := t.TempDir()
+	// The largest 32-bit PID is above the PID limit of Linux and the BSDs /
+	// macOS, so it can never name a live process (999999 can on Linux).
+	writeReloadPidFile(t, dir, "2147483647")
+	if _, err := Reload(dir); err == nil {
+		t.Fatal("Reload should error when target process does not exist")
+	}
+}
+
+func TestReload_DeliversSighup(t *testing.T) {
+	dir := t.TempDir()
+
+	// Subscribe before signaling: an unhandled SIGHUP would kill the test binary.
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGHUP)
+	defer signal.Stop(sigs)
+
+	// Use the current test process as the SIGHUP target. The trailing newline
+	// checks that surrounding whitespace in the PID file is tolerated.
+	pid := os.Getpid()
+	writeReloadPidFile(t, dir, fmt.Sprintf("%d\n", pid))
+
+	gotPID, err := Reload(dir)
+	if err != nil {
+		t.Fatalf("Reload returned error: %v", err)
+	}
+	if gotPID != pid {
+		t.Errorf("returned pid = %d, want %d", gotPID, pid)
+	}
+
+	select {
+	case <-sigs:
+		// got it
+	case <-time.After(2 * time.Second):
+		t.Fatal("did not receive SIGHUP within timeout")
+	}
+}
+
 func TestResolveConfigDir(t *testing.T) {
 	dir := t.TempDir()
 	file := filepath.Join(dir, "squadron.hcl")