diff --git a/docs/local-dev.md b/docs/local-dev.md
index 79e6b7a..98bf4be 100644
--- a/docs/local-dev.md
+++ b/docs/local-dev.md
@@ -134,3 +134,16 @@ amesh-node update
 Authenticated admins can also trigger the same node-side updater from the dashboard. The control plane sends a `node.update` command over the existing node websocket, the daemon runs `amesh-node update`, and a managed systemd service should restart back into the new binary after the process exits.
 - The dashboard only shows the update action when the node reports an installed release tag and that tag differs from the control plane's latest known GitHub release tag.
 - Daemon-triggered self-updates reuse the node's active `server`, `config`, and `state` paths and deliberately avoid `systemctl stop` during the update run. The daemon exits after the installer finishes and systemd restarts it into the new binary.
+
+## Remote reinstall
+
+```bash
+amesh-node reinstall
+```
+
+The shared CLI also exposes the same command as `amesh reinstall`.
+
+`reinstall` is the destructive recovery path for a stale or suspect node install. It stops and disables the managed user service, removes the node service file, durable node state, detected agent config, installed `amesh-node` and `amesh` binaries, and the managed `~/.local/share/amesh` payload, then runs the installer again from scratch.
+- Use `reinstall` when you suspect stale node state, stale detected agent inventory, or broken managed ACPX/node wiring.
+- `reinstall` preserves the user ACPX config at `~/.acpx/config.json`; it only wipes amesh-managed node artifacts.
+- On success, the installer re-detects agents, re-registers the node, rewrites the service, and starts the managed daemon again.
diff --git a/docs/testing.md b/docs/testing.md
index 90ad494..e7d7cc4 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -38,3 +38,5 @@
 - The web app also covers the top-bar MCP config panel so the copy-paste client snippets stay aligned with the server endpoint and scope headers.
 - The Go daemon owns table-driven tests for config loading, reconnect logic, update, detect, exposed-path command dispatch, and `acpx` process lifecycle including streamed output and cancellation.
 - The dev helper script also has a regression shell test for the stale local reconnect-token path, so local `pnpm dev:daemon` re-registers automatically after a fresh control-plane reset.
+- The Go daemon also covers the shared `reinstall` subcommand and verifies that reinstall mode passes the destructive reset flag through to the installer.
+- `scripts/test-install-amesh-node.sh` also covers remote self-update and full reinstall flows, including reinstall-time cleanup of stale node state, config, service, binaries, and managed amesh home.
diff --git a/install-amesh-node.sh b/install-amesh-node.sh
index 1f4ade6..16b6163 100644
--- a/install-amesh-node.sh
+++ b/install-amesh-node.sh
@@ -19,6 +19,7 @@ SERVER_URL="${SERVER_URL:-}"
 REGISTRATION_TOKEN="${REGISTRATION_TOKEN:-}"
 NODE_ID="${NODE_ID:-$(hostname)-amesh}"
 SELF_UPDATE="${AMESH_NODE_SELF_UPDATE:-0}"
+REINSTALL="${AMESH_NODE_REINSTALL:-0}"
 
 log() {
   printf '%s\n' "$*" >&2
@@ -220,6 +221,19 @@ main() {
   tmp_dir="$(mktemp -d)"
   trap 'rm -rf "${tmp_dir}"' EXIT
 
+  # Reinstall mode: stop the managed service and wipe amesh-managed artifacts
+  # so the rest of the install runs from scratch.
+  # NOTE(review): the wipe runs before the install dirs are recreated; confirm a failed download cannot strand the node.
+  if [[ "$REINSTALL" == "1" ]]; then
+    log "reinstall requested; removing existing node install artifacts"
+    if command -v systemctl >/dev/null 2>&1; then
+      systemctl --user stop "$SERVICE_NAME" >/dev/null 2>&1 || true
+      systemctl --user disable "$SERVICE_NAME" >/dev/null 2>&1 || true
+    fi
+    rm -f "$SERVICE_PATH" "$STATE_PATH" "$CONFIG_PATH" "$binary_path" "$cli_binary_path"
+    rm -rf "$AMESH_HOME"
+  fi
+
   mkdir -p "${install_dir}"
   mkdir -p "${AMESH_HOME}"
   mkdir -p "$(dirname "$STATE_PATH")"
diff --git a/internal/app/app.go b/internal/app/app.go
index e43431e..fc14f52 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -47,6 +47,7 @@ type nodeUpdateOptions struct {
 	ConfigPath string
 	StatePath  string
 	SelfUpdate bool
+	Reinstall  bool
 }
 
 type updateRunner func(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error
@@ -96,7 +97,7 @@ func Run(ctx context.Context, args []string) error {
 
 func run(ctx context.Context, args []string, update updateRunner, detect detectRunner) error {
 	if len(args) == 0 {
-		return errors.New("expected subcommand: register, run, detect, update, or acp")
+		return errors.New("expected subcommand: register, run, detect, update, reinstall, logs, or acp")
 	}
 
 	switch args[0] {
@@ -108,6 +109,8 @@ func run(ctx context.Context, args []string, update updateRunner, detect detectR
 		return runDetectCommand(ctx, args[1:], detect)
 	case "update":
 		return update(ctx, os.Stdout, os.Stderr, nodeUpdateOptions{})
+	case "reinstall":
+		return update(ctx, os.Stdout, os.Stderr, nodeUpdateOptions{Reinstall: true})
 	case "acp":
 		return runACPBridge(ctx, args[1:], os.Stdin, os.Stdout)
 	case "logs":
@@ -168,6 +171,21 @@ func runACPBridge(ctx context.Context, args []string, stdin io.Reader, stdout io
 }
 
+// runUpdate runs the installer with the given options; options.Reinstall selects reinstall mode.
 func runUpdate(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error {
+	return runInstaller(ctx, stdout, stderr, options)
+}
+
+// runReinstall runs the installer in reinstall mode. It forces options.Reinstall
+// on so callers cannot accidentally request a plain update through this path.
+func runReinstall(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error {
+	options.Reinstall = true
+	return runInstaller(ctx, stdout, stderr, options)
+}
+
+// runInstaller executes the node installer for both the update and reinstall
+// flows. When options.Reinstall is set it exports AMESH_NODE_REINSTALL=1 to the
+// installer and reports progress and failures as a reinstall instead of an update.
+func runInstaller(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error {
 	if _, err := exec.LookPath("bash"); err != nil {
 		return errors.New("required CLI missing: bash")
 	}
@@ -203,14 +221,24 @@ func runUpdate(ctx context.Context, stdout, stderr io.Writer, options nodeUpdate
 	if options.SelfUpdate {
 		cmd.Env = append(cmd.Env, "AMESH_NODE_SELF_UPDATE=1")
 	}
+	if options.Reinstall {
+		cmd.Env = append(cmd.Env, "AMESH_NODE_REINSTALL=1")
+	}
 	if os.Getenv("INSTALL_DIR") == "" {
 		if installDir, ok := currentInstallDir(); ok {
 			cmd.Env = append(cmd.Env, "INSTALL_DIR="+installDir)
 		}
 	}
 
-	fmt.Fprintf(stdout, "updating amesh-node from %s\n", installerURL)
+	action := "updating"
+	if options.Reinstall {
+		action = "reinstalling"
+	}
+	fmt.Fprintf(stdout, "%s amesh-node from %s\n", action, installerURL)
 	if err := cmd.Run(); err != nil {
+		if options.Reinstall {
+			return fmt.Errorf("reinstall failed: %w", err)
+		}
 		return fmt.Errorf("update failed: %w", err)
 	}
 	return nil
diff --git a/internal/app/app_test.go b/internal/app/app_test.go
index 83c762c..f36f340 100644
--- a/internal/app/app_test.go
+++ b/internal/app/app_test.go
@@ -140,6 +140,32 @@ func TestRunDispatchesUpdateSubcommand(t *testing.T) {
 	}
 }
 
+// TestRunDispatchesReinstallSubcommand checks that the "reinstall" subcommand
+// reaches the update runner with the Reinstall option set.
+func TestRunDispatchesReinstallSubcommand(t *testing.T) {
+	t.Parallel()
+
+	called := false
+	err := run(
+		context.Background(),
+		[]string{"reinstall"},
+		func(_ context.Context, _ io.Writer, _ io.Writer, options nodeUpdateOptions) error {
+			called = true
+			if !options.Reinstall {
+				t.Fatal("expected reinstall flag to be set")
+			}
+			return nil
+		},
+		func(context.Context, string) error { return nil },
+	)
+	if err != nil {
+		t.Fatalf("run() error = %v", err)
+	}
+	if !called {
+		t.Fatal("expected update runner to be called for reinstall")
+	}
+}
+
 func TestRunDispatchesDetectSubcommand(t *testing.T) {
 	t.Parallel()
 
@@ -282,6 +308,45 @@ func TestRunUpdatePassesRuntimeContextToInstaller(t *testing.T) {
 	}
 }
 
+// TestRunReinstallPassesResetModeToInstaller stubs curl to capture the installer
+// environment and checks that reinstall mode, server URL, and state path are exported.
+func TestRunReinstallPassesResetModeToInstaller(t *testing.T) {
+	binDir := t.TempDir()
+	envLogPath := filepath.Join(t.TempDir(), "installer-env.log")
+	writeExecutable(t, filepath.Join(binDir, "curl"), fmt.Sprintf(`#!/bin/sh
+  printf 'AMESH_NODE_REINSTALL=%%s\nSERVER_URL=%%s\nSTATE_PATH=%%s\n' \
+    "$AMESH_NODE_REINSTALL" "$SERVER_URL" "$STATE_PATH" > %q
+  printf '%%s\n' '#!/bin/sh'
+  printf '%%s\n' 'exit 0'
+`, envLogPath))
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH"))
+	t.Setenv("AMESH_INSTALL_URL", "https://example.invalid/install-amesh-node.sh")
+
+	var stdout bytes.Buffer
+	err := runReinstall(context.Background(), &stdout, io.Discard, nodeUpdateOptions{
+		ServerURL: "ws://example.invalid/ws?role=node",
+		StatePath: "/srv/amesh/node-state.json",
+	})
+	if err != nil {
+		t.Fatalf("runReinstall() error = %v", err)
+	}
+
+	envLog, err := os.ReadFile(envLogPath)
+	if err != nil {
+		t.Fatalf("read env log: %v", err)
+	}
+	got := string(envLog)
+	for _, want := range []string{
+		"AMESH_NODE_REINSTALL=1",
+		"SERVER_URL=ws://example.invalid/ws?role=node",
+		"STATE_PATH=/srv/amesh/node-state.json",
+	} {
+		if !strings.Contains(got, want) {
+			t.Fatalf("installer env = %q, want %q", got, want)
+		}
+	}
+}
+
 func TestRunDaemonSessionHandlesNodeDetect(t *testing.T) {
 	t.Parallel()
 
diff --git a/scripts/test-install-amesh-node.sh b/scripts/test-install-amesh-node.sh
index 9b37e61..6bb1a7a 100644
--- a/scripts/test-install-amesh-node.sh
+++ b/scripts/test-install-amesh-node.sh
@@ -326,3 +326,187 @@ if grep -F 'stop amesh-node' "$self_systemctl_log" >/dev/null 2>&1; then
   cat "$self_systemctl_log" >&2
   exit 1
 fi
+
+# Reinstall flow: seed stale node artifacts, run the installer in reinstall mode
+# against stubbed tools, then check that every stale artifact was wiped or replaced.
+reinstall_stub_dir="$tmp_dir/reinstall-bin"
+mkdir -p "$reinstall_stub_dir"
+
+cat <<'EOF' >"$reinstall_stub_dir/curl"
+#!/usr/bin/env bash
+set -euo pipefail
+archive="${@: -1}"
+printf 'stub archive' >"$archive"
+EOF
+chmod +x "$reinstall_stub_dir/curl"
+
+cat <<'EOF' >"$reinstall_stub_dir/npm"
+#!/usr/bin/env bash
+set -euo pipefail
+exit 0
+EOF
+chmod +x "$reinstall_stub_dir/npm"
+
+cat <<'EOF' >"$reinstall_stub_dir/systemctl"
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >>"${SYSTEMCTL_LOG:?}"
+verb=
+for arg in "$@"; do
+  case "$arg" in
+    --user|--now|--quiet|--no-pager|--full)
+      continue
+      ;;
+    *)
+      verb="$arg"
+      break
+      ;;
+  esac
+done
+case "$verb" in
+  stop|disable|daemon-reload|enable|is-active)
+    exit 0
+    ;;
+  *)
+    exit 99
+    ;;
+esac
+EOF
+chmod +x "$reinstall_stub_dir/systemctl"
+
+cat <<'EOF' >"$reinstall_stub_dir/uname"
+#!/usr/bin/env bash
+set -euo pipefail
+case "${1:-}" in
+  -m)
+    printf 'x86_64\n'
+    ;;
+  *)
+    printf 'Linux\n'
+    ;;
+esac
+EOF
+chmod +x "$reinstall_stub_dir/uname"
+
+cat <<'EOF' >"$reinstall_stub_dir/mktemp"
+#!/usr/bin/env bash
+set -euo pipefail
+dir="${TMPDIR:-/tmp}/amesh-test-reinstall"
+mkdir -p "$dir"
+printf '%s\n' "$dir"
+EOF
+chmod +x "$reinstall_stub_dir/mktemp"
+
+cat <<'EOF' >"$reinstall_stub_dir/tar"
+#!/usr/bin/env bash
+set -euo pipefail
+target_dir=
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -C)
+      target_dir="$2"
+      shift 2
+      ;;
+    *)
+      shift
+      ;;
+  esac
+done
+mkdir -p "$target_dir"
+cat <<'BIN' >"$target_dir/amesh-node"
+#!/usr/bin/env bash
+set -euo pipefail
+exit 0
+BIN
+chmod +x "$target_dir/amesh-node"
+cat <<'BIN' >"$target_dir/amesh"
+#!/usr/bin/env bash
+set -euo pipefail
+exit 0
+BIN
+chmod +x "$target_dir/amesh"
+EOF
+chmod +x "$reinstall_stub_dir/tar"
+
+cat <<'EOF' >"$reinstall_stub_dir/install"
+#!/usr/bin/env bash
+set -euo pipefail
+src="${@: -2:1}"
+dest="${@: -1}"
+cp "$src" "$dest"
+chmod 0755 "$dest"
+EOF
+chmod +x "$reinstall_stub_dir/install"
+
+cat <<'EOF' >"$reinstall_stub_dir/node"
+#!/usr/bin/env bash
+set -euo pipefail
+case "${1:-}" in
+  -v)
+    printf 'v24.13.1\n'
+    ;;
+  -p)
+    printf '24\n'
+    ;;
+  *)
+    exit 0
+    ;;
+esac
+EOF
+chmod +x "$reinstall_stub_dir/node"
+
+reinstall_env_dir="$tmp_dir/reinstall-env"
+mkdir -p "$reinstall_env_dir/home/keep-me"
+mkdir -p "$reinstall_env_dir/bin"
+printf '{"old":true}\n' >"$reinstall_env_dir/agents.json"
+printf '{"nodeId":"node-a","reconnectToken":"token","serverUrl":"ws://saved.invalid/ws?role=node","configPath":"%s"}\n' "$reinstall_env_dir/agents.json" >"$reinstall_env_dir/node-state.json"
+printf '[Unit]\nDescription=old service\n' >"$reinstall_env_dir/amesh-node.service"
+printf 'old binary\n' >"$reinstall_env_dir/bin/amesh-node"
+printf 'old cli\n' >"$reinstall_env_dir/bin/amesh"
+printf 'stale managed home\n' >"$reinstall_env_dir/home/keep-me/stale.txt"
+
+reinstall_systemctl_log="$tmp_dir/reinstall-systemctl.log"
+reinstall_log="$tmp_dir/reinstall.log"
+if ! PATH="$reinstall_stub_dir:$PATH" \
+  SYSTEMCTL_LOG="$reinstall_systemctl_log" \
+  AMESH_NODE_REINSTALL='1' \
+  AMESH_VERSION_TAG='test-tag' \
+  INSTALL_DIR="$reinstall_env_dir/bin" \
+  AMESH_HOME="$reinstall_env_dir/home" \
+  ACPX_PREFIX="$reinstall_env_dir/acpx" \
+  ACPX_CONFIG_PATH="$reinstall_env_dir/acpx-config.json" \
+  CONFIG_PATH="$reinstall_env_dir/agents.json" \
+  STATE_PATH="$reinstall_env_dir/node-state.json" \
+  SERVICE_PATH="$reinstall_env_dir/amesh-node.service" \
+  NODE_ID='reinstall-node' \
+  SERVER_URL='wss://example.invalid/ws?role=node' \
+  REGISTRATION_TOKEN='token' \
+  bash <"$ROOT_DIR/install-amesh-node.sh" >"$reinstall_log" 2>&1; then
+  printf 'expected reinstall installer execution to succeed\n' >&2
+  cat "$reinstall_log" >&2
+  exit 1
+fi
+
+assert_contains 'stop amesh-node' "$reinstall_systemctl_log"
+assert_contains 'disable amesh-node' "$reinstall_systemctl_log"
+assert_contains 'enable --now amesh-node' "$reinstall_systemctl_log"
+if [[ -f "$reinstall_env_dir/home/keep-me/stale.txt" ]]; then
+  printf 'reinstall should remove previous managed amesh home\n' >&2
+  exit 1
+fi
+if grep -F '"old":true' "$reinstall_env_dir/agents.json" >/dev/null 2>&1; then
+  printf 'reinstall should replace stale agent config\n' >&2
+  exit 1
+fi
+if grep -F 'saved.invalid' "$reinstall_env_dir/node-state.json" >/dev/null 2>&1; then
+  printf 'reinstall should remove stale node state\n' >&2
+  exit 1
+fi
+if grep -F 'old service' "$reinstall_env_dir/amesh-node.service" >/dev/null 2>&1; then
+  printf 'reinstall should replace stale service file\n' >&2
+  exit 1
+fi
+if grep -F -e 'old binary' -e 'old cli' "$reinstall_env_dir/bin/amesh-node" "$reinstall_env_dir/bin/amesh" >/dev/null 2>&1; then
+  printf 'reinstall should replace stale binaries\n' >&2
+  exit 1
+fi