Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/local-dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,16 @@ amesh-node update
Authenticated admins can also trigger the same node-side updater from the dashboard. The control plane sends a `node.update` command over the existing node websocket, the daemon runs `amesh-node update`, and a managed systemd service should restart back into the new binary after the process exits.
- The dashboard only shows the update action when the node reports an installed release tag and that tag differs from the control plane's latest known GitHub release tag.
- Daemon-triggered self-updates reuse the node's active `server`, `config`, and `state` paths and deliberately avoid `systemctl stop` during the update run. The daemon exits after the installer finishes and systemd restarts it into the new binary.

## Remote reinstall

```bash
amesh-node reinstall
```

The shared CLI also exposes the same command as `amesh reinstall`.

`reinstall` is the destructive recovery path for a stale or suspect node install. It stops and disables the managed user service; removes the node service file, durable node state, detected agent config, the installed `amesh-node` and `amesh` binaries, and the managed `~/.local/share/amesh` payload; and then runs the installer again from scratch.
- Use `reinstall` when you suspect stale node state, stale detected agent inventory, or broken managed ACPX/node wiring.
- `reinstall` preserves the user ACPX config at `~/.acpx/config.json`; it only wipes amesh-managed node artifacts.
- On success, the installer re-detects agents, re-registers the node, rewrites the service, and starts the managed daemon again.
2 changes: 2 additions & 0 deletions docs/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@
- The web app also covers the top-bar MCP config panel so the copy-paste client snippets stay aligned with the server endpoint and scope headers.
- The Go daemon owns table-driven tests for config loading, reconnect logic, update, detect, exposed-path command dispatch, and `acpx` process lifecycle including streamed output and cancellation.
- The dev helper script also has a regression shell test for the stale local reconnect-token path, so local `pnpm dev:daemon` re-registers automatically after a fresh control-plane reset.
- The Go daemon also covers the shared `reinstall` subcommand and verifies that reinstall mode passes the destructive reset flag through to the installer.
- `scripts/test-install-amesh-node.sh` also covers remote self-update and full reinstall flows, including reinstall-time cleanup of stale node state, config, service, binaries, and managed amesh home.
11 changes: 11 additions & 0 deletions install-amesh-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ SERVER_URL="${SERVER_URL:-}"
REGISTRATION_TOKEN="${REGISTRATION_TOKEN:-}"
NODE_ID="${NODE_ID:-$(hostname)-amesh}"
SELF_UPDATE="${AMESH_NODE_SELF_UPDATE:-0}"
REINSTALL="${AMESH_NODE_REINSTALL:-0}"

log() {
printf '%s\n' "$*" >&2
Expand Down Expand Up @@ -220,6 +221,16 @@ main() {
tmp_dir="$(mktemp -d)"
trap 'rm -rf "${tmp_dir}"' EXIT

if [[ "$REINSTALL" == "1" ]]; then
log "reinstall requested; removing existing node install artifacts"
if command -v systemctl >/dev/null 2>&1; then
systemctl --user stop "$SERVICE_NAME" >/dev/null 2>&1 || true
systemctl --user disable "$SERVICE_NAME" >/dev/null 2>&1 || true
fi
rm -f "$SERVICE_PATH" "$STATE_PATH" "$CONFIG_PATH" "$binary_path" "$cli_binary_path"
rm -rf "$AMESH_HOME"
fi

mkdir -p "${install_dir}"
mkdir -p "${AMESH_HOME}"
mkdir -p "$(dirname "$STATE_PATH")"
Expand Down
26 changes: 24 additions & 2 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type nodeUpdateOptions struct {
ConfigPath string
StatePath string
SelfUpdate bool
Reinstall bool
}

type updateRunner func(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error
Expand Down Expand Up @@ -96,7 +97,7 @@ func Run(ctx context.Context, args []string) error {

func run(ctx context.Context, args []string, update updateRunner, detect detectRunner) error {
if len(args) == 0 {
return errors.New("expected subcommand: register, run, detect, update, or acp")
return errors.New("expected subcommand: register, run, detect, update, reinstall, logs, or acp")
}

switch args[0] {
Expand All @@ -108,6 +109,8 @@ func run(ctx context.Context, args []string, update updateRunner, detect detectR
return runDetectCommand(ctx, args[1:], detect)
case "update":
return update(ctx, os.Stdout, os.Stderr, nodeUpdateOptions{})
case "reinstall":
return update(ctx, os.Stdout, os.Stderr, nodeUpdateOptions{Reinstall: true})
case "acp":
return runACPBridge(ctx, args[1:], os.Stdin, os.Stdout)
case "logs":
Expand Down Expand Up @@ -168,6 +171,15 @@ func runACPBridge(ctx context.Context, args []string, stdin io.Reader, stdout io
}

// runUpdate runs the installer in the mode selected by options.Reinstall,
// forwarding ctx, both output writers and the options unchanged.
func runUpdate(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error {
	reinstall := options.Reinstall
	return runInstaller(ctx, stdout, stderr, options, reinstall)
}

// runReinstall runs the installer with reinstall mode forced on, regardless
// of the Reinstall value the caller supplied. options is received by value,
// so the caller's copy is left untouched.
func runReinstall(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions) error {
	const reinstall = true
	options.Reinstall = reinstall
	return runInstaller(ctx, stdout, stderr, options, reinstall)
}

func runInstaller(ctx context.Context, stdout, stderr io.Writer, options nodeUpdateOptions, reinstall bool) error {
if _, err := exec.LookPath("bash"); err != nil {
return errors.New("required CLI missing: bash")
}
Expand Down Expand Up @@ -203,14 +215,24 @@ func runUpdate(ctx context.Context, stdout, stderr io.Writer, options nodeUpdate
if options.SelfUpdate {
cmd.Env = append(cmd.Env, "AMESH_NODE_SELF_UPDATE=1")
}
if reinstall {
cmd.Env = append(cmd.Env, "AMESH_NODE_REINSTALL=1")
}
if os.Getenv("INSTALL_DIR") == "" {
if installDir, ok := currentInstallDir(); ok {
cmd.Env = append(cmd.Env, "INSTALL_DIR="+installDir)
}
}

fmt.Fprintf(stdout, "updating amesh-node from %s\n", installerURL)
action := "updating"
if reinstall {
action = "reinstalling"
}
fmt.Fprintf(stdout, "%s amesh-node from %s\n", action, installerURL)
if err := cmd.Run(); err != nil {
if reinstall {
return fmt.Errorf("reinstall failed: %w", err)
}
return fmt.Errorf("update failed: %w", err)
}
return nil
Expand Down
61 changes: 61 additions & 0 deletions internal/app/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,30 @@ func TestRunDispatchesUpdateSubcommand(t *testing.T) {
}
}

func TestRunDispatchesReinstallSubcommand(t *testing.T) {
t.Parallel()

called := false
err := run(
context.Background(),
[]string{"reinstall"},
func(_ context.Context, _ io.Writer, _ io.Writer, options nodeUpdateOptions) error {
called = true
if !options.Reinstall {
t.Fatal("expected reinstall flag to be set")
}
return nil
},
func(context.Context, string) error { return nil },
)
if err != nil {
t.Fatalf("run() error = %v", err)
}
if !called {
t.Fatal("expected update runner to be called for reinstall")
}
}

func TestRunDispatchesDetectSubcommand(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -282,6 +306,43 @@ func TestRunUpdatePassesRuntimeContextToInstaller(t *testing.T) {
}
}

// TestRunReinstallPassesResetModeToInstaller verifies that runReinstall
// exports AMESH_NODE_REINSTALL=1 together with the caller's server URL and
// state path, and that it announces the run as a reinstall on stdout.
//
// The test is intentionally not parallel: it mutates PATH and
// AMESH_INSTALL_URL through t.Setenv, which must not be combined with
// t.Parallel.
func TestRunReinstallPassesResetModeToInstaller(t *testing.T) {
	binDir := t.TempDir()
	envLogPath := filepath.Join(t.TempDir(), "installer-env.log")

	// Fake curl placed first on PATH. It records the environment variables it
	// was started with into envLogPath, then prints a no-op shell script on
	// stdout (presumably consumed as the downloaded installer; the download
	// wiring lives in runInstaller).
	writeExecutable(t, filepath.Join(binDir, "curl"), fmt.Sprintf(`#!/bin/sh
printf 'AMESH_NODE_REINSTALL=%%s\nSERVER_URL=%%s\nSTATE_PATH=%%s\n' \
"$AMESH_NODE_REINSTALL" "$SERVER_URL" "$STATE_PATH" > %q
printf '%%s\n' '#!/bin/sh'
printf '%%s\n' 'exit 0'
`, envLogPath))
	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH"))
	t.Setenv("AMESH_INSTALL_URL", "https://example.invalid/install-amesh-node.sh")

	var stdout bytes.Buffer
	err := runReinstall(context.Background(), &stdout, io.Discard, nodeUpdateOptions{
		ServerURL: "ws://example.invalid/ws?role=node",
		StatePath: "/srv/amesh/node-state.json",
	})
	if err != nil {
		t.Fatalf("runReinstall() error = %v", err)
	}

	// The banner distinguishes reinstall runs from plain updates.
	if banner := stdout.String(); !strings.Contains(banner, "reinstalling amesh-node from") {
		t.Fatalf("stdout = %q, want reinstall banner", banner)
	}

	// Named envLog rather than bytes so the imported bytes package is not
	// shadowed for the rest of the function.
	envLog, err := os.ReadFile(envLogPath)
	if err != nil {
		t.Fatalf("read env log: %v", err)
	}
	got := string(envLog)
	for _, want := range []string{
		"AMESH_NODE_REINSTALL=1",
		"SERVER_URL=ws://example.invalid/ws?role=node",
		"STATE_PATH=/srv/amesh/node-state.json",
	} {
		if !strings.Contains(got, want) {
			t.Fatalf("installer env = %q, want %q", got, want)
		}
	}
}

func TestRunDaemonSessionHandlesNodeDetect(t *testing.T) {
t.Parallel()

Expand Down
173 changes: 173 additions & 0 deletions scripts/test-install-amesh-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,176 @@ if grep -F 'stop amesh-node' "$self_systemctl_log" >/dev/null 2>&1; then
cat "$self_systemctl_log" >&2
exit 1
fi

# --- Reinstall flow -------------------------------------------------------
# Directory of fake executables for the reinstall scenario; it is prepended
# to PATH when the installer is launched further down.
reinstall_stub_dir="$tmp_dir/reinstall-bin"
mkdir -p "$reinstall_stub_dir"

# curl stub: writes placeholder bytes to the path given as its LAST argument.
# NOTE(review): assumes the installer passes the download destination last.
cat <<'EOF' >"$reinstall_stub_dir/curl"
#!/usr/bin/env bash
set -euo pipefail
archive="${@: -1}"
printf 'stub archive' >"$archive"
EOF
chmod +x "$reinstall_stub_dir/curl"

# npm stub: accepts any arguments and succeeds without doing anything.
cat <<'EOF' >"$reinstall_stub_dir/npm"
#!/usr/bin/env bash
set -euo pipefail
exit 0
EOF
chmod +x "$reinstall_stub_dir/npm"

# systemctl stub: logs each invocation to $SYSTEMCTL_LOG and succeeds only for
# the verbs the reinstall flow is expected to use; anything else exits 99.
cat <<'EOF' >"$reinstall_stub_dir/systemctl"
#!/usr/bin/env bash
set -euo pipefail
# Record every invocation so the test can assert on the calls that were made.
printf '%s\n' "$*" >>"${SYSTEMCTL_LOG:?}"
# The verb is the first argument that is not one of the known option flags.
verb=
for arg in "$@"; do
  case "$arg" in
    --user|--now|--quiet|--no-pager|--full)
      continue
      ;;
    *)
      verb="$arg"
      break
      ;;
  esac
done
case "$verb" in
  stop|disable|daemon-reload|enable|is-active)
    # Every expected verb succeeds, including is-active (unit reported active).
    exit 0
    ;;
  *)
    # An unexpected verb fails with a distinctive status.
    exit 99
    ;;
esac
EOF
chmod +x "$reinstall_stub_dir/systemctl"

# uname stub: reports x86_64 for `-m` and Linux for every other invocation,
# so the run sees a fixed platform regardless of the host.
cat <<'EOF' >"$reinstall_stub_dir/uname"
#!/usr/bin/env bash
set -euo pipefail
case "${1:-}" in
-m)
printf 'x86_64\n'
;;
*)
printf 'Linux\n'
;;
esac
EOF
chmod +x "$reinstall_stub_dir/uname"

# mktemp stub: ignores its arguments and always creates/prints the same fixed
# directory under ${TMPDIR:-/tmp}, giving the run a predictable scratch path.
cat <<'EOF' >"$reinstall_stub_dir/mktemp"
#!/usr/bin/env bash
set -euo pipefail
dir="${TMPDIR:-/tmp}/amesh-test-reinstall"
mkdir -p "$dir"
printf '%s\n' "$dir"
EOF
chmod +x "$reinstall_stub_dir/mktemp"

# tar stub: does not unpack anything. It takes the directory following `-C`
# and drops two executable no-op scripts there, `amesh-node` and `amesh`,
# standing in for the binaries of a release archive.
# NOTE(review): assumes the installer always passes `-C <dir>`; without it
# target_dir stays empty and `mkdir -p ""` fails.
cat <<'EOF' >"$reinstall_stub_dir/tar"
#!/usr/bin/env bash
set -euo pipefail
target_dir=
while [[ $# -gt 0 ]]; do
case "$1" in
-C)
target_dir="$2"
shift 2
;;
*)
shift
;;
esac
done
mkdir -p "$target_dir"
cat <<'BIN' >"$target_dir/amesh-node"
#!/usr/bin/env bash
set -euo pipefail
exit 0
BIN
chmod +x "$target_dir/amesh-node"
cat <<'BIN' >"$target_dir/amesh"
#!/usr/bin/env bash
set -euo pipefail
exit 0
BIN
chmod +x "$target_dir/amesh"
EOF
chmod +x "$reinstall_stub_dir/tar"

# install stub: copies the second-to-last argument to the last argument and
# marks the destination 0755; any option flags before them are ignored.
cat <<'EOF' >"$reinstall_stub_dir/install"
#!/usr/bin/env bash
set -euo pipefail
src="${@: -2:1}"
dest="${@: -1}"
cp "$src" "$dest"
chmod 0755 "$dest"
EOF
chmod +x "$reinstall_stub_dir/install"

# node stub: prints v24.13.1 for `-v` and 24 for `-p`; any other invocation
# succeeds silently.
# NOTE(review): presumably satisfies a Node.js version check in the installer.
cat <<'EOF' >"$reinstall_stub_dir/node"
#!/usr/bin/env bash
set -euo pipefail
case "${1:-}" in
-v)
printf 'v24.13.1\n'
;;
-p)
printf '24\n'
;;
*)
exit 0
;;
esac
EOF
chmod +x "$reinstall_stub_dir/node"

# Seed a fake "previous install" with recognisable stale content: agent
# config, node state pointing at that config, a service unit, both binaries,
# and a file inside the managed amesh home.
reinstall_env_dir="$tmp_dir/reinstall-env"
mkdir -p "$reinstall_env_dir/home/keep-me"
mkdir -p "$reinstall_env_dir/bin"
printf '{"old":true}\n' >"$reinstall_env_dir/agents.json"
printf '{"nodeId":"node-a","reconnectToken":"token","serverUrl":"ws://saved.invalid/ws?role=node","configPath":"%s"}\n' "$reinstall_env_dir/agents.json" >"$reinstall_env_dir/node-state.json"
printf '[Unit]\nDescription=old service\n' >"$reinstall_env_dir/amesh-node.service"
printf 'old binary\n' >"$reinstall_env_dir/bin/amesh-node"
printf 'old cli\n' >"$reinstall_env_dir/bin/amesh"
printf 'stale managed home\n' >"$reinstall_env_dir/home/keep-me/stale.txt"

# Run the installer in reinstall mode (AMESH_NODE_REINSTALL=1) with the stubs
# first on PATH and every managed path redirected into the fixture directory.
# The script is fed to bash on stdin; combined output goes to $reinstall_log
# and is dumped to stderr if the run fails.
reinstall_systemctl_log="$tmp_dir/reinstall-systemctl.log"
reinstall_log="$tmp_dir/reinstall.log"
if ! PATH="$reinstall_stub_dir:$PATH" \
SYSTEMCTL_LOG="$reinstall_systemctl_log" \
AMESH_NODE_REINSTALL='1' \
AMESH_VERSION_TAG='test-tag' \
INSTALL_DIR="$reinstall_env_dir/bin" \
AMESH_HOME="$reinstall_env_dir/home" \
ACPX_PREFIX="$reinstall_env_dir/acpx" \
ACPX_CONFIG_PATH="$reinstall_env_dir/acpx-config.json" \
CONFIG_PATH="$reinstall_env_dir/agents.json" \
STATE_PATH="$reinstall_env_dir/node-state.json" \
SERVICE_PATH="$reinstall_env_dir/amesh-node.service" \
NODE_ID='reinstall-node' \
SERVER_URL='wss://example.invalid/ws?role=node' \
REGISTRATION_TOKEN='token' \
bash <"$ROOT_DIR/install-amesh-node.sh" >"$reinstall_log" 2>&1; then
printf 'expected reinstall installer execution to succeed\n' >&2
cat "$reinstall_log" >&2
exit 1
fi

# The old unit must be stopped and disabled, and the unit enabled again
# afterwards.
assert_contains 'stop amesh-node' "$reinstall_systemctl_log"
assert_contains 'disable amesh-node' "$reinstall_systemctl_log"
assert_contains 'enable --now amesh-node' "$reinstall_systemctl_log"

# Every seeded artifact must be gone or replaced. Each grep also fails (and
# therefore passes the check) when the file no longer exists.
if [[ -f "$reinstall_env_dir/home/keep-me/stale.txt" ]]; then
  printf 'reinstall should remove previous managed amesh home\n' >&2
  exit 1
fi
if grep -F '"old":true' "$reinstall_env_dir/agents.json" >/dev/null 2>&1; then
  printf 'reinstall should replace stale agent config\n' >&2
  exit 1
fi
if grep -F '"nodeId":"node-a"' "$reinstall_env_dir/node-state.json" >/dev/null 2>&1; then
  printf 'reinstall should remove stale node state\n' >&2
  exit 1
fi
if grep -F 'Description=old service' "$reinstall_env_dir/amesh-node.service" >/dev/null 2>&1; then
  printf 'reinstall should replace stale service file\n' >&2
  exit 1
fi
if grep -F 'old binary' "$reinstall_env_dir/bin/amesh-node" >/dev/null 2>&1; then
  printf 'reinstall should replace stale amesh-node binary\n' >&2
  exit 1
fi
if grep -F 'old cli' "$reinstall_env_dir/bin/amesh" >/dev/null 2>&1; then
  printf 'reinstall should replace stale amesh cli binary\n' >&2
  exit 1
fi
Loading