From 85bccfd30c6f9fbb2d0ccdac8c6579b40e6620ce Mon Sep 17 00:00:00 2001 From: Pierre Gaufillet Date: Mon, 6 Apr 2026 10:55:12 +0200 Subject: [PATCH] ha-cluster: add HA cluster orchestration Add ha-cluster, a UCI-based orchestration package that coordinates keepalived (VRRP), owsync (config sync), and optionally lease-sync (DHCP sync) to provide high availability for OpenWrt routers. Reads /etc/config/ha-cluster and generates flat configs for each managed service in /tmp/ha-cluster/. Manages lifecycle via procd. Features: - Single UCI configuration for all HA services - Automatic keepalived.conf generation (VRRP instances, VIPs) - IPv4 + IPv6 dual-stack VRRP support - Unicast and multicast VRRP transport - Service takeover/release on VRRP state transitions - Startup validation (interface checks, DHCP prerequisites) Shell-only package (PKGARCH:=all), ~1000 lines. Depends on: keepalived. Recommends: owsync, lease-sync. Tested on: OpenWrt 24.10 and 25.12 (x86_64 + filogic), production. Signed-off-by: Pierre Gaufillet --- net/ha-cluster/LICENSE | 21 + net/ha-cluster/Makefile | 70 ++ net/ha-cluster/README.md | 244 ++++++ net/ha-cluster/files/ha-cluster.config | 130 +++ net/ha-cluster/files/ha-cluster.init | 136 ++++ net/ha-cluster/files/ha-cluster.sh | 1013 ++++++++++++++++++++++++ 6 files changed, 1614 insertions(+) create mode 100644 net/ha-cluster/LICENSE create mode 100644 net/ha-cluster/Makefile create mode 100644 net/ha-cluster/README.md create mode 100644 net/ha-cluster/files/ha-cluster.config create mode 100644 net/ha-cluster/files/ha-cluster.init create mode 100644 net/ha-cluster/files/ha-cluster.sh diff --git a/net/ha-cluster/LICENSE b/net/ha-cluster/LICENSE new file mode 100644 index 0000000000000..fd0c6dc2a1098 --- /dev/null +++ b/net/ha-cluster/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025-2026 Pierre Gaufillet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files 
(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/net/ha-cluster/Makefile b/net/ha-cluster/Makefile new file mode 100644 index 0000000000000..c6b0a35c32a80 --- /dev/null +++ b/net/ha-cluster/Makefile @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Pierre Gaufillet + +include $(TOPDIR)/rules.mk + +PKG_NAME:=ha-cluster +PKG_VERSION:=1.0.0 +PKG_RELEASE:=1 + +PKG_MAINTAINER:=Pierre Gaufillet +PKG_LICENSE:=MIT +PKG_LICENSE_FILES:=LICENSE + +include $(INCLUDE_DIR)/package.mk + +define Package/ha-cluster + SECTION:=net + CATEGORY:=Network + SUBMENU:=IP Addresses and Names + TITLE:=High Availability Cluster Management + DEPENDS:=+keepalived + PKGARCH:=all +endef + +define Package/ha-cluster/description + UCI-based HA cluster orchestration for OpenWrt. + Coordinates keepalived (VRRP), owsync (config sync), and lease-sync + (DHCP sync) to provide seamless failover for routers. + owsync and lease-sync are optional and detected at runtime. 
+endef + +define Package/ha-cluster/conffiles +/etc/config/ha-cluster +/etc/ha-cluster/service_states +endef + +define Build/Compile + # Nothing to compile - shell scripts only +endef + +define Package/ha-cluster/install + $(INSTALL_DIR) $(1)/etc/config + $(INSTALL_CONF) ./files/ha-cluster.config $(1)/etc/config/ha-cluster + + $(INSTALL_DIR) $(1)/etc/init.d + $(INSTALL_BIN) ./files/ha-cluster.init $(1)/etc/init.d/ha-cluster + + $(INSTALL_DIR) $(1)/usr/lib/ha-cluster + $(INSTALL_BIN) ./files/ha-cluster.sh $(1)/usr/lib/ha-cluster/ + +endef + +define Package/ha-cluster/prerm +#!/bin/sh +if [ -z "$${IPKG_INSTROOT}" ]; then + /etc/init.d/ha-cluster stop >/dev/null 2>&1 + /etc/init.d/ha-cluster disable >/dev/null 2>&1 +fi +exit 0 +endef + +define Package/ha-cluster/postinst +#!/bin/sh +[ -n "$${IPKG_INSTROOT}" ] || { + /etc/init.d/ha-cluster enable +} +exit 0 +endef + +$(eval $(call BuildPackage,ha-cluster)) diff --git a/net/ha-cluster/README.md b/net/ha-cluster/README.md new file mode 100644 index 0000000000000..84c2457df1979 --- /dev/null +++ b/net/ha-cluster/README.md @@ -0,0 +1,244 @@ +# ha-cluster - High Availability for OpenWrt + +Meta-package that orchestrates keepalived (VRRP), owsync (config sync), and +lease-sync (DHCP lease sync) to provide seamless failover between OpenWrt +routers. + +## Installation + +```sh +apk update +apk add ha-cluster owsync lease-sync luci-app-ha-cluster +``` + +## Dependencies + +- `keepalived` - VRRP failover (pulled automatically) +- `owsync` - Bidirectional config file synchronization (optional, detected at runtime) +- `lease-sync` - Real-time DHCP lease replication via dnsmasq ubus (optional, detected at runtime) + +Optional: `luci-app-ha-cluster` for web interface. + +**Note:** DHCP lease sync requires the dnsmasq ubus lease methods patch +(`300-ubus-add-lease-methods.patch` in `package/network/services/dnsmasq/patches/`). 
+ +## How It Works + +ha-cluster reads `/etc/config/ha-cluster` and generates flat config files +for each service under `/tmp/ha-cluster/`: + +``` +/etc/config/ha-cluster → /tmp/ha-cluster/keepalived.conf + → /tmp/ha-cluster/owsync.conf + → /tmp/ha-cluster/lease-sync.conf +``` + +All three daemons are started as procd instances by ha-cluster. Do **not** +use standalone init scripts (`/etc/init.d/keepalived`, `/etc/init.d/owsync`, +`/etc/init.d/lease-sync`) while ha-cluster is enabled — they generate their +own configs and would conflict. + +Any `uci commit ha-cluster` automatically triggers a service reload. + +## Quick Start + +```sh +# Generate an encryption key +KEY=$(hexdump -n 32 -v -e '1/1 "%02x"' /dev/urandom) + +# Minimal configuration +uci set ha-cluster.config.enabled='1' +uci set ha-cluster.config.node_priority='100' +uci set ha-cluster.config.encryption_key="$KEY" + +# Add a peer +uci add ha-cluster peer +uci set ha-cluster.@peer[-1].name='router2' +uci set ha-cluster.@peer[-1].address='192.168.1.2' + +# Create a VRRP instance (all VIPs in same instance fail over together) +uci set ha-cluster.main=vrrp_instance +uci set ha-cluster.main.vrid='51' +uci set ha-cluster.main.interface='lan' +uci set ha-cluster.main.priority='100' +uci set ha-cluster.main.nopreempt='1' + +# Configure a VIP +uci set ha-cluster.lan=vip +uci set ha-cluster.lan.enabled='1' +uci set ha-cluster.lan.vrrp_instance='main' +uci set ha-cluster.lan.interface='br-lan' +uci set ha-cluster.lan.address='192.168.1.254' +uci set ha-cluster.lan.netmask='255.255.255.0' + +# Apply +uci commit ha-cluster +``` + +Repeat on each peer node with the appropriate priority and peer addresses. 
+ +## DHCP Prerequisites + +When using lease sync (`sync_leases='1'`), each VIP interface must have +`force=1` in its DHCP configuration: + +```sh +uci set dhcp.lan.force='1' +uci commit dhcp +``` + +**Why?** Without `force=1`, dnsmasq detects the peer's DHCP server on the +same network and disables its own DHCP service on that interface. This +prevents the ubus `add_lease` method from working — lease-sync cannot +inject leases into a node whose DHCP subsystem is not initialized. DNS +resolution for local hostnames would fail on the BACKUP node. + +ha-cluster validates this at startup and refuses to start if `force=1` is +missing on any VIP interface with lease sync enabled. + +Only set `force=1` on interfaces where you need HA DHCP. Other interfaces +(management networks, etc.) retain normal dhcp_check protection. + +## UCI Configuration + +All configuration lives in `/etc/config/ha-cluster`. + +### Global settings (`config global 'config'`) + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | bool | `0` | Enable/disable ha-cluster | +| `node_priority` | int | `100` | VRRP priority (1-255, higher wins MASTER) | +| `vrrp_transport` | string | `multicast` | VRRP transport: `multicast` or `unicast`. When `unicast`, auto-derives addresses from peer config | +| `sync_method` | string | `owsync` | Sync backend: `owsync` or `none` | +| `sync_encryption` | bool | `1` | Encrypt owsync traffic (AES-256-GCM) | +| `encryption_key` | string | | 256-bit hex key (use LuCI "Generate" button or `hexdump -n 32 -v -e '1/1 "%02x"' /dev/urandom`) | +| `sync_port` | int | `4321` | owsync TCP port | +| `sync_dir` | string | `/etc/config` | Directory to synchronize | +| `bind_address` | string | | Local IP for sync traffic (use real IP, not VIP) | + +### VRRP Instances (`config vrrp_instance ''`) + +Each section defines a VRRP instance. 
All VIPs referencing the same instance +fail over atomically as a group (one advertisement, one failover event). + +When any VIP in the group has `address6` set, a second VRRP instance is +created automatically using VRID+128 for all IPv6 VIPs. + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `vrid` | int | | VRRP router ID (1-127, 128+ reserved for IPv6) | +| `interface` | string | | Primary interface for VRRP advertisements | +| `priority` | int | | Override global `node_priority` for this instance | +| `nopreempt` | bool | `1` | Don't reclaim MASTER on recovery | +| `preempt_delay` | int | | Delay before preempting (seconds) | +| `garp_master_delay` | int | | Gratuitous ARP delay after becoming MASTER | +| `advert_int` | int | `1` | VRRP advertisement interval (seconds) | +| `track_interface` | list | | Interfaces to track for failover | +| `track_script` | list | | Health check script names | +| `auth_type` | string | `none` | VRRP auth: `none`, `pass`, or `ah` | +| `auth_pass` | string | | VRRP auth password | +| `unicast_src_ip` | string | | Source IP for unicast VRRP (overrides auto-derivation) | +| `unicast_peer` | list | | Unicast peer IPs (overrides auto-derivation) | + +### Virtual IPs (`config vip ''`) + +Each VIP references a `vrrp_instance` section. Multiple VIPs can share the +same instance for atomic failover. + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | bool | `0` | Enable this VIP (a VIP without `enabled` set to `1` is skipped) | +| `vrrp_instance` | string | | Name of `vrrp_instance` section | +| `interface` | string | | Network interface for this VIP (e.g. 
`br-lan`) | +| `address` | string | | Virtual IPv4 address | +| `netmask` | string | `255.255.255.0` | IPv4 netmask | +| `address6` | string | | Virtual IPv6 address (optional, uses VRID+128) | +| `prefix6` | int | `64` | IPv6 prefix length | + +### Peers (`config peer`) + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `name` | string | | Peer identifier | +| `address` | string | | Peer IP address | +| `source_address` | string | | Local IP to use when contacting this peer (also used as `unicast_src_ip` for auto-derivation) | +| `sync_enabled` | bool | `1` | Enable owsync/lease-sync for this peer. Set to `0` for non-OpenWrt peers (VRRP-only) | + +### Services (`config service ''`) + +Each service section defines a sync group for owsync. + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | bool | `0` | Enable sync for this group | +| `config_files` | list | | UCI config names or paths to sync | +| `sync_leases` | bool | `0` | Enable lease-sync daemon (dhcp service only) | + +### Exclusions (`config exclude`) + +| Option | Type | Description | +|--------|------|-------------| +| `file` | list | UCI config names to never sync | + +Default exclusions: `network`, `system`, `owsync`, `ha-cluster`, `wireless`. 
+ +### Health check scripts (`config script ''`) + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `script` | string | | Command to run | +| `interval` | int | `5` | Check interval (seconds) | +| `timeout` | int | | Script timeout (seconds, keepalived default applies) | +| `weight` | int | | Priority adjustment on failure (keepalived default applies) | +| `rise` | int | | Successes before marking UP (keepalived default applies) | +| `fall` | int | | Failures before marking DOWN (keepalived default applies) | +| `user` | string | | User to run script as | + +### Advanced settings (`config advanced`) + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `log_level` | int | `2` | 0=ERROR, 1=WARN, 2=INFO, 3=DEBUG | +| `owsync_log_level` | int | `2` | owsync log level | +| `sync_interval` | int | `30` | owsync poll interval (seconds) | +| `lease_sync_port` | int | `5378` | lease-sync UDP port | +| `lease_sync_interval` | int | `30` | lease-sync periodic sync (seconds) | +| `lease_sync_peer_timeout` | int | `120` | Peer timeout (seconds) | +| `lease_sync_persist_interval` | int | `60` | Persist interval (seconds) | +| `lease_sync_log_level` | int | `2` | lease-sync log level | +| `max_auto_priority` | int | `0` | Auto-priority cap (0 = disabled) | +| `enable_notifications` | bool | `0` | Email notifications | +| `notification_email` | list | | Notification recipients | +| `notification_email_from` | string | | Sender address for notifications | +| `smtp_server` | string | | SMTP server address | + +## State Change Hooks + +keepalived state transitions trigger the OpenWrt hotplug system. +Custom scripts can be placed in `/etc/hotplug.d/keepalived/` with a +numeric prefix above 50 (e.g. `60-vpn-failover`). + +Available environment variables: +- `ACTION` — `MASTER`, `BACKUP`, `FAULT`, or `STOP` +- `TYPE` — `INSTANCE`, `GROUP`, etc. +- `NAME` — instance name (e.g. 
`main`) + +## Files + +``` +/etc/config/ha-cluster UCI configuration +/etc/init.d/ha-cluster procd init script (START=25, STOP=91) +/usr/lib/ha-cluster/ha-cluster.sh Core library +/tmp/ha-cluster/ Generated configs (runtime) +``` + +## License + +MIT. See LICENSE file. + +ha-cluster has been developed using Claude Code from Anthropic. + +## Maintainer + +Pierre Gaufillet diff --git a/net/ha-cluster/files/ha-cluster.config b/net/ha-cluster/files/ha-cluster.config new file mode 100644 index 0000000000000..a174b019c1576 --- /dev/null +++ b/net/ha-cluster/files/ha-cluster.config @@ -0,0 +1,130 @@ +# /etc/config/ha-cluster +# +# High Availability Cluster Configuration +# This package orchestrates keepalived, owsync, and lease-sync + +config global 'config' + option enabled '0' + + # VRRP priority (1-255, higher becomes MASTER) + option node_priority '100' + + # VRRP transport: multicast (default, requires L2 adjacency) or unicast + # Unicast auto-derives addresses from peer configuration (source_address/address) + # Required for networks without multicast support (e.g., WireGuard tunnels) + option vrrp_transport 'multicast' + + # Sync method + option sync_method 'owsync' # owsync | none + option sync_encryption '1' # Enable encryption for owsync + option encryption_key '' # Encryption key for owsync (generate with: openssl rand -hex 32) + + # Bind address for sync traffic (owsync and lease-sync) + # Set to this node's real IP (not VIP) to ensure sync traffic uses correct source address + # Leave empty to bind to all interfaces (may use VIP as source in some configurations) + #option bind_address '' # e.g., '192.168.1.1' or 'fdeb:8804:e54c::1' + +# VRRP instance configuration +# All VIPs referencing the same instance fail over atomically as a group. +# VRID must be 1-127 (128-255 reserved for auto-generated IPv6 instances). 
+config vrrp_instance 'main' + option vrid '64' # VRRP router ID (1-127), matches DJB2("main") in LuCI + option interface 'lan' # Primary interface for VRRP adverts + option priority '100' # Override global priority if needed + option nopreempt '1' # Don't reclaim MASTER on recovery (safer default) + option advert_int '1' # Advertisement interval (seconds) + #option preempt_delay '0' # Delay before preempting (seconds) + #option garp_master_delay '' # Gratuitous ARP delay after becoming MASTER + # VRRP authentication + #option auth_type 'none' # none | pass | ah + #option auth_pass '' # Password for VRRP auth + # Unicast VRRP (for multicast-restricted networks) + #option unicast_src_ip '' # Source IP for unicast VRRP + #list unicast_peer '10.0.0.2' + # Tracking + #list track_interface 'wan' # Track interfaces for failover + #list track_script 'check_gateway' + +# Virtual IP configuration +# Each VIP references a vrrp_instance. Multiple VIPs in the same instance +# share a single VRRP advertisement and fail over together. 
+config vip + option enabled '1' + option vrrp_instance 'main' # Reference to vrrp_instance section + option interface 'lan' # Logical interface name (resolved to device at runtime) + option address '192.168.1.254' + option netmask '255.255.255.0' + #option address6 '' # IPv6 virtual IP (optional) + #option prefix6 '64' # IPv6 prefix length (default: 64) + +# Note: WAN VIP can be added manually for advanced scenarios where both routers +# share the same upstream network + +# Peer nodes (examples - uncomment and configure for your setup) +# source_address specifies the local IP to use when contacting this peer +# This prevents sync traffic from using the VIP as source address +# Leave empty to let the kernel choose (may select VIP in some configurations) +#config peer +# option name 'router2' +# option address '192.168.1.2' +# #option source_address '192.168.1.1' # Use node's real IP, not VIP +# #option sync_enabled '1' # Set to 0 for non-OpenWrt peers (VRRP only, no sync) + +#config peer +# option name 'router3' +# option address '192.168.1.3' +# #option source_address '192.168.1.1' +# #option sync_enabled '1' + +# Service synchronization (owsync sync groups) +# Each service section defines a sync group with: +# - enabled: whether to sync this group +# - config_files: list of UCI config names or paths to sync +# - sync_leases: (dhcp only) enable real-time lease-sync daemon + +config service 'dhcp' + option enabled '1' # Sync DHCP configuration + option sync_leases '1' # Enable real-time lease-sync daemon + list config_files 'dhcp' + +config service 'firewall' + option enabled '0' # Sync firewall rules (disabled by default - review rules before enabling) + list config_files 'firewall' + +config service 'wireless' + option enabled '0' # Sync wireless (disabled by default - hardware specific) + list config_files 'wireless' + +# Advanced settings +config advanced + option log_level '2' # ha-cluster log level: 0=ERROR, 1=WARN, 2=INFO, 3=DEBUG + option owsync_log_level 
'2' # owsync log level: 0=ERROR, 1=WARN, 2=INFO, 3=DEBUG + option sync_interval '30' # owsync periodic sync interval + option lease_sync_port '5378' # lease-sync UDP port + option lease_sync_interval '30' # lease-sync periodic sync interval + option lease_sync_peer_timeout '120' + option lease_sync_persist_interval '60' + option lease_sync_log_level '2' # 0=ERROR, 1=WARN, 2=INFO, 3=DEBUG + option max_auto_priority '0' # Keepalived auto-priority cap (0 disables auto) + # Keepalived notifications (optional) + #list notification_email 'root@localhost' + #option notification_email_from 'ha-cluster@router' + #option smtp_server '127.0.0.1' + option enable_notifications '0' # Email/script notifications + +# Files to exclude from sync (important!) +config exclude + list file 'network' # Each node has unique IPs + list file 'system' # Each node has unique hostname + list file 'owsync' # Don't sync the sync config + list file 'ha-cluster' # Don't sync ha-cluster config + list file 'wireless' # Often hardware-specific (unless explicitly enabled) + +# VRRP health check scripts (advanced) +#config script 'check_gateway' +# option script '/bin/ping -c 1 -W 1 8.8.8.8' +# option interval '5' +# option timeout '2' +# option weight '-10' +# option rise '2' +# option fall '2' diff --git a/net/ha-cluster/files/ha-cluster.init b/net/ha-cluster/files/ha-cluster.init new file mode 100644 index 0000000000000..4b3f79957a0d9 --- /dev/null +++ b/net/ha-cluster/files/ha-cluster.init @@ -0,0 +1,136 @@ +#!/bin/sh /etc/rc.common +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Pierre Gaufillet +# /etc/init.d/ha-cluster +# High Availability Cluster Management + +START=25 # After network (20), before standalone owsync (99) and lease-sync (90) +STOP=91 + +USE_PROCD=1 + +. 
/usr/lib/ha-cluster/ha-cluster.sh + +start_service() { + local enabled + + config_load ha-cluster + config_get_bool enabled config enabled 0 + + # Initialize log level from UCI + ha_log_init + + [ "$enabled" -eq 0 ] && { + ha_log_debug "HA cluster disabled" + return 0 + } + + ha_log_info "Starting HA cluster" + + # Ensure runtime directories exist (must be before ha_apply_config) + mkdir -p /etc/owsync + mkdir -p "$HA_CLUSTER_RUN_DIR" + + # Generate configs and start all services + ha_apply_config || { + ha_log_error "Failed to apply configuration" + return 1 + } + + # Start keepalived daemon + ha_log_info "Starting keepalived daemon" + procd_open_instance keepalived + procd_set_param command /usr/sbin/keepalived + procd_append_param command -n # don't daemonize + procd_append_param command -f "$KEEPALIVED_CONF" + procd_set_param respawn + procd_set_param stdout 1 + procd_set_param stderr 1 + procd_close_instance + + # Start owsync daemon (if config file was generated) + if [ -f "$OWSYNC_CONF" ]; then + ha_log_info "Starting owsync daemon" + procd_open_instance owsync + procd_set_param command /usr/bin/owsync daemon + procd_append_param command -c "$OWSYNC_CONF" + procd_set_param file "$OWSYNC_CONF" + procd_set_param respawn + procd_set_param stdout 1 + procd_set_param stderr 1 + procd_close_instance + fi + + # Start lease-sync daemon (if config exists) + if [ -f "$LEASE_SYNC_CONF" ]; then + ha_log_info "Starting lease-sync daemon" + procd_open_instance lease-sync + procd_set_param command /usr/sbin/lease-sync + procd_append_param command -c "$LEASE_SYNC_CONF" + procd_set_param file "$LEASE_SYNC_CONF" + procd_set_param respawn + procd_set_param stdout 1 + procd_set_param stderr 1 + procd_close_instance + fi + + return 0 +} + +stop_service() { + ha_log_info "Stopping HA cluster" + + # Remove dnsmasq HA overlay and restart dnsmasq + ha_release_dnsmasq + + # Release service management back to standalone init scripts + ha_manage_services "release" + + # procd handles 
stopping all instances + return 0 +} + +reload_service() { + local enabled + + config_load ha-cluster + config_get_bool enabled config enabled 0 + + # Initialize log level from UCI + ha_log_init + + ha_log_info "Reloading HA cluster configuration" + + # Not running → start or nothing + if ! procd_running ha-cluster; then + if [ "$enabled" -eq 1 ]; then + ha_log_info "Service not running, performing full start" + start + fi + return 0 + fi + + # Running + disabled → stop + if [ "$enabled" -eq 0 ]; then + ha_log_info "HA cluster disabled, stopping" + stop + return 0 + fi + + # Running + enabled → reconcile all instances via procd + # start_service() regenerates configs and re-defines procd instances. + # procd compares new vs running instances: + # - keepalived: command line unchanged → procd does nothing + # - owsync/lease-sync: config file tracked → procd restarts on change + # - owsync/lease-sync: instance appeared/disappeared → procd starts/stops + start + + # SIGHUP keepalived to reload its config file without dropping VIPs. + # procd doesn't restart keepalived (command line unchanged), so we + # signal it to re-read keepalived.conf in-place (no failover). + procd_send_signal ha-cluster keepalived SIGHUP +} + +service_triggers() { + procd_add_reload_trigger "ha-cluster" +} diff --git a/net/ha-cluster/files/ha-cluster.sh b/net/ha-cluster/files/ha-cluster.sh new file mode 100644 index 0000000000000..6c429ffad462b --- /dev/null +++ b/net/ha-cluster/files/ha-cluster.sh @@ -0,0 +1,1013 @@ +#!/bin/sh +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Pierre Gaufillet +# /usr/lib/ha-cluster/ha-cluster.sh +# Library functions for HA cluster management + +. /lib/functions.sh +. 
/lib/config/uci.sh + +HA_CLUSTER_CONFIG="/etc/config/ha-cluster" +# Generated configs are placed in a dedicated directory to avoid conflicts +# with standalone service init scripts (which use /tmp/*.conf) +HA_CLUSTER_RUN_DIR="/tmp/ha-cluster" +KEEPALIVED_CONF="${HA_CLUSTER_RUN_DIR}/keepalived.conf" +OWSYNC_CONF="${HA_CLUSTER_RUN_DIR}/owsync.conf" +LEASE_SYNC_CONF="${HA_CLUSTER_RUN_DIR}/lease-sync.conf" + +# Log level constants (matches syslog priorities) +HA_LOG_LEVEL_ERROR=0 +HA_LOG_LEVEL_WARNING=1 +HA_LOG_LEVEL_INFO=2 +HA_LOG_LEVEL_DEBUG=3 + +# Current log level (default: INFO) +HA_LOG_LEVEL="${HA_LOG_LEVEL:-2}" + +# Initialize log level from UCI (call once at start) +ha_log_init() { + config_load ha-cluster + config_get HA_LOG_LEVEL advanced log_level "2" +} + +# Log error (always logged) +ha_log_error() { + logger -t ha-cluster -p daemon.err "$@" +} + +# Log warning (level >= 1) +ha_log_warning() { + [ "$HA_LOG_LEVEL" -ge "$HA_LOG_LEVEL_WARNING" ] && \ + logger -t ha-cluster -p daemon.warning "$@" +} + +# Log info (level >= 2) +ha_log_info() { + [ "$HA_LOG_LEVEL" -ge "$HA_LOG_LEVEL_INFO" ] && \ + logger -t ha-cluster -p daemon.info "$@" +} + +# Log debug (level >= 3) +ha_log_debug() { + [ "$HA_LOG_LEVEL" -ge "$HA_LOG_LEVEL_DEBUG" ] && \ + logger -t ha-cluster -p daemon.debug "$@" +} + +# Convenience wrapper (maps to info level) +ha_log() { + ha_log_info "$@" +} + +# ============================================ +# Helper Functions to Reduce Code Duplication +# ============================================ + +# Generic list collector for config_list_foreach +# Usage: _ha_list_result=""; config_list_foreach section option _ha_list_collect; myvar="$_ha_list_result" +_ha_list_result="" +_ha_list_collect() { + _ha_list_result="$_ha_list_result $1" +} + +# Conditional append to config file +# Usage: ha_conf_append "$file" "key" "$value" ["indent"] +# Only appends if value is non-empty +ha_conf_append() { + local file="$1" + local key="$2" + local value="$3" + local 
indent="${4:- }" + [ -n "$value" ] && echo "${indent}${key} ${value}" >> "$file" +} + +# Convert netmask to CIDR prefix length +# Usage: netmask_to_cidr "255.255.255.0" -> "24" +netmask_to_cidr() { + local netmask="$1" + local cidr=0 + + # Convert each octet + for octet in $(echo "$netmask" | tr '.' ' '); do + case "$octet" in + 255) cidr=$((cidr + 8)) ;; + 254) cidr=$((cidr + 7)) ;; + 252) cidr=$((cidr + 6)) ;; + 248) cidr=$((cidr + 5)) ;; + 240) cidr=$((cidr + 4)) ;; + 224) cidr=$((cidr + 3)) ;; + 192) cidr=$((cidr + 2)) ;; + 128) cidr=$((cidr + 1)) ;; + 0) ;; + *) ha_log_error "Invalid netmask octet '$octet' in '$netmask'"; return 1 ;; + esac + done + + echo "$cidr" +} + +# Validate IPv6 address format (basic check) +# Defense-in-depth: LuCI validates on input, keepalived validates on load +# Usage: is_valid_ipv6 "fd00::1" -> returns 0 (valid) or 1 (invalid) +is_valid_ipv6() { + local addr="$1" + # Must contain at least one colon + case "$addr" in + *:*) ;; + *) return 1 ;; + esac + # Length check: 2 (::) to 39 (full form) + local len=${#addr} + [ "$len" -lt 2 ] || [ "$len" -gt 39 ] && return 1 + # Only hex digits and colons allowed + case "$addr" in + *[!0-9a-fA-F:]*) return 1 ;; + esac + return 0 +} + +# Get global config value +ha_get_config() { + local var="$1" + local default="$2" + config_load ha-cluster + config_get value config "$var" "$default" + echo "$value" +} + +# Collect peer addresses for unicast auto-derivation +# Sets: _ha_peer_addresses (all peer address values) +# _ha_peer_source_address (first non-empty source_address found) +_ha_peer_addresses="" +_ha_peer_source_address="" +_ha_collect_peer_address() { + local section="$1" + local address source_address + + config_get address "$section" address "" + config_get source_address "$section" source_address "" + + [ -z "$address" ] && return 0 + + _ha_peer_addresses="$_ha_peer_addresses $address" + if [ -z "$_ha_peer_source_address" ] && [ -n "$source_address" ]; then + 
_ha_peer_source_address="$source_address" + fi +} + +# Generate keepalived configuration +ha_generate_keepalived_conf() { + local node_name node_priority enable_notifications notification_email_from smtp_server + local max_auto_priority + local notification_emails="" + + config_load ha-cluster + node_name="$(cat /proc/sys/kernel/hostname)" + config_get node_priority config node_priority "100" + config_get _ha_vrrp_transport config vrrp_transport "multicast" + config_get max_auto_priority advanced max_auto_priority "0" + config_get_bool enable_notifications advanced enable_notifications 0 + config_get notification_email_from advanced notification_email_from "" + config_get smtp_server advanced smtp_server "" + + # Collect peer addresses for unicast auto-derivation + _ha_peer_addresses="" + _ha_peer_source_address="" + config_foreach _ha_collect_peer_address peer + + _ha_list_result="" + config_list_foreach advanced notification_email _ha_list_collect + notification_emails="$_ha_list_result" + + ha_log "Generating keepalived configuration for node: $node_name" + + cat > "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" + done + cat >> "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" + echo "" >> "$KEEPALIVED_CONF" + + # Generate VRRP scripts (health checks) + config_foreach ha_generate_vrrp_script script + + # Generate VRRP instances (grouped by vrrp_instance section) + config_foreach ha_generate_vrrp_group vrrp_instance + + ha_log "Keepalived configuration generated at $KEEPALIVED_CONF" + return 0 +} + +# Generate a VRRP script block +ha_generate_vrrp_script() { + local section="$1" + local script interval timeout weight rise fall user + + config_get script "$section" script + [ -z "$script" ] && return 0 + + config_get interval "$section" interval "5" + config_get timeout "$section" timeout "" + config_get weight "$section" weight "" + config_get rise "$section" rise "" + config_get fall "$section" fall "" + config_get user "$section" user "" + + cat >> 
"$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" + echo "" >> "$KEEPALIVED_CONF" +} + +# Write common VRRP instance options (shared between IPv4 and IPv6 instances) +# Uses parent-scope variables: nopreempt, preempt_delay, garp_master_delay, +# track_ifaces, track_scripts, auth_type, auth_pass, unicast_peers, unicast_src_ip +_ha_write_vrrp_instance_options() { + local label="$1" + + if [ "$nopreempt" -eq 1 ]; then + echo " nopreempt" >> "$KEEPALIVED_CONF" + elif [ -n "$preempt_delay" ]; then + echo " preempt_delay $preempt_delay" >> "$KEEPALIVED_CONF" + fi + + ha_conf_append "$KEEPALIVED_CONF" "garp_master_delay" "$garp_master_delay" + + # Track interfaces + if [ -n "$track_ifaces" ]; then + echo " track_interface {" >> "$KEEPALIVED_CONF" + for iface in $track_ifaces; do + local resolved_iface + if network_get_device resolved_iface "$iface" 2>/dev/null; then + ha_log_debug "VIP $label: resolved track_interface '$iface' to device '$resolved_iface'" + echo " $resolved_iface" >> "$KEEPALIVED_CONF" + else + ha_log_debug "VIP $label: using track_interface '$iface' as-is" + echo " $iface" >> "$KEEPALIVED_CONF" + fi + done + echo " }" >> "$KEEPALIVED_CONF" + fi + + # Track scripts + if [ -n "$track_scripts" ]; then + echo " track_script {" >> "$KEEPALIVED_CONF" + for script_name in $track_scripts; do + echo " $script_name" >> "$KEEPALIVED_CONF" + done + echo " }" >> "$KEEPALIVED_CONF" + fi + + # Authentication + if [ "$auth_type" != "none" ] && [ -n "$auth_pass" ]; then + cat >> "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" + for peer in $unicast_peers; do + echo " $peer" >> "$KEEPALIVED_CONF" + done + echo " }" >> "$KEEPALIVED_CONF" + fi +} + +# Callback to collect VIPs belonging to a specific vrrp_instance +# Sets: _ha_vip_v4_addrs, _ha_vip_v6_addrs (space-separated "addr dev iface" entries) +_ha_collect_vip_for_instance() { + local vip_section="$1" + local vip_enabled vip_instance vip_interface vip_interface_logical + local vip_address vip_netmask vip_address6 vip_prefix6 + + 
config_get_bool vip_enabled "$vip_section" enabled 0 + [ "$vip_enabled" -eq 0 ] && return 0 + + config_get vip_instance "$vip_section" vrrp_instance "" + [ "$vip_instance" != "$_ha_current_instance" ] && return 0 + + config_get vip_interface_logical "$vip_section" interface "" + config_get vip_address "$vip_section" address "" + config_get vip_netmask "$vip_section" netmask "255.255.255.0" + config_get vip_address6 "$vip_section" address6 "" + config_get vip_prefix6 "$vip_section" prefix6 "64" + + [ -z "$vip_interface_logical" ] && { ha_log_warning "VIP $vip_section has no interface"; return 1; } + [ -z "$vip_address" ] && [ -z "$vip_address6" ] && { ha_log_warning "VIP $vip_section has no address (IPv4 or IPv6)"; return 1; } + + # Resolve interface name + . /lib/functions/network.sh + local vip_iface_resolved + if network_get_device vip_iface_resolved "$vip_interface_logical" 2>/dev/null; then + ha_log_debug "VIP $vip_section: resolved interface '$vip_interface_logical' to device '$vip_iface_resolved'" + else + vip_iface_resolved="$vip_interface_logical" + ha_log_debug "VIP $vip_section: using interface '$vip_iface_resolved' as-is" + fi + + # Collect IPv4 address + if [ -n "$vip_address" ]; then + local cidr + cidr=$(netmask_to_cidr "$vip_netmask") || { + ha_log_error "VIP $vip_section: invalid netmask '$vip_netmask'" + return 1 + } + _ha_vip_v4_addrs="${_ha_vip_v4_addrs} ${vip_address}/${cidr} dev ${vip_iface_resolved} +" + fi + + # Collect IPv6 address + if [ -n "$vip_address6" ]; then + is_valid_ipv6 "$vip_address6" || { ha_log_error "VIP $vip_section: invalid IPv6 address (got: $vip_address6)"; return 1; } + case "$vip_prefix6" in + ''|*[!0-9]*) ha_log_error "VIP $vip_section: prefix6 must be a number (got: $vip_prefix6)"; return 1 ;; + esac + [ "$vip_prefix6" -lt 1 ] || [ "$vip_prefix6" -gt 128 ] && { ha_log_error "VIP $vip_section: prefix6 must be 1-128 (got: $vip_prefix6)"; return 1; } + _ha_vip_v6_addrs="${_ha_vip_v6_addrs} ${vip_address6}/${vip_prefix6} 
dev ${vip_iface_resolved} +" + fi +} + +# Generate a VRRP group from a vrrp_instance section +# Collects all enabled VIPs referencing this instance and generates +# one keepalived vrrp_instance with all IPv4 addresses, plus a second +# vrrp_instance (VRID+128) if any VIP has IPv6. +ha_generate_vrrp_group() { + local section="$1" + local interface interface_logical vrid priority nopreempt track_interface + local advert_int preempt_delay garp_master_delay auth_type auth_pass unicast_src_ip + local track_ifaces="" track_scripts="" unicast_peers="" + + # Get instance-level options + config_get interface_logical "$section" interface + config_get vrid "$section" vrid + config_get priority "$section" priority "$(ha_get_config node_priority 100)" + config_get_bool nopreempt "$section" nopreempt 1 + config_get track_interface "$section" track_interface + config_get advert_int "$section" advert_int "1" + config_get preempt_delay "$section" preempt_delay "" + config_get garp_master_delay "$section" garp_master_delay "" + config_get auth_type "$section" auth_type "none" + config_get auth_pass "$section" auth_pass "" + config_get unicast_src_ip "$section" unicast_src_ip "" + + [ -z "$interface_logical" ] && { ha_log_warning "vrrp_instance $section has no interface"; return 1; } + [ -z "$vrid" ] && { ha_log_warning "vrrp_instance $section has no VRID"; return 1; } + + # Resolve primary interface + . 
/lib/functions/network.sh + if network_get_device interface "$interface_logical" 2>/dev/null; then + ha_log_debug "vrrp_instance $section: resolved interface '$interface_logical' to device '$interface'" + else + interface="$interface_logical" + ha_log_debug "vrrp_instance $section: using interface '$interface' as-is" + fi + + # Validate VRID (1-127, 128+ reserved for IPv6) + case "$vrid" in + ''|*[!0-9]*) ha_log_error "vrrp_instance $section: VRID must be a number (got: $vrid)"; return 1 ;; + esac + [ "$vrid" -lt 1 ] || [ "$vrid" -gt 127 ] && { ha_log_error "vrrp_instance $section: VRID must be 1-127 (got: $vrid)"; return 1; } + + case "$priority" in + ''|*[!0-9]*) ha_log_error "vrrp_instance $section: priority must be a number (got: $priority)"; return 1 ;; + esac + [ "$priority" -lt 1 ] || [ "$priority" -gt 255 ] && { ha_log_error "vrrp_instance $section: priority must be 1-255 (got: $priority)"; return 1; } + + case "$advert_int" in + ''|*[!0-9]*) ha_log_error "vrrp_instance $section: advert_int must be a number (got: $advert_int)"; return 1 ;; + esac + + # Collect track interfaces (list or single) + _ha_list_result="" + config_list_foreach "$section" track_interface _ha_list_collect + track_ifaces="$_ha_list_result" + if [ -z "$track_ifaces" ] && [ -n "$track_interface" ]; then + track_ifaces="$track_interface" + fi + + # Collect track scripts + _ha_list_result="" + config_list_foreach "$section" track_script _ha_list_collect + track_scripts="$_ha_list_result" + + # Normalize auth_type + case "$auth_type" in + pass|PASS) auth_type="PASS" ;; + ah|AH) auth_type="AH" ;; + esac + + # Collect unicast peers (per-instance explicit config) + _ha_list_result="" + config_list_foreach "$section" unicast_peer _ha_list_collect + unicast_peers="$_ha_list_result" + + # Unicast auto-derivation: when vrrp_transport=unicast and no per-instance override, + # derive unicast_src_ip from peer source_address and unicast_peer from peer addresses + if [ "$_ha_vrrp_transport" = 
"unicast" ] && [ -z "$unicast_peers" ]; then + unicast_peers="$_ha_peer_addresses" + if [ -z "$unicast_src_ip" ] && [ -n "$_ha_peer_source_address" ]; then + unicast_src_ip="$_ha_peer_source_address" + fi + ha_log_debug "vrrp_instance $section: unicast auto-derived from peer config" + fi + + if [ -n "$unicast_peers" ] && [ -z "$unicast_src_ip" ]; then + ha_log_warning "vrrp_instance $section has unicast_peer but no unicast_src_ip" + fi + + # Collect all VIPs belonging to this instance + _ha_current_instance="$section" + _ha_vip_v4_addrs="" + _ha_vip_v6_addrs="" + config_foreach _ha_collect_vip_for_instance vip + + # Must have at least one VIP + [ -z "$_ha_vip_v4_addrs" ] && [ -z "$_ha_vip_v6_addrs" ] && { + ha_log_warning "vrrp_instance $section has no enabled VIPs" + return 0 + } + + # Write IPv4 VRRP instance (if any IPv4 VIPs) + if [ -n "$_ha_vip_v4_addrs" ]; then + cat >> "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" + printf '%s' "$_ha_vip_v4_addrs" >> "$KEEPALIVED_CONF" + cat >> "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" <> "$KEEPALIVED_CONF" + printf '%s' "$_ha_vip_v6_addrs" >> "$KEEPALIVED_CONF" + cat >> "$KEEPALIVED_CONF" < "$OWSYNC_CONF" <> "$OWSYNC_CONF" + echo "encryption_key=${encryption_key}" >> "$OWSYNC_CONF" + else + ha_log_warning "No encryption_key set - owsync will run without encryption" + ha_log_warning "For production: Generate a key with 'owsync genkey' and set it in both nodes" + ha_log_warning "Add to /etc/config/ha-cluster: option encryption_key ''" + echo "# Security: Plain mode (use only over secure VPN)" >> "$OWSYNC_CONF" + echo "plain_mode=1" >> "$OWSYNC_CONF" + fi + echo "" >> "$OWSYNC_CONF" + + # Add peers + echo "# Peers" >> "$OWSYNC_CONF" + config_foreach ha_add_owsync_peer_conf peer + echo "" >> "$OWSYNC_CONF" + + # Add includes (files to sync based on enabled services) + echo "# Include patterns (sync these files)" >> "$OWSYNC_CONF" + config_foreach ha_add_owsync_includes_conf service + + # Add excludes + echo "" >> "$OWSYNC_CONF" + 
echo "# Exclude patterns (never sync these)" >> "$OWSYNC_CONF" + config_foreach ha_add_owsync_excludes_conf exclude + + ha_log "owsync configuration generated at $OWSYNC_CONF" + return 0 +} + +# Add peer to owsync config (uses global port from config) +# Supports per-peer source_address for source address selection +# Skips peers with sync_enabled=0 (non-OpenWrt peers) +ha_add_owsync_peer_conf() { + local section="$1" + local address source_address sync_enabled + + config_get_bool sync_enabled "$section" sync_enabled 1 + [ "$sync_enabled" -eq 0 ] && return 0 + + config_get address "$section" address + config_get source_address "$section" source_address "" + + [ -z "$address" ] && return 0 + + if [ -n "$source_address" ]; then + echo "peer=${address},${source_address}" >> "$OWSYNC_CONF" + ha_log_debug "owsync peer: $address (source: $source_address)" + else + echo "peer=${address}" >> "$OWSYNC_CONF" + ha_log_debug "owsync peer: $address (no source specified)" + fi +} + +# Add includes from enabled services +ha_add_owsync_includes_conf() { + local section="$1" + local enabled + + config_get_bool enabled "$section" enabled 0 + [ "$enabled" -eq 0 ] && return 0 + + config_list_foreach "$section" config_files ha_add_owsync_include_conf +} + +ha_add_owsync_include_conf() { + echo "include=$1" >> "$OWSYNC_CONF" +} + +# Add excludes +ha_add_owsync_excludes_conf() { + local section="$1" + config_list_foreach "$section" file ha_add_owsync_exclude_conf +} + +ha_add_owsync_exclude_conf() { + echo "exclude=$1" >> "$OWSYNC_CONF" +} + + +# Generate lease-sync configuration (flat file for managed mode) +ha_generate_lease_sync_conf() { + local dhcp_service_enabled lease_sync_enabled lease_sync_port encryption_key + local sync_interval peer_timeout persist_interval log_level node_name bind_address + + config_load ha-cluster + + # Check if DHCP service has lease sync enabled + config_get_bool dhcp_service_enabled dhcp enabled 0 + config_get_bool lease_sync_enabled dhcp sync_leases 0 + 
config_get lease_sync_port advanced lease_sync_port "5378" + config_get sync_interval advanced lease_sync_interval "30" + config_get peer_timeout advanced lease_sync_peer_timeout "120" + config_get persist_interval advanced lease_sync_persist_interval "60" + config_get log_level advanced lease_sync_log_level "2" + config_get encryption_key config encryption_key "" + config_get bind_address config bind_address "" + node_name="$(cat /proc/sys/kernel/hostname)" + + [ "$dhcp_service_enabled" -eq 0 ] || [ "$lease_sync_enabled" -eq 0 ] && { + ha_log_debug "DHCP lease sync disabled" + rm -f "$LEASE_SYNC_CONF" + return 0 + } + + ha_log "Generating lease-sync configuration" + + # Generate config file with secure permissions (for encryption key) + rm -f "$LEASE_SYNC_CONF" + touch "$LEASE_SYNC_CONF" + chmod 0600 "$LEASE_SYNC_CONF" + + cat > "$LEASE_SYNC_CONF" <> "$LEASE_SYNC_CONF" + ha_log "lease-sync will bind to $bind_address" + fi + + # Security settings: use encryption if key is available, otherwise plain mode + if [ -n "$encryption_key" ]; then + cat >> "$LEASE_SYNC_CONF" <> "$LEASE_SYNC_CONF" <> "$LEASE_SYNC_CONF" + + # Add peers + config_foreach ha_add_lease_sync_peer_flat peer + + ha_log "lease-sync configuration generated at $LEASE_SYNC_CONF" + return 0 +} + +# Supports per-peer source_address for source address selection +# Skips peers with sync_enabled=0 (non-OpenWrt peers) +ha_add_lease_sync_peer_flat() { + local section="$1" + local address source_address sync_enabled + + config_get_bool sync_enabled "$section" sync_enabled 1 + [ "$sync_enabled" -eq 0 ] && return 0 + + config_get address "$section" address + config_get source_address "$section" source_address "" + + [ -z "$address" ] && return 0 + + if [ -n "$source_address" ]; then + echo "peer=${address},${source_address}" >> "$LEASE_SYNC_CONF" + ha_log_debug "lease-sync peer: $address (source: $source_address)" + else + echo "peer=${address}" >> "$LEASE_SYNC_CONF" + ha_log_debug "lease-sync peer: $address (no 
source specified)"
+	fi
+}
+
+# Validate ha-cluster configuration
+#
+# Loads the ha-cluster UCI config and, when the cluster is enabled, checks
+# that every vrrp_instance VRID is valid and unique and (only with
+# sync_leases on) that the DHCP pool behind each enabled VIP has force=1.
+# A missing peer section only produces a warning.
+#
+# The config_foreach callbacks used here (ha_check_vrid_unique,
+# ha_count_peers, _ha_check_dhcp_force) update this function's locals
+# errors, vrid_list and peer_count directly through shell dynamic scoping.
+#
+# Returns: 0 when the cluster is disabled or nothing failed, otherwise the
+# number of errors counted.
+ha_validate_config() {
+	local errors=0
+	local vrid_list=""
+	local peer_count=0
+	local enabled lease_sync_enabled
+
+	config_load ha-cluster
+
+	# Nothing to validate while the cluster is switched off
+	config_get_bool enabled config enabled 0
+	[ "$enabled" -eq 0 ] && return 0
+
+	# Validate VRIDs are unique across vrrp_instance sections
+	config_foreach ha_check_vrid_unique vrrp_instance
+
+	# A cluster without peers is accepted, but the operator is warned
+	config_foreach ha_count_peers peer
+	if [ "$peer_count" -eq 0 ]; then
+		ha_log_warning "No peers configured - HA will not function"
+	fi
+
+	# When lease sync is enabled, VIP interfaces need dhcp.*.force=1
+	# so that dnsmasq initializes DHCP (required for ubus add_lease).
+	# Without it, the BACKUP node cannot receive synced leases.
+	config_get_bool lease_sync_enabled dhcp sync_leases 0
+	if [ "$lease_sync_enabled" -eq 1 ]; then
+		_ha_dhcp_force_checked=""
+		config_foreach _ha_check_dhcp_force vip
+	fi
+
+	return $errors
+}
+
+# Check that each enabled VIP interface has force=1 in its dhcp section
+#
+# config_foreach callback for "vip" sections.
+#   $1  vip section name
+# Reads and extends _ha_dhcp_force_checked (interfaces already handled) and
+# increments the caller's "errors" counter when a pool lacks force=1.
+# The dhcp package is queried with the uci CLI instead of config_load.
+# NOTE(review): presumably so the ha-cluster config being iterated by the
+# caller stays loaded - confirm.
+_ha_dhcp_force_checked=""
+_ha_check_dhcp_force() {
+	local section="$1"
+	local vip_enabled interface
+	local dhcp_section="" force
+	local _s _iface
+
+	config_get_bool vip_enabled "$section" enabled 0
+	[ "$vip_enabled" -eq 0 ] && return 0
+
+	config_get interface "$section" interface ""
+	[ -z "$interface" ] && return 0
+
+	# Skip already-checked interfaces (multiple VIPs on same interface).
+	# Exact word match on the space-separated list; no regex involved.
+	case " $_ha_dhcp_force_checked " in
+	*" $interface "*) return 0 ;;
+	esac
+	_ha_dhcp_force_checked="$_ha_dhcp_force_checked $interface"
+
+	# Find the dhcp pool section (type=dhcp) for this interface.
+	# Must exclude dnsmasq sections which also have an 'interface' option.
+	for _s in $(uci show dhcp 2>/dev/null | grep '=dhcp$' | cut -d. -f2 | cut -d= -f1); do
+		_iface=$(uci -q get "dhcp.$_s.interface")
+		if [ "$_iface" = "$interface" ]; then
+			dhcp_section="$_s"
+			break
+		fi
+	done
+	[ -z "$dhcp_section" ] && return 0  # No DHCP pool on this interface, nothing to check
+
+	force=$(uci -q get "dhcp.$dhcp_section.force")
+	if [ "$force" != "1" ]; then
+		ha_log_error "dhcp.$dhcp_section.force must be '1' for HA lease sync on interface '$interface'"
+		ha_log_error "Set it with: uci set dhcp.$dhcp_section.force='1' && uci commit dhcp"
+		errors=$((errors + 1))
+	fi
+}
+
+# config_foreach callback for "vrrp_instance" sections: validate the VRID
+#   $1  vrrp_instance section name
+# A section without a vrid option is skipped here. Otherwise the VRID must
+# be a number in 1-127 that no earlier section used. On failure the caller's
+# "errors" counter is incremented and 1 is returned; on success the VRID is
+# appended to the caller's "vrid_list".
+ha_check_vrid_unique() {
+	local section="$1"
+	local vrid
+
+	config_get vrid "$section" vrid
+	[ -z "$vrid" ] && return 0
+
+	# Validate VRID range (1-127, 128+ reserved for IPv6)
+	case "$vrid" in
+	''|*[!0-9]*)
+		ha_log_error "vrrp_instance $section: VRID must be a number (got: $vrid)"
+		errors=$((errors + 1))
+		return 1
+		;;
+	esac
+	# Reject 0 as well as values above 127, matching the range in the message
+	if [ "$vrid" -lt 1 ] || [ "$vrid" -gt 127 ]; then
+		ha_log_error "vrrp_instance $section: VRID must be 1-127 (got: $vrid, 128-255 reserved for IPv6)"
+		errors=$((errors + 1))
+		return 1
+	fi
+
+	# Check uniqueness (global, not per-interface — instances are global)
+	case " $vrid_list " in
+	*" $vrid "*)
+		ha_log_error "Duplicate VRID $vrid in vrrp_instance section $section"
+		errors=$((errors + 1))
+		return 1
+		;;
+	esac
+
+	vrid_list="$vrid_list $vrid"
+}
+
+# config_foreach callback: count one "peer" section in the caller's peer_count
+ha_count_peers() {
+	peer_count=$((peer_count + 1))
+}
+
+# Manage standalone services (disable/enable)
+#
+#   $1  "take_over": record whether keepalived, owsync and lease-sync are
+#                    enabled, then stop and disable their init scripts
+#       "release":   re-enable and start the services recorded as enabled,
+#                    then delete the state file
+# The state lives in /etc/ha-cluster/service_states as "name=0|1" lines.
+# Returns 1 for an unknown action.
+ha_manage_services() {
+	local action="$1"  # "take_over" or "release"
+	local state_file="/etc/ha-cluster/service_states"
+	local service was_enabled
+
+	mkdir -p /etc/ha-cluster
+
+	case "$action" in
+	take_over)
+		ha_log "Taking over service management from standalone init scripts"
+
+		for service in keepalived owsync lease-sync; do
+			[ -x "/etc/init.d/$service" ] || continue
+
+			# Record the enabled state only once. After the first
+			# take-over the service is already disabled, so recording
+			# again (second take_over without a release in between)
+			# would overwrite the original state with "0" and
+			# "release" could never restore it.
+			if ! grep -q "^$service=" "$state_file" 2>/dev/null; then
+				if /etc/init.d/$service enabled 2>/dev/null; then
+					echo "$service=1" >> "$state_file"
+				else
+					echo "$service=0" >> "$state_file"
+				fi
+			fi
+
+			# Stop and disable
+			/etc/init.d/$service stop 2>/dev/null
+			/etc/init.d/$service disable 2>/dev/null
+			ha_log "Disabled standalone service: $service"
+		done
+		;;
+	release)
+		ha_log "Releasing service management back to standalone init scripts"
+
+		[ -f "$state_file" ] || return 0
+
+		while IFS='=' read -r service was_enabled; do
+			if [ -x "/etc/init.d/$service" ] && [ "$was_enabled" = "1" ]; then
+				# stdin is the state file inside this loop; keep the
+				# init scripts from consuming it
+				/etc/init.d/$service enable 2>/dev/null < /dev/null
+				/etc/init.d/$service start 2>/dev/null < /dev/null
+				ha_log "Restored standalone service: $service"
+			fi
+		done < "$state_file"
+
+		rm -f "$state_file"
+		;;
+	*)
+		ha_log_warning "ha_manage_services: unknown action '$action'"
+		return 1
+		;;
+	esac
+
+	return 0
+}
+
+# dnsmasq conf-dir overlay for HA operation
+# ha-cluster drops a config file into dnsmasq's conf-dir to enable HA-required
+# options at runtime, without modifying /etc/config/dhcp.
+# dnsmasq's init script detects this file and skips the dhcp_check probe,
+# allowing both HA nodes to serve DHCP simultaneously.
+HA_DNSMASQ_OVERLAY_NAME="ha-cluster.conf"
+
+# Resolve dnsmasq's conf-dir path from UCI
+# The conf-dir path depends on the dnsmasq section name (e.g., cfg01411c)
+# matching the logic in dnsmasq's init script.
+_ha_dnsmasq_confdir=""
+
+# config_foreach callback for "dnsmasq" sections of the dhcp package
+#   $1  dnsmasq section name
+# Stores the conf-dir of the first section visited in _ha_dnsmasq_confdir;
+# later sections are ignored once a value is set. Without an explicit
+# "confdir" option the default is /tmp/dnsmasq.<section>.d.
+_ha_resolve_dnsmasq_confdir_cb() {
+	local cfg="$1"
+	[ -n "$_ha_dnsmasq_confdir" ] && return 0
+	config_get _ha_dnsmasq_confdir "$cfg" confdir "/tmp/dnsmasq${cfg:+.$cfg}.d"
+	# Strip any filter suffixes (confdir supports ",*.ext" filters)
+	_ha_dnsmasq_confdir="${_ha_dnsmasq_confdir%%,*}"
+}
+
+# Print dnsmasq's conf-dir on stdout (/tmp/dnsmasq.d if no dnsmasq section
+# yields one).
+# This calls "config_load dhcp", so callers in this file run it inside
+# $(...) where the load happens in a subshell.
+# NOTE(review): presumably to keep the caller's loaded ha-cluster config
+# intact - confirm.
+ha_get_dnsmasq_confdir() {
+	config_load dhcp
+	_ha_dnsmasq_confdir=""
+	config_foreach _ha_resolve_dnsmasq_confdir_cb dnsmasq
+	echo "${_ha_dnsmasq_confdir:-/tmp/dnsmasq.d}"
+}
+
+# Writes the dnsmasq conf-dir overlay and restarts dnsmasq (at most once)
+#
+# With dhcp.sync_leases enabled the overlay is (re)written; with it disabled
+# a leftover overlay is removed. dnsmasq is restarted only when one of the
+# two happened. A failed restart is logged as a warning.
+# Always returns 0.
+ha_configure_dnsmasq() {
+	local lease_sync_enabled needs_restart=0
+	local confdir overlay_path
+
+	config_load ha-cluster
+	config_get_bool lease_sync_enabled dhcp sync_leases 0
+
+	confdir="$(ha_get_dnsmasq_confdir)"
+	overlay_path="$confdir/$HA_DNSMASQ_OVERLAY_NAME"
+
+	if [ "$lease_sync_enabled" -eq 1 ]; then
+		mkdir -p "$confdir"
+		# "<<-" strips leading tabs only: keep the lines below tab-indented
+		cat > "$overlay_path" <<-'EOF'
+			# Auto-generated by ha-cluster — do not edit
+			# Call dhcp-script on lease renewals so lease-sync can track expiry changes
+			script-on-renewal
+		EOF
+		needs_restart=1
+		ha_log "Wrote dnsmasq HA overlay to $overlay_path"
+	elif [ -f "$overlay_path" ]; then
+		# sync_leases disabled but stale overlay exists (reload or crash recovery)
+		rm -f "$overlay_path"
+		needs_restart=1
+		ha_log "Removed stale dnsmasq HA overlay"
+	fi
+
+	# A trailing '[ ... ] && { ... }' would make the function return 1
+	# whenever no restart is needed, so use if/fi and return explicitly.
+	if [ "$needs_restart" -eq 1 ]; then
+		if /etc/init.d/dnsmasq restart 2>/dev/null; then
+			ha_log "dnsmasq restarted"
+		else
+			ha_log_warning "dnsmasq restart failed"
+		fi
+	fi
+
+	return 0
+}
+
+# Removes the dnsmasq conf-dir overlay and restarts dnsmasq
+#
+# Does nothing (returns 0) when no overlay file exists; otherwise returns
+# the exit status of the dnsmasq restart.
+ha_release_dnsmasq() {
+	local confdir overlay_path
+	confdir="$(ha_get_dnsmasq_confdir)"
+	overlay_path="$confdir/$HA_DNSMASQ_OVERLAY_NAME"
+
+	[ -f "$overlay_path" ] || return 0
+
+	ha_log "Removing dnsmasq HA overlay"
+	rm -f "$overlay_path"
+	/etc/init.d/dnsmasq restart 2>/dev/null
+}
+
+# Apply all configurations
+#
+# Order: validate, take over the standalone init scripts, set up the dnsmasq
+# overlay, then generate the keepalived, owsync and lease-sync config files.
+# Returns 1 when validation fails (nothing else is touched in that case),
+# 0 otherwise. The exit status of the later steps is not checked.
+ha_apply_config() {
+	ha_log "Applying HA cluster configuration"
+
+	# Validate first
+	ha_validate_config || {
+		ha_log_error "Configuration validation failed"
+		return 1
+	}
+
+	# Take over service management from standalone init scripts
+	ha_manage_services "take_over"
+
+	# Configure dnsmasq for HA operation (conf-dir overlay + restart)
+	ha_configure_dnsmasq
+
+	# Generate configs - services will be started by ha-cluster init
+	ha_generate_keepalived_conf
+	ha_generate_owsync_conf
+	ha_generate_lease_sync_conf
+
+	ha_log "HA cluster configuration applied successfully"
+	return 0
+}