diff --git a/utils/nvfd-fan-control.service b/utils/nvfd-fan-control.service
index 8573d91..0d2adb3 100644
--- a/utils/nvfd-fan-control.service
+++ b/utils/nvfd-fan-control.service
@@ -1,13 +1,13 @@
 [Unit]
 Description=NVIDIA Fan Control - Temperature-aware per-GPU mode switching
-After=nvfd.service
-Wants=nvfd.service
+StartLimitIntervalSec=400
+StartLimitBurst=3
 
 [Service]
 Type=simple
 ExecStart=/usr/local/bin/nvfd-fan-control.sh --threshold-up 45 --threshold-down 35
-Restart=always
-RestartSec=5
+Restart=on-failure
+RestartSec=90
 User=root
 
 [Install]
diff --git a/utils/nvfd-fan-control.sh b/utils/nvfd-fan-control.sh
index 7a18b5d..7712de0 100755
--- a/utils/nvfd-fan-control.sh
+++ b/utils/nvfd-fan-control.sh
@@ -64,6 +64,16 @@ done
 # Check for root privileges
 [[ "$EUID" -ne 0 ]] && { echo "ERROR: This script must be run as root (use sudo)" >&2; exit 1; }
 
+# Wait for nvfd service to be active (up to 30 seconds)
+# Systemd handles restart rate limiting via StartLimitBurst/StartLimitIntervalSec
+echo "[INFO] Waiting for nvfd service to be ready..."
+for i in {1..30}; do
+    systemctl is-active --quiet nvfd.service && break
+    [[ $i -eq 30 ]] && { echo "[ERROR] nvfd service not ready after 30 seconds. Exiting." >&2; exit 1; }
+    sleep 1
+done
+echo "[INFO] nvfd service is ready"
+
 # Acquire file lock to prevent multiple instances
 exec 200>"$LOCKFILE"
 if ! flock -n 200; then