From 243c1e94c33a6b0643e3d42d068320dfc045c479 Mon Sep 17 00:00:00 2001
From: Louis Cathala
Date: Sat, 14 Mar 2026 17:22:24 +0100
Subject: [PATCH 1/3] fix: systemd ordering cycle in nvfd-fan-control.service

Remove After= and Wants= directives to avoid ordering cycles caused by
nvfd.service's After=multi-user.target. Place
StartLimitBurst/StartLimitIntervalSec in [Unit] section to rate limit
restarts: max 3 restarts in 400 seconds, with 90s between attempts.

Changes:
- Removed After=nvfd.service, Wants=nvfd.service (causes cycles)
- Added StartLimitBurst=3, StartLimitIntervalSec=400 in [Unit] section
- RestartSec=90 (to fit within rate limit window)
- Script waits up to 30s for nvfd.service with 1s polling

This ensures:
- No systemd dependency cycles on start or stop
- Graceful waiting for nvfd without aggressive restart storms
- Rate-limited retries if nvfd fails to start
---
 utils/nvfd-fan-control.service |  8 ++++----
 utils/nvfd-fan-control.sh      | 10 ++++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/utils/nvfd-fan-control.service b/utils/nvfd-fan-control.service
index 8573d91..0d2adb3 100644
--- a/utils/nvfd-fan-control.service
+++ b/utils/nvfd-fan-control.service
@@ -1,13 +1,13 @@
 [Unit]
 Description=NVIDIA Fan Control - Temperature-aware per-GPU mode switching
-After=nvfd.service
-Wants=nvfd.service
+StartLimitIntervalSec=400
+StartLimitBurst=3
 
 [Service]
 Type=simple
 ExecStart=/usr/local/bin/nvfd-fan-control.sh --threshold-up 45 --threshold-down 35
-Restart=always
-RestartSec=5
+Restart=on-failure
+RestartSec=90
 User=root
 
 [Install]
diff --git a/utils/nvfd-fan-control.sh b/utils/nvfd-fan-control.sh
index 7a18b5d..1730504 100755
--- a/utils/nvfd-fan-control.sh
+++ b/utils/nvfd-fan-control.sh
@@ -64,6 +64,16 @@ done
 # Check for root privileges
 [[ "$EUID" -ne 0 ]] && { echo "ERROR: This script must be run as root (use sudo)" >&2; exit 1; }
 
+# Wait for nvfd service to be active (up to 30 seconds)
+# Systemd handles restart rate limiting via StartLimitBurst/StartLimitIntervalSec
+echo "[INFO] Waiting for nvfd service to be ready..."
+for i in {1..30}; do
+    systemctl is-active --quiet nvfd.service && break
+    [[ $i -eq 30 ]] && exit 1
+    sleep 1
+done
+echo "[INFO] nvfd service is ready"
+
 # Acquire file lock to prevent multiple instances
 exec 200>"$LOCKFILE"
 if ! flock -n 200; then

From 2cca9118d8d35741297fdc6b39262c309112f912 Mon Sep 17 00:00:00 2001
From: Louis Cathala
Date: Mon, 16 Mar 2026 13:38:42 +0100
Subject: [PATCH 2/3] Update utils/nvfd-fan-control.sh

Add error log if `nvfd.service` is not ready after 30s

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 utils/nvfd-fan-control.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/utils/nvfd-fan-control.sh b/utils/nvfd-fan-control.sh
index 1730504..e404891 100755
--- a/utils/nvfd-fan-control.sh
+++ b/utils/nvfd-fan-control.sh
@@ -69,7 +69,9 @@ done
 echo "[INFO] Waiting for nvfd service to be ready..."
 for i in {1..30}; do
     systemctl is-active --quiet nvfd.service && break
-    [[ $i -eq 30 ]] && exit 1
+    systemctl is-active --quiet nvfd.service && break
+    [[ $i -eq 30 ]] && { echo "[ERROR] nvfd service not ready after 30 seconds. Exiting." >&2; exit 1; }
+    sleep 1
     sleep 1
 done
 echo "[INFO] nvfd service is ready"

From 3182eba2be36b5cac3aa7c7de6018bf668a7e7d8 Mon Sep 17 00:00:00 2001
From: Louis Cathala
Date: Mon, 16 Mar 2026 13:40:27 +0100
Subject: [PATCH 3/3] remove duplicate lines

---
 utils/nvfd-fan-control.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/utils/nvfd-fan-control.sh b/utils/nvfd-fan-control.sh
index e404891..7712de0 100755
--- a/utils/nvfd-fan-control.sh
+++ b/utils/nvfd-fan-control.sh
@@ -68,11 +68,9 @@ done
 # Systemd handles restart rate limiting via StartLimitBurst/StartLimitIntervalSec
 echo "[INFO] Waiting for nvfd service to be ready..."
 for i in {1..30}; do
-    systemctl is-active --quiet nvfd.service && break
     systemctl is-active --quiet nvfd.service && break
     [[ $i -eq 30 ]] && { echo "[ERROR] nvfd service not ready after 30 seconds. Exiting." >&2; exit 1; }
     sleep 1
-    sleep 1
 done
 echo "[INFO] nvfd service is ready"
 