From ff46bd953eebd00cc7ccf37c72a776179212d54e Mon Sep 17 00:00:00 2001 From: Michal Skrivanek Date: Fri, 26 Sep 2025 17:33:23 +0200 Subject: [PATCH] unregister from controller only on proper exit exceptions raised during serve() and lease changes keep the exporter registered with controller to avoid quick online/offline transitions since the internal logic restarts the jumpstarter child process within a few seconds. On termination signal the unregistration still happens as the parent process also terminates. Note it still causes offline/online bounce if it is immediately restarted via systemd service for example. (cherry picked from commit 8587858e8c10be8ea4d000cd2ddca8ef701aef11) --- packages/jumpstarter-cli/jumpstarter_cli/run.py | 2 +- packages/jumpstarter/jumpstarter/exporter/exporter.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/jumpstarter-cli/jumpstarter_cli/run.py b/packages/jumpstarter-cli/jumpstarter_cli/run.py index b288d8968..50f9606e1 100644 --- a/packages/jumpstarter-cli/jumpstarter_cli/run.py +++ b/packages/jumpstarter-cli/jumpstarter_cli/run.py @@ -61,7 +61,7 @@ async def signal_handler(): # Terminate exporter. SIGHUP waits until current lease is let go. 
Later SIGTERM still overrides if received_signal != signal.SIGHUP: signal_handled = True - exporter.stop(wait_for_lease_exit=received_signal == signal.SIGHUP) + exporter.stop(wait_for_lease_exit=received_signal == signal.SIGHUP, should_unregister=True) # Start signal handler first, then create exporter async with create_task_group() as signal_tg: diff --git a/packages/jumpstarter/jumpstarter/exporter/exporter.py b/packages/jumpstarter/jumpstarter/exporter/exporter.py index e63589ee5..a33a6a9be 100644 --- a/packages/jumpstarter/jumpstarter/exporter/exporter.py +++ b/packages/jumpstarter/jumpstarter/exporter/exporter.py @@ -38,20 +38,23 @@ class Exporter(AsyncContextManagerMixin, Metadata): tls: TLSConfigV1Alpha1 = field(default_factory=TLSConfigV1Alpha1) grpc_options: dict[str, str] = field(default_factory=dict) registered: bool = field(init=False, default=False) + _unregister: bool = field(init=False, default=False) _stop_requested: bool = field(init=False, default=False) _started: bool = field(init=False, default=False) _tg: TaskGroup | None = field(init=False, default=None) - def stop(self, wait_for_lease_exit=False): + def stop(self, wait_for_lease_exit=False, should_unregister=False): """Signal the exporter to stop. Args: wait_for_lease_exit (bool): If True, wait for the current lease to exit before stopping. + should_unregister (bool): If True, unregister from controller. Otherwise rely on heartbeat. 
""" # Stop immediately if not started yet or if immediate stop is requested if (not self._started or not wait_for_lease_exit) and self._tg is not None: - logger.info("Stopping exporter immediately") + logger.info("Stopping exporter immediately, unregister from controller=%s", should_unregister) + self._unregister = should_unregister self._tg.cancel_scope.cancel() elif not self._stop_requested: self._stop_requested = True @@ -63,7 +66,7 @@ async def __asynccontextmanager__(self) -> AsyncGenerator[Self]: yield self finally: try: - if self.registered: + if self.registered and self._unregister: logger.info("Unregistering exporter with controller") try: with move_on_after(10): # 10 second timeout @@ -200,6 +203,6 @@ async def status(retries=5, backoff=3): else: logger.info("Currently not leased") if self._stop_requested: - self.stop() + self.stop(should_unregister=True) break self._tg = None