From 4e07b533e6f1162d5829fa5949ce464e1bdfb5e6 Mon Sep 17 00:00:00 2001 From: Clydingus <40514241+Clydingus@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:39:27 -0400 Subject: [PATCH 1/3] fix(models): fetch waypoint collection anonymously MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The curated `Overworld/waypoint` collection is public metadata, but the listing inherited the host's cached HF token. A fine-grained/upload-scoped token authenticates yet lacks "read collections", so it 403s and the picker silently falls back to the default model alone. Force `token=False` so listing never depends on ambient credentials — anonymous access succeeds for every user. Co-Authored-By: Claude Opus 4.8 (1M context) --- server-components/server/routes.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/server-components/server/routes.py b/server-components/server/routes.py index b808cac..ef887de 100755 --- a/server-components/server/routes.py +++ b/server-components/server/routes.py @@ -489,7 +489,14 @@ async def _fetch_waypoint_ids(cache: TtlCache[str, list[str]]) -> list[str]: async def fetcher() -> list[str]: def _fetch() -> list[str]: - collection = get_collection(WAYPOINT_COLLECTION_SLUG) + # `token=False` forces an anonymous request. The collection is + # public curated metadata, so listing it must not depend on + # whatever HF token the host happens to have cached — a + # fine-grained/restricted token (e.g. one scoped only for + # uploads) authenticates but lacks "read collections" and gets + # a 403, whereas anonymous access succeeds. Decoupling from the + # ambient token keeps the picker populated for every user. + collection = get_collection(WAYPOINT_COLLECTION_SLUG, token=False) return [item.item_id for item in collection.items if item.item_type == "model"] fetched = await asyncio.to_thread(_fetch) From caf39f3f8433702652e42b2d151dc8607ae05ca1 Mon Sep 17 00:00:00 2001 From: Clydingus <40514241+Clydingus@users.noreply.github.com> Date: Tue, 9 Jun 2026 11:41:12 -0400 Subject: [PATCH 2/3] fix(server): self-read parent create_time baseline in the watchdog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parent-death watchdog compared the launcher-supplied --parent-start-time (Electron's `Date.now()/1000 - process.uptime()`) against psutil's create_time() with a 1s tolerance. On a heavy Electron process, uptime only starts counting after Chromium/V8 boot, so that estimate lands 1-3s after the kernel's real create_time and blew past the tolerance — the server decided its (alive) parent was gone and os._exit(1)'d at startup. Capture the recycling baseline from psutil at construction instead, so the startup check and every poll use one source; the launcher value is still accepted but no longer used for the comparison. Co-Authored-By: Claude Opus 4.8 (1M context) --- server-components/main.py | 87 ++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/server-components/main.py b/server-components/main.py index c9032fb..7497238 100644 --- a/server-components/main.py +++ b/server-components/main.py @@ -55,48 +55,68 @@ class StartupConfig: # If launched with --parent-pid, poll the parent and exit if it dies. # Linux's prctl(PR_SET_PDEATHSIG) is the kernel-level fallback we'd ideally # use, but Python doesn't expose it portably; the polling watchdog covers -# both Linux and Windows. On Windows the bare `os.kill(pid, 0)` check is -# unreliable in the face of PID recycling — kernel reuses PIDs aggressively -# and the new occupant can be running as a different user — so when the -# launcher passes `--parent-start-time` we additionally compare the parent's -# process-creation time via psutil. - - -# Tolerance for clock skew between the launcher's clock and the kernel's -# recorded process-creation timestamp. Process-creation times sit at -# millisecond precision; one second is generous enough to absorb the -# stringification round-trip and any small drift, while still being well -# below the gap any recycled PID would exhibit. +# both Linux and Windows. A bare `os.kill(pid, 0)` check is unreliable in +# the face of PID recycling — the kernel reuses PIDs aggressively and the +# new occupant can be a different process — so the watchdog also pins the +# parent's psutil `create_time()`, captured as a baseline at startup, and +# treats a changed creation timestamp as "parent gone". + + +# Tolerance for the PID-recycling guard. The baseline and every poll both +# read the parent's creation time from the same source (psutil), and a +# process's create_time is immutable for its lifetime, so a live parent +# compares exactly equal — this tolerance only absorbs float round-trip +# noise. A recycled PID belongs to a process created seconds-to-hours +# later, so it lands far outside this window. _PARENT_START_TIME_TOLERANCE_SEC = 1.0 class ParentWatchdog: """Monitors a parent process and force-exits this process if the parent dies. Constructed in `__main__` (one-shot startup check), - then run as an asyncio task by the lifespan (continuous polling).""" - - def __init__(self, parent_pid: int, parent_start_time: float | None = None) -> None: + then run as an asyncio task by the lifespan (continuous polling). + + The recycling guard compares the parent's *current* kernel + creation timestamp against a baseline captured here via psutil at + construction — deliberately NOT the launcher-supplied + `--parent-start-time`. The Electron launcher derives that value as + `Date.now()/1000 - process.uptime()`, which lands ~1-3s after the + kernel's real `create_time()` on a heavy Electron process (uptime + only starts counting once Chromium/V8 has booted). Comparing that + cross-source estimate against psutil exceeded the tolerance and + false-killed the server at startup. Reading the baseline here means + the startup check and every poll use one source, so the tolerance + only ever has to absorb genuine float noise.""" + + def __init__(self, parent_pid: int) -> None: self.parent_pid = parent_pid - self.parent_start_time = parent_start_time + # Baseline from the kernel, captured now. None if the parent is + # already gone or its metadata isn't readable (→ pid-only liveness). + self.baseline_create_time = self._read_create_time() - def _parent_alive(self) -> bool: - """True iff a process with the parent's PID exists and (when - `parent_start_time` is known) has a matching creation timestamp.""" + def _read_create_time(self) -> float | None: try: - proc = psutil.Process(self.parent_pid) - except psutil.NoSuchProcess: - return False - if self.parent_start_time is None: - return True + return psutil.Process(self.parent_pid).create_time() + except (psutil.NoSuchProcess, psutil.AccessDenied): + return None + + def _parent_alive(self) -> bool: + """True iff a process with the parent's PID exists and its + creation timestamp still matches the baseline (guards against + PID recycling).""" try: - create_time = proc.create_time() + current = psutil.Process(self.parent_pid).create_time() except psutil.NoSuchProcess: return False except psutil.AccessDenied: # Process exists but we can't read its metadata. Treat as alive # rather than self-exit on a transient permission glitch. return True - return abs(create_time - self.parent_start_time) <= _PARENT_START_TIME_TOLERANCE_SEC + if self.baseline_create_time is None: + # No baseline (parent was gone/unreadable at construction but + # the PID resolves now) → fall back to pid-only liveness. + return True + return abs(current - self.baseline_create_time) <= _PARENT_START_TIME_TOLERANCE_SEC def check_alive_or_exit(self) -> None: """Synchronous one-shot check at startup, in case the parent @@ -199,7 +219,7 @@ async def lifespan(app: FastAPI): cfg: StartupConfig = app.state.startup_config watchdog_task = None if cfg.parent_pid is not None: - watchdog_task = asyncio.create_task(ParentWatchdog(cfg.parent_pid, cfg.parent_start_time).run()) + watchdog_task = asyncio.create_task(ParentWatchdog(cfg.parent_pid).run()) yield @@ -254,8 +274,17 @@ async def lifespan(app: FastAPI): launched_from_standalone=args.launched_from_standalone, ) if args.parent_pid is not None: - logger.info("Monitoring parent process", parent_pid=args.parent_pid, parent_start_time=args.parent_start_time) - ParentWatchdog(args.parent_pid, args.parent_start_time).check_alive_or_exit() + # Self-read the kernel baseline (see ParentWatchdog docstring); the + # launcher's `--parent-start-time` is accepted for backward compat + # but intentionally not used for the recycling comparison. + watchdog = ParentWatchdog(args.parent_pid) + logger.info( + "Monitoring parent process", + parent_pid=args.parent_pid, + parent_create_time=watchdog.baseline_create_time, + launcher_start_time=args.parent_start_time, + ) + watchdog.check_alive_or_exit() # Construct the uvicorn Server explicitly (rather than via `uvicorn.run`) # so the `/shutdown` route can flip `should_exit` on the live instance, From 04e07d9abbf63c53a22aeb17219786279d2f89aa Mon Sep 17 00:00:00 2001 From: Clydingus <40514241+Clydingus@users.noreply.github.com> Date: Tue, 9 Jun 2026 11:59:51 -0400 Subject: [PATCH 3/3] fix(server): drop unused --parent-start-time --- electron/ipc/server.ts | 14 ++------------ server-components/main.py | 31 +++---------------------------- 2 files changed, 5 insertions(+), 40 deletions(-) diff --git a/electron/ipc/server.ts b/electron/ipc/server.ts index 6c31c0f..57e0ba8 100644 --- a/electron/ipc/server.ts +++ b/electron/ipc/server.ts @@ -109,18 +109,8 @@ export function registerServerIpc(): void { ] // Parent-process watchdog: the Python server force-exits if this Electron - // process disappears. On Windows we also pass our process-creation time - // so the watchdog can defeat PID recycling — without it, a freshly spawned - // OS process inheriting our PID would look "alive" to the server even - // though Biome itself is long gone. - const parentStartTimeSec = Date.now() / 1000 - process.uptime() - const serverArgs = [ - ...baseServerArgs, - '--parent-pid', - String(process.pid), - '--parent-start-time', - parentStartTimeSec.toFixed(3) - ] + // process disappears. + const serverArgs = [...baseServerArgs, '--parent-pid', String(process.pid)] // Spawn the server const child = spawn(uvBinary, serverArgs, { diff --git a/server-components/main.py b/server-components/main.py index 7497238..a3906e6 100644 --- a/server-components/main.py +++ b/server-components/main.py @@ -40,10 +40,6 @@ class StartupConfig: before the lifespan body itself runs.""" parent_pid: int | None = None - # Epoch seconds for the parent's process-creation time, as reported by - # the launcher. Paired with `parent_pid` to defend against PID recycling - # on Windows: a recycled PID matches, but the creation time won't. - parent_start_time: float | None = None # True when the launching Biome instance is in standalone mode and # owns this process's lifecycle — set via `--launched-from-standalone` # so the renderer can refuse to point itself at a server it would @@ -78,15 +74,7 @@ class ParentWatchdog: The recycling guard compares the parent's *current* kernel creation timestamp against a baseline captured here via psutil at - construction — deliberately NOT the launcher-supplied - `--parent-start-time`. The Electron launcher derives that value as - `Date.now()/1000 - process.uptime()`, which lands ~1-3s after the - kernel's real `create_time()` on a heavy Electron process (uptime - only starts counting once Chromium/V8 has booted). Comparing that - cross-source estimate against psutil exceeded the tolerance and - false-killed the server at startup. Reading the baseline here means - the startup check and every poll use one source, so the tolerance - only ever has to absorb genuine float noise.""" + construction.""" def __init__(self, parent_pid: int) -> None: self.parent_pid = parent_pid @@ -101,7 +89,7 @@ def _read_create_time(self) -> float | None: return None def _parent_alive(self) -> bool: - """True iff a process with the parent's PID exists and its + """True if a process with the parent's PID exists and its creation timestamp still matches the baseline (guards against PID recycling).""" try: @@ -249,15 +237,6 @@ async def lifespan(app: FastAPI): parser.add_argument( "--parent-pid", type=int, default=None, help="PID of parent process; server exits if parent dies" ) - parser.add_argument( - "--parent-start-time", - type=float, - default=None, - help=( - "Epoch-seconds creation timestamp of the parent process. When paired with --parent-pid, used to guard" - " against PID recycling: a recycled PID won't match the original parent's creation time." - ), - ) parser.add_argument( "--launched-from-standalone", action="store_true", @@ -270,19 +249,15 @@ async def lifespan(app: FastAPI): app.state.startup_config = StartupConfig( parent_pid=args.parent_pid, - parent_start_time=args.parent_start_time, launched_from_standalone=args.launched_from_standalone, ) if args.parent_pid is not None: - # Self-read the kernel baseline (see ParentWatchdog docstring); the - # launcher's `--parent-start-time` is accepted for backward compat - # but intentionally not used for the recycling comparison. + # The watchdog self-reads the parent's create_time as its recycling baseline. watchdog = ParentWatchdog(args.parent_pid) logger.info( "Monitoring parent process", parent_pid=args.parent_pid, parent_create_time=watchdog.baseline_create_time, - launcher_start_time=args.parent_start_time, ) watchdog.check_alive_or_exit()