Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions electron/ipc/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,8 @@ export function registerServerIpc(): void {
]

// Parent-process watchdog: the Python server force-exits if this Electron
// process disappears. On Windows we also pass our process-creation time
// so the watchdog can defeat PID recycling — without it, a freshly spawned
// OS process inheriting our PID would look "alive" to the server even
// though Biome itself is long gone.
const parentStartTimeSec = Date.now() / 1000 - process.uptime()
const serverArgs = [
...baseServerArgs,
'--parent-pid',
String(process.pid),
'--parent-start-time',
parentStartTimeSec.toFixed(3)
]
// process disappears.
const serverArgs = [...baseServerArgs, '--parent-pid', String(process.pid)]

// Spawn the server
const child = spawn(uvBinary, serverArgs, {
Expand Down
88 changes: 46 additions & 42 deletions server-components/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,6 @@ class StartupConfig:
before the lifespan body itself runs."""

parent_pid: int | None = None
# Epoch seconds for the parent's process-creation time, as reported by
# the launcher. Paired with `parent_pid` to defend against PID recycling
# on Windows: a recycled PID matches, but the creation time won't.
parent_start_time: float | None = None
# True when the launching Biome instance is in standalone mode and
# owns this process's lifecycle — set via `--launched-from-standalone`
# so the renderer can refuse to point itself at a server it would
Expand All @@ -55,48 +51,60 @@ class StartupConfig:
# If launched with --parent-pid, poll the parent and exit if it dies.
# Linux's prctl(PR_SET_PDEATHSIG) is the kernel-level fallback we'd ideally
# use, but Python doesn't expose it portably; the polling watchdog covers
# both Linux and Windows. On Windows the bare `os.kill(pid, 0)` check is
# unreliable in the face of PID recycling — kernel reuses PIDs aggressively
# and the new occupant can be running as a different user — so when the
# launcher passes `--parent-start-time` we additionally compare the parent's
# process-creation time via psutil.


# Tolerance for clock skew between the launcher's clock and the kernel's
# recorded process-creation timestamp. Process-creation times sit at
# millisecond precision; one second is generous enough to absorb the
# stringification round-trip and any small drift, while still being well
# below the gap any recycled PID would exhibit.
# both Linux and Windows. A bare `os.kill(pid, 0)` check is unreliable in
# the face of PID recycling — the kernel reuses PIDs aggressively and the
# new occupant can be a different process — so the watchdog also pins the
# parent's psutil `create_time()`, captured as a baseline at startup, and
# treats a changed creation timestamp as "parent gone".


# Tolerance for the PID-recycling guard. The baseline and every poll both
# read the parent's creation time from the same source (psutil), and a
# process's create_time is immutable for its lifetime, so a live parent
# compares exactly equal — this tolerance only absorbs float round-trip
# noise. A recycled PID belongs to a process created seconds-to-hours
# later, so it lands far outside this window.
_PARENT_START_TIME_TOLERANCE_SEC = 1.0


class ParentWatchdog:
"""Monitors a parent process and force-exits this process if the
parent dies. Constructed in `__main__` (one-shot startup check),
then run as an asyncio task by the lifespan (continuous polling)."""
then run as an asyncio task by the lifespan (continuous polling).

def __init__(self, parent_pid: int, parent_start_time: float | None = None) -> None:
The recycling guard compares the parent's *current* kernel
creation timestamp against a baseline captured here via psutil at
construction."""

def __init__(self, parent_pid: int) -> None:
self.parent_pid = parent_pid
self.parent_start_time = parent_start_time
# Baseline from the kernel, captured now. None if the parent is
# already gone or its metadata isn't readable (→ pid-only liveness).
self.baseline_create_time = self._read_create_time()

def _parent_alive(self) -> bool:
"""True iff a process with the parent's PID exists and (when
`parent_start_time` is known) has a matching creation timestamp."""
def _read_create_time(self) -> float | None:
try:
proc = psutil.Process(self.parent_pid)
except psutil.NoSuchProcess:
return False
if self.parent_start_time is None:
return True
return psutil.Process(self.parent_pid).create_time()
except (psutil.NoSuchProcess, psutil.AccessDenied):
return None

def _parent_alive(self) -> bool:
"""True if a process with the parent's PID exists and its
creation timestamp still matches the baseline (guards against
PID recycling)."""
try:
create_time = proc.create_time()
current = psutil.Process(self.parent_pid).create_time()
except psutil.NoSuchProcess:
return False
except psutil.AccessDenied:
# Process exists but we can't read its metadata. Treat as alive
# rather than self-exit on a transient permission glitch.
return True
return abs(create_time - self.parent_start_time) <= _PARENT_START_TIME_TOLERANCE_SEC
if self.baseline_create_time is None:
# No baseline (parent was gone/unreadable at construction but
# the PID resolves now) → fall back to pid-only liveness.
return True
return abs(current - self.baseline_create_time) <= _PARENT_START_TIME_TOLERANCE_SEC

def check_alive_or_exit(self) -> None:
"""Synchronous one-shot check at startup, in case the parent
Expand Down Expand Up @@ -199,7 +207,7 @@ async def lifespan(app: FastAPI):
cfg: StartupConfig = app.state.startup_config
watchdog_task = None
if cfg.parent_pid is not None:
watchdog_task = asyncio.create_task(ParentWatchdog(cfg.parent_pid, cfg.parent_start_time).run())
watchdog_task = asyncio.create_task(ParentWatchdog(cfg.parent_pid).run())

yield

Expand Down Expand Up @@ -229,15 +237,6 @@ async def lifespan(app: FastAPI):
parser.add_argument(
"--parent-pid", type=int, default=None, help="PID of parent process; server exits if parent dies"
)
parser.add_argument(
"--parent-start-time",
type=float,
default=None,
help=(
"Epoch-seconds creation timestamp of the parent process. When paired with --parent-pid, used to guard"
" against PID recycling: a recycled PID won't match the original parent's creation time."
),
)
parser.add_argument(
"--launched-from-standalone",
action="store_true",
Expand All @@ -250,12 +249,17 @@ async def lifespan(app: FastAPI):

app.state.startup_config = StartupConfig(
parent_pid=args.parent_pid,
parent_start_time=args.parent_start_time,
launched_from_standalone=args.launched_from_standalone,
)
if args.parent_pid is not None:
logger.info("Monitoring parent process", parent_pid=args.parent_pid, parent_start_time=args.parent_start_time)
ParentWatchdog(args.parent_pid, args.parent_start_time).check_alive_or_exit()
# The watchdog self-reads the parent's create_time as its recycling baseline.
watchdog = ParentWatchdog(args.parent_pid)
logger.info(
"Monitoring parent process",
parent_pid=args.parent_pid,
parent_create_time=watchdog.baseline_create_time,
)
watchdog.check_alive_or_exit()

# Construct the uvicorn Server explicitly (rather than via `uvicorn.run`)
# so the `/shutdown` route can flip `should_exit` on the live instance,
Expand Down
9 changes: 8 additions & 1 deletion server-components/server/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,14 @@ async def _fetch_waypoint_ids(cache: TtlCache[str, list[str]]) -> list[str]:

async def fetcher() -> list[str]:
def _fetch() -> list[str]:
collection = get_collection(WAYPOINT_COLLECTION_SLUG)
# `token=False` forces an anonymous request. The collection is
# public curated metadata, so listing it must not depend on
# whatever HF token the host happens to have cached — a
# fine-grained/restricted token (e.g. one scoped only for
# uploads) authenticates but lacks "read collections" and gets
# a 403, whereas anonymous access succeeds. Decoupling from the
# ambient token keeps the picker populated for every user.
collection = get_collection(WAYPOINT_COLLECTION_SLUG, token=False)
return [item.item_id for item in collection.items if item.item_type == "model"]

fetched = await asyncio.to_thread(_fetch)
Expand Down
Loading