diff --git a/ui/src/api/hooks/useSlots.ts b/ui/src/api/hooks/useSlots.ts index db336461..961e5752 100644 --- a/ui/src/api/hooks/useSlots.ts +++ b/ui/src/api/hooks/useSlots.ts @@ -99,6 +99,29 @@ export interface Slot { * true and never recomputes it from device strings. */ backend_mismatch?: boolean + // ── Container runtime fields (#657) ───────────────────────────────── + /** Slot runtime engine: "lemonade" (default) or "container". Container + * slots dispatch through ContainerProvider (podman/docker systemd unit) + * instead of Lemonade. */ + runtime?: 'lemonade' | 'container' + /** Profile name from /etc/hal0/profiles.toml. Container slots use a + * profile to supply the container image + bench-tuned flags. */ + profile?: string | null + /** Container image ref (from the resolved profile). E.g. + * "ghcr.io/hal0ai/amd-strix-halo-toolboxes:rocm-7.2.4-rocmfp4-server". */ + image?: string | null + /** Container image availability: "present" | "pulling" | "missing". + * Populated by the backend when image_status is tracked. */ + image_status?: 'present' | 'pulling' | 'missing' | null + /** Container unit state: "running" | "stopped" | "starting" | "crashed". + * Set by _container_state_enrichment() in /api/slots. Absent for + * Lemonade slots. */ + container_status?: 'running' | 'stopped' | 'starting' | 'crashed' | null + /** True when the container unit is active AND /health returns ok. + * False when stopped, starting (health probe not yet passing), or crashed. + * Absent for Lemonade slots. */ + container_health?: boolean | null + // ── Synthetic upstream-backed entries ─────────────────────────────── // /api/slots merges real lifecycle-managed slots with synthetic // entries (slots.py → _synthesize_slots_from_upstreams) that represent @@ -212,6 +235,18 @@ function normalizeSlot(s: any): Slot { // entries in the union may omit the flag, so default it here rather than // letting the card read undefined as "disabled". 
enabled: s?.enabled !== false, + // Container runtime fields (#657). Pass through verbatim; an absent runtime + // defaults to 'lemonade', other absent keys to null, so the card can safely branch on runtime. + runtime: s?.runtime ?? 'lemonade', + profile: s?.profile ?? null, + // image/image_status may come from profile resolution (backend TBD) or + // be omitted; null means "unknown — don't show image chip". + image: s?.image ?? null, + image_status: s?.image_status ?? null, + // container_status / container_health are set by _container_state_enrichment. + // Absent for Lemonade slots; null here keeps the type honest. + container_status: s?.container_status ?? null, + container_health: s?.container_health ?? null, } } diff --git a/ui/src/dash/memory-map.jsx b/ui/src/dash/memory-map.jsx index de6b6bfd..ea0a8125 100644 --- a/ui/src/dash/memory-map.jsx +++ b/ui/src/dash/memory-map.jsx @@ -11,8 +11,7 @@ import { useSlots } from '@/api/hooks/useSlots' import { useHardware } from '@/api/hooks/useHardware' import { useStatsHardware } from '@/api/hooks/useStatsHardware' import { useProxmoxSettings } from '@/api/hooks/useProxmoxSettings' - -const LIVE_STATES = new Set(['ready', 'serving', 'idle', 'warming']) +import { isSlotLive } from './slot-status.js' const SAFETY_MARGIN_GB = 2 const MB_PER_GB = 1024 @@ -149,7 +148,10 @@ export function useMemoryMapModel() { ) const npuModelGb = mbToGb(stats.data?.npu_status?.model_mb || 0) - const liveSlots = slots.filter((s) => LIVE_STATES.has((s.state || '').toLowerCase())) + // N1 (slot-status unifier): isSlotLive() handles both lemond (state-string) + // and container (container_status + container_health) runtimes. Replaces + // the old static LIVE_STATES set membership test. + const liveSlots = slots.filter((s) => isSlotLive(s)) // Prefer the BE-METRICS `mem_mb` contract (real per-slot resident // model + KV memory). 
Fall back to the legacy GTT-split / NPU-divide diff --git a/ui/src/dash/slot-modals.jsx b/ui/src/dash/slot-modals.jsx index 37761b16..098a5063 100644 --- a/ui/src/dash/slot-modals.jsx +++ b/ui/src/dash/slot-modals.jsx @@ -15,6 +15,7 @@ import { useHardware } from '@/api/hooks/useHardware' import { useBackends } from '@/api/hooks/useBackends' import { useModels } from '@/api/hooks/useModels' import { ENDPOINTS } from '@/api/endpoints' +import { stateChipClassForSlot } from './slot-status.js' // Full static device list — shown as fallback when /api/backends hasn't // loaded yet or returns empty. Never render an empty device dropdown. @@ -36,12 +37,32 @@ const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React // Map a slot lifecycle state to a chip color class. // online/ready/serving → green (ok); starting → amber (warn); // error → red (err); offline/empty/anything else → neutral grey (base chip). -function stateChipClass(state) { - const s = String(state || "").toLowerCase(); - if (["ready", "online", "loaded", "serving", "running"].includes(s)) return "chip ok"; - if (["starting", "loading", "pending", "stopping"].includes(s)) return "chip warn"; - if (["error", "failed", "broken"].includes(s)) return "chip err"; - return "chip"; // offline / empty / unconfigured → neutral grey +// +// N1: accepts either a state string (lemond path, unchanged) or a full slot +// object. When given a slot object, delegates to stateChipClassForSlot() +// from slot-status.js which handles container runtime correctly via +// slotPhase(). The primitive string overload is kept for call sites that +// only have the state string — its behaviour is unchanged. +function stateChipClass(stateOrSlot) { + // Duck-type: if it's a string, keep original behaviour (lemond path). + if (typeof stateOrSlot === "string" || stateOrSlot == null) { + // STRING path = lemond, byte-identical to origin/main. 
Do NOT add + // warming/pulling/crashed here — that recolored lemond state strips + // (e.g. state="warming" must stay grey at the EditSlotDrawer strip). + // Container chips route through the slot-OBJECT overload only. + const s = String(stateOrSlot || "").toLowerCase(); + if (["ready", "online", "loaded", "serving", "running"].includes(s)) return "chip ok"; + if (["starting", "loading", "pending", "stopping"].includes(s)) return "chip warn"; + if (["error", "failed", "broken"].includes(s)) return "chip err"; + return "chip"; // offline / warming / empty / unconfigured → neutral grey + } + // Full slot object: delegate to the shared N1 helper. + // stateChipClassForSlot returns null for lemond slots (sentinel), + // in which case we fall back to the original string-based path. + const slot = stateOrSlot; + const fromPhase = stateChipClassForSlot(slot); + if (fromPhase !== null) return fromPhase; + return stateChipClass(slot.state); } // Map /api/models registry rows → the shape this file's swap popover and @@ -869,6 +890,8 @@ function InlineSwapPopover({ slot, open, onClose, onPick }) { const modelsQuery = useModels(); const hwQuery = useHardware(); if (!open) return null; + + const isContainer = slot.runtime === "container"; const ramFreeGb = hwQuery.data?.ram?.free ?? 0; const compatible = (modelsQuery.data ?? []) .map(normalizeApiModel) @@ -878,9 +901,40 @@ function InlineSwapPopover({ slot, open, onClose, onPick }) { // don't offer them when swapping a non-rocm slot. !(Array.isArray(m.tags) && m.tags.includes("rocmfp4") && slot.backend !== "rocm") ); + + // N2: container swap = cold systemctl restart (NOT lemond hot /v1/load). + // Intercept onPick for container slots: show a confirm toast and fire + // the same onPick (which drives restart), so the parent card drives to + // "starting" state immediately. The parent's onSwapPick calls useSlotSwap + // which triggers a restart for container slots server-side. 
+ const handlePick = (m) => { + if (isContainer) { + const name = slot.name; + const label = m.longName || m.id; + window.__hal0Toast && window.__hal0Toast( + `Restarting ${name} to load ${label} — ~model-load seconds`, + "info" + ); + } + onPick(m); + onClose(); + }; + return (