Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions ui/src/api/hooks/useSlots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ export interface Slot {
* true and never recomputes it from device strings. */
backend_mismatch?: boolean

// ── Container runtime fields (#657) ─────────────────────────────────
/** Slot runtime engine: "lemonade" (default) or "container". Container
* slots dispatch through ContainerProvider (podman/docker systemd unit)
* instead of Lemonade. */
runtime?: 'lemonade' | 'container'
/** Profile name from /etc/hal0/profiles.toml. Container slots use a
* profile to supply the container image + bench-tuned flags. */
profile?: string | null
/** Container image ref (from the resolved profile). E.g.
* "ghcr.io/hal0ai/amd-strix-halo-toolboxes:rocm-7.2.4-rocmfp4-server". */
image?: string | null
/** Container image availability: "present" | "pulling" | "missing".
* Populated by the backend when image_status is tracked. */
image_status?: 'present' | 'pulling' | 'missing' | null
/** Container unit state: "running" | "stopped" | "starting" | "crashed".
* Set by _container_state_enrichment() in /api/slots. Absent for
* Lemonade slots. */
container_status?: 'running' | 'stopped' | 'starting' | 'crashed' | null
/** True when the container unit is active AND /health returns ok.
* False when stopped, starting (health probe not yet passing), or crashed.
* Absent for Lemonade slots. */
container_health?: boolean | null

// ── Synthetic upstream-backed entries ───────────────────────────────
// /api/slots merges real lifecycle-managed slots with synthetic
// entries (slots.py → _synthesize_slots_from_upstreams) that represent
Expand Down Expand Up @@ -212,6 +235,18 @@ function normalizeSlot(s: any): Slot {
// entries in the union may omit the flag, so default it here rather than
// letting the card read undefined as "disabled".
enabled: s?.enabled !== false,
// Container runtime fields (#657). `runtime` defaults to 'lemonade' when
// absent; the other fields pass through verbatim, with absent keys
// normalized to null so the card can safely branch on runtime.
runtime: s?.runtime ?? 'lemonade',
profile: s?.profile ?? null,
// image/image_status may come from profile resolution (backend TBD) or
// be omitted; null means "unknown — don't show image chip".
image: s?.image ?? null,
image_status: s?.image_status ?? null,
// container_status / container_health are set by _container_state_enrichment.
// Absent for Lemonade slots; null here keeps the type honest.
container_status: s?.container_status ?? null,
container_health: s?.container_health ?? null,
}
}

Expand Down
8 changes: 5 additions & 3 deletions ui/src/dash/memory-map.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ import { useSlots } from '@/api/hooks/useSlots'
import { useHardware } from '@/api/hooks/useHardware'
import { useStatsHardware } from '@/api/hooks/useStatsHardware'
import { useProxmoxSettings } from '@/api/hooks/useProxmoxSettings'

const LIVE_STATES = new Set(['ready', 'serving', 'idle', 'warming'])
import { isSlotLive } from './slot-status.js'
const SAFETY_MARGIN_GB = 2
const MB_PER_GB = 1024

Expand Down Expand Up @@ -149,7 +148,10 @@ export function useMemoryMapModel() {
)
const npuModelGb = mbToGb(stats.data?.npu_status?.model_mb || 0)

const liveSlots = slots.filter((s) => LIVE_STATES.has((s.state || '').toLowerCase()))
// N1 (slot-status unifier): isSlotLive() handles both lemond (state-string)
// and container (container_status + container_health) runtimes. Replaces
// the old static LIVE_STATES set membership test.
const liveSlots = slots.filter((s) => isSlotLive(s))

// Prefer the BE-METRICS `mem_mb` contract (real per-slot resident
// model + KV memory). Fall back to the legacy GTT-split / NPU-divide
Expand Down
72 changes: 63 additions & 9 deletions ui/src/dash/slot-modals.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { useHardware } from '@/api/hooks/useHardware'
import { useBackends } from '@/api/hooks/useBackends'
import { useModels } from '@/api/hooks/useModels'
import { ENDPOINTS } from '@/api/endpoints'
import { stateChipClassForSlot } from './slot-status.js'

// Full static device list — shown as fallback when /api/backends hasn't
// loaded yet or returns empty. Never render an empty device dropdown.
Expand All @@ -36,12 +37,32 @@ const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React
// Map a slot lifecycle state to a chip color class.
// online/ready/serving → green (ok); starting → amber (warn);
// error → red (err); offline/empty/anything else → neutral grey (base chip).
function stateChipClass(state) {
const s = String(state || "").toLowerCase();
if (["ready", "online", "loaded", "serving", "running"].includes(s)) return "chip ok";
if (["starting", "loading", "pending", "stopping"].includes(s)) return "chip warn";
if (["error", "failed", "broken"].includes(s)) return "chip err";
return "chip"; // offline / empty / unconfigured → neutral grey
//
// N1: resolve the chip CSS class for a slot.
//
// Accepts either:
//   - a full slot object: stateChipClassForSlot() (slot-status.js) is asked
//     first; when it answers with the `null` sentinel the slot's own
//     `state` string is classified instead, or
//   - a bare lifecycle-state string (or null/undefined): classified with the
//     lemond table below.
//
// The string table is intentionally limited to the lemond states. Do NOT add
// warming/pulling/crashed to it — those would recolor lemond state strips
// (e.g. state="warming" must stay grey). Container-specific colors are
// reachable only through the slot-object path.
//
// Returns "chip ok" (green), "chip warn" (amber), "chip err" (red) or the
// neutral "chip".
function stateChipClass(stateOrSlot) {
  const isSlotObject = stateOrSlot != null && typeof stateOrSlot !== "string";

  if (isSlotObject) {
    // Shared helper decides first; null means "no opinion, use slot.state".
    const phaseClass = stateChipClassForSlot(stateOrSlot);
    return phaseClass !== null ? phaseClass : stateChipClass(stateOrSlot.state);
  }

  // Plain string / nullish input: case-insensitive lookup in the lemond table.
  switch (String(stateOrSlot || "").toLowerCase()) {
    case "ready":
    case "online":
    case "loaded":
    case "serving":
    case "running":
      return "chip ok";
    case "starting":
    case "loading":
    case "pending":
    case "stopping":
      return "chip warn";
    case "error":
    case "failed":
    case "broken":
      return "chip err";
    default:
      // offline / warming / empty / unconfigured → neutral grey
      return "chip";
  }
}

// Map /api/models registry rows → the shape this file's swap popover and
Expand Down Expand Up @@ -869,6 +890,8 @@ function InlineSwapPopover({ slot, open, onClose, onPick }) {
const modelsQuery = useModels();
const hwQuery = useHardware();
if (!open) return null;

const isContainer = slot.runtime === "container";
const ramFreeGb = hwQuery.data?.ram?.free ?? 0;
const compatible = (modelsQuery.data ?? [])
.map(normalizeApiModel)
Expand All @@ -878,9 +901,40 @@ function InlineSwapPopover({ slot, open, onClose, onPick }) {
// don't offer them when swapping a non-rocm slot.
!(Array.isArray(m.tags) && m.tags.includes("rocmfp4") && slot.backend !== "rocm")
);

// N2: pick handler shared by the row click and the arrow button.
//
// For container-runtime slots a model swap is a cold restart rather than a
// lemond hot /v1/load, so the user is told up front via an "info" toast
// (only when the global window.__hal0Toast hook is installed). The pick
// itself is always forwarded unchanged: onPick(m) is expected to drive the
// actual swap/restart in the parent (not visible in this block), after
// which the popover is closed.
const handlePick = (m) => {
  if (isContainer) {
    const slotName = slot.name;
    const modelLabel = m.longName || m.id;
    if (window.__hal0Toast) {
      window.__hal0Toast(
        `Restarting ${slotName} to load ${modelLabel} — ~model-load seconds`,
        "info"
      );
    }
  }

  onPick(m);
  onClose();
};

return (
<div className="swap-pop" onClick={e => e.stopPropagation()}>
<div className="swap-pop-h">Swap model · type {slot.type}</div>
{/* N2: container cold-restart notice in popover header */}
<div className="swap-pop-h">
Swap model · type {slot.type}
{isContainer && (
<span
className="chip"
style={{marginLeft: 8, fontSize: 9, color: "var(--warn)", borderColor: "var(--warn-line)", background: "var(--warn-soft)"}}
title="Container runtime — model swap requires a container restart (~model-load seconds)"
>
· cold restart
</span>
)}
</div>
{compatible.map(m => {
const isCur = slot.model_id === m.id;
const fits = ramFreeGb > parseSizeGB(m.size);
Expand All @@ -892,7 +946,7 @@ function InlineSwapPopover({ slot, open, onClose, onPick }) {
<div
key={m.id}
className={"swap-pop-item" + (isCur ? " cur" : "")}
onClick={() => { onPick(m); onClose(); }}
onClick={() => handlePick(m)}
>
<div className="nm">
{m.longName}
Expand All @@ -904,7 +958,7 @@ function InlineSwapPopover({ slot, open, onClose, onPick }) {
type="button"
className="swap-arrow"
aria-label={`Load ${m.longName || m.id}`}
onClick={e => { e.stopPropagation(); onPick(m); onClose(); }}
onClick={e => { e.stopPropagation(); handlePick(m); }}
>{Icons.chevR}</button>
</div>
);
Expand Down
Loading
Loading