Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/hal0/api/routes/slots.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,30 @@ async def _container_state_enrichment(request: Request) -> dict[str, dict[str, A
entry["container_status"] = container_status
entry["container_health"] = container_health

# Emit runtime / profile / image so the UI doesn't have to dig
# into metadata, and resolved_command so the drawer can show the
# real podman argv instead of fabricating flags client-side.
entry["runtime"] = "container"
profile_name = str(cfg.get("profile") or "")
entry["profile"] = profile_name
if profile_name:
try:
from hal0.config.loader import load_profiles_config

catalog = load_profiles_config()
prof = catalog.profile.get(profile_name)
entry["image"] = prof.image if prof else None
# resolved_command = llama-server argv starting from the image
from hal0.providers.container import resolved_command_for_slot

entry["resolved_command"] = resolved_command_for_slot(cfg)
except Exception:
entry["image"] = None
entry["resolved_command"] = None
else:
entry["image"] = None
entry["resolved_command"] = None

out[name] = entry
return out

Expand Down
53 changes: 52 additions & 1 deletion src/hal0/providers/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,4 +397,55 @@ def container_provider() -> ContainerProvider:
return _container_provider


__all__ = ["ContainerProvider", "container_provider"]
def resolved_command_for_slot(
slot_cfg: dict[str, Any],
model_path: str | None = None,
) -> list[str] | None:
"""Return the canonical llama-server argv for a container slot.

Used by the API layer (GET /api/slots + /config) to surface a
``resolved_command`` field without fabricating flags client-side.

Returns the podman run argv *starting from the image tag* — the
boilerplate podman preamble (--device, --group-add, --security-opt,
--volume, --publish) is omitted because:
a) it requires root to read GIDs (``resolve_gpu_group_ids``), and
b) it is not useful for debugging inference behaviour.

Returns ``None`` when the slot has no profile (not a container slot)
or the profile lookup fails.
"""
profile_name = str(slot_cfg.get("profile") or "")
if not profile_name:
return None
try:
profile = _resolve_profile(profile_name)
except (KeyError, Exception):
return None

flags_str = resolve_profile_flags(profile)
flag_tokens = shlex.split(flags_str) if flags_str.strip() else []

# port: may be at top-level or nested under [slot]
port = int(slot_cfg.get("port") or slot_cfg.get("slot", {}).get("port") or 0)
# model lives under [model] default (nested TOML table), not as a top-level string
model_table = slot_cfg.get("model") or {}
default_model = (
model_table.get("default", "") if isinstance(model_table, dict) else str(model_table)
)
effective_model = model_path or str(default_model or "")

argv: list[str] = [
profile.image,
"--host",
"0.0.0.0",
"--port",
str(port),
]
if effective_model:
argv += ["--model", effective_model]
argv.extend(flag_tokens)
return argv


__all__ = ["ContainerProvider", "container_provider", "resolved_command_for_slot"]
126 changes: 126 additions & 0 deletions tests/api/test_slots_container_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,129 @@ def test_get_slot_container_state_fields(
slot = r.json()
assert slot["container_status"] == "running"
assert slot["container_health"] is True


# ── runtime/profile/image/resolved_command enrichment (issue #658) ─────────────


def test_container_slot_has_runtime_profile_image_fields(
    client_with_container_slot: TestClient,
) -> None:
    """GET /api/slots reports runtime, profile, image and resolved_command for a container slot."""
    from hal0.config.schema import ProfileConfig

    stub_profile = ProfileConfig(
        image="ghcr.io/hal0ai/amd-strix-halo-toolboxes:vulkan-radv-server",
        flags="--flash-attn on -ngl 999",
        mtp=False,
    )
    stub_catalog = MagicMock(profile={"vulkan-radv": stub_profile})

    with (
        patch("hal0.providers.container.ContainerProvider.is_active", return_value=True),
        patch(
            "hal0.providers.container.ContainerProvider.health",
            new_callable=AsyncMock,
            return_value={"ok": True, "status": "healthy"},
        ),
        # The route handler imports load_profiles_config inline to fill "image".
        patch("hal0.config.loader.load_profiles_config", return_value=stub_catalog),
        # The provider module holds its own reference, used for resolved_command.
        patch("hal0.providers.container.load_profiles_config", return_value=stub_catalog),
    ):
        response = client_with_container_slot.get("/api/slots")

    assert response.status_code == 200, response.text
    entries = {item["name"]: item for item in response.json()}
    container_slot = entries["gpu-chat"]

    assert container_slot.get("runtime") == "container", "runtime must be 'container'"
    assert container_slot.get("profile") == "vulkan-radv", (
        "profile must be the slot's profile name"
    )
    assert container_slot.get("image") == stub_profile.image, "image must come from profile"

    # The command is a list whose first element is the image tag.
    argv = container_slot.get("resolved_command")
    assert argv is not None, "resolved_command must be present"
    assert isinstance(argv, list), "resolved_command must be a list"
    assert argv[0] == stub_profile.image, "resolved_command[0] must be the image"

    # The model token must be the plain string from [model] default,
    # not the repr of the surrounding table.
    rendered = " ".join(argv)
    assert "--model llama-3b" in rendered, (
        f"resolved_command must contain '--model llama-3b' (got: {rendered!r})"
    )


def test_container_slot_resolved_command_includes_flags(
    client_with_container_slot: TestClient,
) -> None:
    """The profile's flag tokens show up inside resolved_command."""
    from hal0.config.schema import ProfileConfig

    stub_profile = ProfileConfig(
        image="ghcr.io/hal0ai/amd-strix-halo-toolboxes:vulkan-radv-server",
        flags="--flash-attn on -ngl 999",
        mtp=False,
    )
    stub_catalog = MagicMock(profile={"vulkan-radv": stub_profile})
    inactive_result = MagicMock(stdout=b"inactive", returncode=3)

    with (
        patch("hal0.providers.container.ContainerProvider.is_active", return_value=False),
        patch("subprocess.run", return_value=inactive_result),
        # The route handler imports load_profiles_config inline to fill "image".
        patch("hal0.config.loader.load_profiles_config", return_value=stub_catalog),
        # The provider module holds its own reference, used for resolved_command.
        patch("hal0.providers.container.load_profiles_config", return_value=stub_catalog),
    ):
        response = client_with_container_slot.get("/api/slots")

    assert response.status_code == 200, response.text
    entries = {item["name"]: item for item in response.json()}
    container_slot = entries["gpu-chat"]

    argv = container_slot.get("resolved_command")
    assert isinstance(argv, list)

    # Each flag from the profile should have been spread into the command.
    rendered = " ".join(argv)
    assert "--flash-attn" in rendered, "profile flags must appear in resolved_command"
    assert "-ngl" in rendered, "profile flags must appear in resolved_command"


def test_lemonade_slot_has_no_runtime_container_fields(
    client_with_container_slot: TestClient,
    lemonade_stub: dict[str, Any],
) -> None:
    """A Lemonade slot carries neither runtime='container' nor a resolved_command key."""
    with (
        patch("hal0.providers.container.ContainerProvider.is_active", return_value=True),
        patch(
            "hal0.providers.container.ContainerProvider.health",
            new_callable=AsyncMock,
            return_value={"ok": True, "status": "healthy"},
        ),
    ):
        response = client_with_container_slot.get("/api/slots")

    assert response.status_code == 200, response.text
    entries = {item["name"]: item for item in response.json()}
    lemonade_entry = entries["chat"]

    # Container-only enrichment must not leak onto Lemonade slots.
    assert lemonade_entry.get("runtime") != "container"
    assert "resolved_command" not in lemonade_entry
3 changes: 3 additions & 0 deletions ui/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,8 @@
"tailwindcss": "^4.2.2",
"typescript": "^5.6.3",
"vite": "^6.0.3"
},
"allowScripts": {
"esbuild@0.25.12": true
}
}
3 changes: 3 additions & 0 deletions ui/src/api/endpoints.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ export const ENDPOINTS = {
upstreamTest: (name: string) =>
`/api/upstreams/${encodeURIComponent(name)}/test`,

// ── Profiles (container slot templates) ─────────────────────────
profiles: '/api/profiles',

// Install / FirstRun
installState: '/api/install/state',
firstrunState: '/api/firstrun/state',
Expand Down
1 change: 1 addition & 0 deletions ui/src/api/hooks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ export * from './useAgents'
export * from './useMcp'
export * from './useMemory'
export * from './useSettings'
export * from './useProfiles'
24 changes: 24 additions & 0 deletions ui/src/api/hooks/useProfiles.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// hal0 v3 dashboard — profiles hook (issue #658).
//
// Fetches /api/profiles — the list of named container-slot profiles
// (image + bench-tuned flags) seeded by profiles.toml.

import { useQuery } from '@tanstack/react-query'
import { apiGet } from '../client'
import { ENDPOINTS } from '../endpoints'

// One entry of the array returned by the profiles endpoint: a named
// container-slot profile (image + bench-tuned flags).
export interface Profile {
// Profile name.
name: string
// Container image for the profile.
image: string
// Flags string configured on the profile.
flags: string
// Boolean profile option; meaning is not visible here — TODO confirm against the backend schema.
mtp: boolean
// Presumably the flags after backend-side resolution — verify against the /api/profiles handler.
resolved_flags: string
}

/**
 * React Query hook that loads the profile list from the profiles endpoint.
 *
 * The result is cached under the `['profiles']` query key with a
 * `staleTime` of 60 seconds.
 */
export function useProfiles() {
  const fetchProfiles = () => apiGet<Profile[]>(ENDPOINTS.profiles)
  const profilesQuery = useQuery({
    queryKey: ['profiles'],
    queryFn: fetchProfiles,
    staleTime: 60_000,
  })
  return profilesQuery
}
8 changes: 8 additions & 0 deletions ui/src/api/hooks/useSlots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ export interface Slot {
* False when stopped, starting (health probe not yet passing), or crashed.
* Absent for Lemonade slots. */
container_health?: boolean | null
/** Canonical llama-server argv for this container slot, starting from the
* image tag (omits the podman boilerplate). Populated by
* _container_state_enrichment() via resolved_command_for_slot() in
* container.py. Absent/null for Lemonade slots. */
resolved_command?: string[] | null

// ── Synthetic upstream-backed entries ───────────────────────────────
// /api/slots merges real lifecycle-managed slots with synthetic
Expand Down Expand Up @@ -247,6 +252,9 @@ function normalizeSlot(s: any): Slot {
// Absent for Lemonade slots; null here keeps the type honest.
container_status: s?.container_status ?? null,
container_health: s?.container_health ?? null,
// resolved_command: backend-provided llama-server argv for container slots
// (issue #658). Absent for Lemonade slots.
resolved_command: s?.resolved_command ?? null,
}
}

Expand Down
4 changes: 4 additions & 0 deletions ui/src/dash/chrome.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ function TopBar({ route, hostUptime = "14d 02:11", onBell, onCmdK, onMenu, menuO
agent: ["Tools", "Agent"],
settings: ["Configure", "Settings"],
connections: ["Network", "Connections"],
profiles: ["iGPU Slots", "Profiles"],
};
const [eyebrow, title] = labels[route] || ["", ""];
return (
Expand Down Expand Up @@ -173,6 +174,9 @@ function useNavItems() {
return [
{ id: "dashboard", label: "Dashboard", icon: Icons.dashboard },
{ id: "slots", label: "Slots", icon: Icons.slots, cnt: slotCount },
// issue #658 — Profiles: container-slot templates (image + bench flags).
// Sits under Slots as the natural companion for container runtime config.
{ id: "profiles", label: "Profiles", icon: Icons.hardware },
{ id: "models", label: "Models", icon: Icons.models, cnt: modelCount },
{ id: "logs", label: "Logs", icon: Icons.logs },
...(memoryEnabled ? [{ id: "agent", label: "Agent", icon: Icons.agent }] : []),
Expand Down
3 changes: 2 additions & 1 deletion ui/src/dash/main.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ const TWEAK_DEFAULTS = /*EDITMODE-BEGIN*/{
// We also accept "agents/mcp" as an alias so the canonical URL path stays
// readable (`/agents/mcp` from the spec). Any unknown head falls back to
// the dashboard.
const ROUTES = ["dashboard", "firstrun", "slots", "models", "logs", "agent", "settings", "mcp", "connections"];
const ROUTES = ["dashboard", "firstrun", "slots", "profiles", "models", "logs", "agent", "settings", "mcp", "connections"];
function parseRoute() {
const raw = (window.location.hash || "#dashboard").replace(/^#/, "");
const [path, qs] = raw.split("?");
Expand Down Expand Up @@ -180,6 +180,7 @@ function App() {
<div className="empty">The memory surface is disabled in this release.</div>
</div>
);
case "profiles": return <ProfilesView />;
case "mcp": return <McpView />;
case "settings": return <SettingsView />;
case "connections": return <ConnectionsView />;
Expand Down
Loading
Loading