diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 72d1167d..5eb3d805 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -93,19 +93,39 @@ is no longer valid — primarily for FLM model-tag namespace drift. doesn't import HTTP client code, doesn't know about models other than via the registry, and doesn't make assumptions about backends beyond the provider ABC. +- **Two orthogonal routing axes — do not conflate them.** + - `Dispatcher.dispatch()` is the **transport axis**: given a model id + (or path default), it resolves which upstream URL to forward to using + three ordered tiers — registry lookup → passthrough on warm caches → + cold-cache prefetch. If all three miss, it raises `NoRouteFound`. + It does not know about capability types or slot selection policy. + - `SlotManager.route_for_request()` (via `omni_router`) is the + **capability axis**: given a capability label (chat / embed / asr / + tts / image) it selects *which slot* is the right destination, then + calls `dispatch()` for the actual forwarding. It runs before + `dispatch()` and is what multi-modal "omni" requests use to fan out + across capability groups. Do **not** remove or bypass it — + `omni_router` / `route_for_request` are verified load-bearing for + omni's tool selection and dispatch (ADR-0022). + The legacy Tier-4 path/name heuristics (`dispatcher/proxy.py`) that + blurred these axes were retired in #624. Image-gen and embed models + must now have explicit registry bindings so `dispatch()` handles them + via Tier 1. - **Dispatcher is HTTP-only.** It does not start/stop slots. It reads - slot status from the slot manager and routes requests. If a slot is - offline, it returns a structured error; restarting is a separate API - call. -- **Providers are stateless.** Each provider (`LlamaServerProvider`, - `FLMProvider`, `MoonshineProvider`, `KokoroProvider`, - `ComfyUIProvider`) is a class with `build_env()`, `start_cmd()`, - `health()`, `infer()`. They don't hold connection state, don't manage - systemd, and don't share globals. One provider per backend type. - `FLMProvider` additionally probes `flm list -j` inside the toolbox + slot status from the slot manager (`SlotManager.state()`) and routes + requests. If a slot is offline, it returns a structured error; + restarting is a separate API call. +- **Providers are stateless.** Each live provider (`LlamaServerProvider`, + `FLMProvider`, `ComfyUIProvider`) is a class with `build_env()`, + `start_cmd()`, `health()`, `infer()`. They don't hold connection state, + don't manage systemd, and don't share globals. One provider per backend + type. `FLMProvider` additionally probes `flm list -j` inside the toolbox image to advertise its own model-tag namespace (`share/flm/model_list.json`) — it does **not** run arbitrary GGUFs - from the registry. + from the registry. `LemonadeProvider` is the primary slot lifecycle + driver (v0.2+); `LlamaServerProvider` and `FLMProvider` are retained for + Vulkan and NPU slots respectively. `MoonshineProvider` and + `KokoroProvider` were retired in #620 — lemond serves STT/TTS natively. - **The registry is the only source of truth for "what models exist."** Atomic TOML files under `/var/lib/hal0/registry/`. mtime-cached. Slot configs reference model IDs from the registry; if a model is deleted, diff --git a/src/hal0/dispatcher/proxy.py b/src/hal0/dispatcher/proxy.py deleted file mode 100644 index 5f54d0d7..00000000 --- a/src/hal0/dispatcher/proxy.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Legacy proxy fallback for the dispatcher. - -Path + model-name heuristics that route a request to a slot when the -registry has nothing to say. Ported from haloai ``lib/proxy.py`` and kept -during v1.0 as the last-resort step in :class:`Dispatcher.dispatch`. - -PLAN.md §3 marks this for absorption into ``router.py`` post-v0.2. Do not -delete it until then — the v1 dispatcher resolution order explicitly ends -in "legacy fallback" and operator muscle memory ("slot named coding-1m" -addressing) depends on these heuristics. - -Port target: haloai ``lib/proxy.py`` (``resolve_slot`` only — the streaming -forwarder lives in :mod:`hal0.dispatcher.router`). -""" - -from __future__ import annotations - -from typing import Any - -from hal0.errors import Hal0Error -from hal0.upstreams.registry import Upstream, UpstreamRegistry - -# NOTE: revisit in Phase 5 — absorb into router.py after Dispatcher is stable. - -# Path fragments that pin a request to a specific slot regardless of model. -# Mirrors haloai lib/proxy.py:51-58 (embeddings + rerank both target embed). -_EMBED_PATHS = ("/embeddings", "/rerank") - -# Path fragments that pin a request to the image-gen slot (ComfyUI). The -# OpenAI shape is `/v1/images/generations` — when that hits the legacy -# fallback we don't want it routed to the chat slot. -_IMAGE_PATHS = ("/images/generations", "/images/edits", "/images/variations") - -# Substrings in the model name that pin to known slot roles. Order matters: -# the ":" (FLM tag-style id) check runs before the bare-name substring checks -# so that "qwen3.5:embed" still routes to the NPU rather than to embed. -_EMBED_NAME_HINTS = ("embed", "rerank") - -# Model id prefixes that pin to the image-gen slot. Curated catalogue uses -# these prefixes (sdxl-turbo, sd-1.5-..., flux-*). Anything matching -# these in the bare-model lookup goes to the `img` slot before legacy slot -# name resolution kicks in. -_IMAGE_NAME_PREFIXES = ("sdxl", "sd-1.5", "sd15", "flux") - - -class LegacyResolutionFailed(Hal0Error): - """Raised when the legacy path/name heuristics find no slot to serve a request. - - Carries a ``dispatch.legacy_unresolved`` code so the structured error - envelope distinguishes "nothing in registry AND nothing in legacy - fallback" from "registry binding pointed at an unknown upstream." - """ - - code = "dispatch.legacy_unresolved" - status = 404 - - -def resolve_slot( # TIER1 - path: str, - body: dict[str, Any] | None, - upstreams: UpstreamRegistry, -) -> Upstream: - """Resolve a request to a slot Upstream using path+name heuristics. - - Mirrors haloai ``lib/proxy.py:resolve_slot`` but returns a typed - :class:`Upstream` (or raises a typed error) instead of the old - ``(slot_name, port)`` tuple. - - Resolution rules (in order): - 1. ``/embeddings`` or ``/rerank`` in path → ``embed`` slot. - 2. ``/images/...`` in path → ``img`` slot (ComfyUI). - 3. Model id contains ``:`` (FLM tag-style) → ``npu`` slot. - 4. Model id starts with ``sdxl``/``sd-1.5``/``sd15``/``flux`` → ``img`` slot. - 5. Model id contains ``embed`` or ``rerank`` substring → ``embed`` slot. - 6. Model id exactly matches a registered slot upstream name (other - than ``primary``) → that slot. - 7. Fallback → ``primary`` slot. - - Args: - path: The original request path (e.g. "/v1/chat/completions"). - body: Parsed JSON body dict (may be None for GETs). - upstreams: Registry to resolve slot names against. - - Returns: - An :class:`Upstream` representing the slot to forward to. - - Raises: - LegacyResolutionFailed: If the heuristics select a slot name but no - matching slot Upstream is registered. Carries a - ``dispatch.legacy_unresolved`` code via the typed Hal0Error envelope. - """ - candidate: str | None = None - - # Rule 1 — path-based pin (embeddings/rerank). - if any(frag in path for frag in _EMBED_PATHS): - candidate = "embed" - # Rule 2 — image-generation path pins to the img slot. - elif any(frag in path for frag in _IMAGE_PATHS): - candidate = "img" - elif body: - model = body.get("model", "") - if isinstance(model, str) and model: - m = model.lower() - # Rule 3 — FLM tag format "name:tag" routes to NPU. - if ":" in model: - candidate = "npu" - # Rule 4 — image-gen model id prefix pin (sdxl-/sd-1.5-/flux-). - elif any(m.startswith(prefix) for prefix in _IMAGE_NAME_PREFIXES): - candidate = "img" - # Rule 5 — name-substring pin (embed/rerank). - elif any(hint in m for hint in _EMBED_NAME_HINTS): - candidate = "embed" - else: - # Rule 6 — explicit slot-name addressing. - slot_match = upstreams.get(m) - if slot_match is not None and slot_match.kind == "slot" and m != "primary": - candidate = m - - # Rule 7 — fallback default slot. - if candidate is None: - candidate = "primary" - - upstream = upstreams.get(candidate) - if upstream is None or upstream.kind != "slot": - raise LegacyResolutionFailed( - f"legacy fallback selected slot {candidate!r} but no matching slot upstream is registered", - details={"slot": candidate, "path": path}, - ) - return upstream - - -__all__ = ["LegacyResolutionFailed", "resolve_slot"] diff --git a/src/hal0/dispatcher/router.py b/src/hal0/dispatcher/router.py index 4b4d1983..d106a228 100644 --- a/src/hal0/dispatcher/router.py +++ b/src/hal0/dispatcher/router.py @@ -17,9 +17,10 @@ Tier 2), then re-check passthrough. The prefetch fanout is wrapped in :class:`SingleFlightGroup` (Tier 3) so 100 concurrent identical prefetches share a single upstream call. - 4. **legacy fallback** — :func:`hal0.dispatcher.proxy.resolve_slot` - path-and-name heuristics from haloai ``lib/proxy.py``. Kept until - v0.2. + +If all three tiers miss, :class:`NoRouteFound` is raised immediately. +The legacy path/name heuristics (``dispatcher/proxy.py``) were retired +in #624 — image-gen and embed models must have explicit registry bindings. Decision logging: every routing decision emits one structured log line to journald with ``SYSLOG_IDENTIFIER=hal0-dispatch`` (PLAN.md §5 Tier 2), @@ -43,7 +44,6 @@ import structlog from fastapi.responses import Response, StreamingResponse -from hal0.dispatcher.proxy import LegacyResolutionFailed, resolve_slot from hal0.dispatcher.single_flight import SingleFlightGroup from hal0.errors import Hal0Error from hal0.upstreams.registry import Upstream, UpstreamRegistry @@ -544,52 +544,13 @@ async def dispatch( self._log_decision(call, t0, cache_state="prefetched") return call - # ── Step 4: legacy heuristics ──────────────────────────────────── - try: # TIER1 — narrow exception handling; log + re-raise typed errors - slot_upstream = resolve_slot(path, body, self._upstreams) - except LegacyResolutionFailed as exc: - # Bubble the typed error up after logging the decision point. - log.warning( - "legacy fallback exhausted", - model=model_id, - path=path, - error=exc.message, - ) - raise NoRouteFound( - f"model {model_id!r} not found in registry, no upstream advertised it, " - f"and legacy slot resolution failed", - details={"model": model_id, "path": path, "legacy_error": exc.message}, - ) from exc - except Hal0Error: - # Typed errors are caller-meaningful: re-raise unchanged. - raise - except Exception as exc: # TIER1 — was: silent swallow at haloai dispatcher.py:291 - log.warning( - "legacy fallback raised unexpectedly", - model=model_id, - path=path, - error=str(exc), - error_type=type(exc).__name__, - ) - raise NoRouteFound( - f"model {model_id!r}: legacy slot resolution raised {type(exc).__name__}", - details={"model": model_id, "path": path, "error": str(exc)}, - ) from exc - - call = UpstreamCall( - upstream_name=slot_upstream.name, - target_url=_join_url(slot_upstream.url, path), - headers=self._build_headers(request, slot_upstream), - body=_remap_model(body, original_model), - streaming=streaming, - method=method, - resolved_model=original_model or model_id, - requested_model=original_model, - resolution_path=f"legacy_slot:{slot_upstream.name}", - slot_name=_slot_name_of(slot_upstream), + # Tiers 1-3 exhausted — no route found. The legacy path/name heuristics + # (proxy.resolve_slot) were retired in #624; image-gen and embed models + # must have explicit registry bindings. + raise NoRouteFound( + f"model {model_id!r} not found in registry and no upstream advertised it", + details={"model": model_id, "path": path}, ) - self._log_decision(call, t0, cache_state="legacy") - return call async def forward(self, call: UpstreamCall) -> Response: """Execute the HTTP forward and return a FastAPI Response. @@ -666,7 +627,7 @@ async def _ensure_slot_loaded_backend_aware(self, call: UpstreamCall) -> None: assert self._slot_manager is not None # narrowed by forward() slot_name = call.slot_name - current = self._slot_manager._current_state(slot_name) + current = self._slot_manager.state(slot_name) if current in (SlotState.READY, SlotState.SERVING, SlotState.IDLE): # Model is already loaded under whatever backend it loaded with; # nothing to do. (A declared≠actual drift is surfaced by the @@ -701,7 +662,7 @@ def _check_slot_ready_for_dispatch(self, call: UpstreamCall) -> None: from hal0.slots.state import SlotState assert self._slot_manager is not None # narrowed by caller - current = self._slot_manager._current_state(call.slot_name) + current = self._slot_manager.state(call.slot_name) if current in (SlotState.READY, SlotState.SERVING, SlotState.IDLE): return diff --git a/src/hal0/slots/manager.py b/src/hal0/slots/manager.py index 84489549..7d254685 100644 --- a/src/hal0/slots/manager.py +++ b/src/hal0/slots/manager.py @@ -304,6 +304,18 @@ def _ensure_known(self, name: str) -> None: # ── state machine ──────────────────────────────────────────────────────── + def state(self, name: str) -> SlotState: + """Return the current :class:`SlotState` for *name* (public API). + + Reads from the in-memory cache first; falls back to the on-disk + ``state.json`` if the slot has not been seen yet this process. + Returns :attr:`SlotState.OFFLINE` when no state record exists. + + Use this instead of the private ``_current_state`` — the Dispatcher + calls this method; internal manager code uses ``_current_state``. + """ + return self._current_state(name) + def _current_state(self, name: str) -> SlotState: rec = self._states.get(name) if rec is None: diff --git a/tests/api/test_v1_chat_slot_alias.py b/tests/api/test_v1_chat_slot_alias.py index be2440c9..e6677907 100644 --- a/tests/api/test_v1_chat_slot_alias.py +++ b/tests/api/test_v1_chat_slot_alias.py @@ -167,39 +167,6 @@ async def test_rewrite_is_noop_without_slot_manager() -> None: assert body["model"] == "primary" -# ── dispatcher / proxy non-regression ─────────────────────────────────────── - - -def test_resolve_slot_primary_still_falls_through_to_lemonade() -> None: - """``resolve_slot`` keeps the ``m != "primary"`` carve-out: a chat - request that reaches the legacy fallback selects ``primary`` and - (absent a real primary slot upstream) raises the typed legacy error, - which the dispatcher converts to NoRouteFound → lemonade fall-through. - No per-slot chat upstream is matched.""" - from hal0.dispatcher.proxy import LegacyResolutionFailed, resolve_slot - from hal0.upstreams.registry import Upstream, UpstreamRegistry - - reg = UpstreamRegistry() - # Only the composite hal0 upstream exists (no per-slot chat upstreams). - reg.upsert( - Upstream( - name="hal0", - kind="slot", - url="http://127.0.0.1:8080/v1", - slot_name=None, - auth_style="none", - ) - ) - with pytest.raises(LegacyResolutionFailed): - # model id form — not an alias, not a registered slot name → falls - # to the "primary" default which has no slot upstream → legacy error. - resolve_slot( - "/v1/chat/completions", - {"model": "hermes-4-14b-q5km", "messages": []}, - reg, - ) - - def _patch_alias(monkeypatch: pytest.MonkeyPatch) -> None: async def _fake(_sm: Any) -> dict[str, str]: return { diff --git a/tests/dispatcher/test_composite_dispatch.py b/tests/dispatcher/test_composite_dispatch.py index 8057c4bb..5b612e96 100644 --- a/tests/dispatcher/test_composite_dispatch.py +++ b/tests/dispatcher/test_composite_dispatch.py @@ -70,6 +70,9 @@ class _OfflineSlotManager: def __init__(self) -> None: self.serving_calls: list[str] = [] + def state(self, _slot_name: str) -> SlotState: + return SlotState.OFFLINE + def _current_state(self, _slot_name: str) -> SlotState: return SlotState.OFFLINE diff --git a/tests/dispatcher/test_image_routing.py b/tests/dispatcher/test_image_routing.py deleted file mode 100644 index 7da0264a..00000000 --- a/tests/dispatcher/test_image_routing.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Routing rules for image-gen requests in the legacy fallback proxy. - -These tests cover the new ``/v1/images/*`` path pin + the SDXL/SD/Flux -model-id prefix pin Team K added to ``hal0.dispatcher.proxy.resolve_slot``. -The chat / embed / NPU rules are intentionally not re-tested here — they -have coverage in :mod:`tests.dispatcher.test_router`. -""" - -from __future__ import annotations - -import pytest - -from hal0.dispatcher.proxy import LegacyResolutionFailed, resolve_slot -from hal0.upstreams.registry import Upstream, UpstreamRegistry - - -def _registry_with_slots(*names: str) -> UpstreamRegistry: - reg = UpstreamRegistry() - for n in names: - reg.upsert( - Upstream( - name=n, - kind="slot", - url="http://127.0.0.1:8186/v1", - slot_name=n, - auth_style="none", - ) - ) - return reg - - -def test_images_generations_path_routes_to_img_slot() -> None: - reg = _registry_with_slots("primary", "img") - upstream = resolve_slot("/v1/images/generations", {"model": "sdxl-turbo"}, reg) - assert upstream.name == "img" - - -def test_sdxl_model_id_routes_to_img_even_without_image_path() -> None: - """A bare /v1/chat/completions with model='sdxl-turbo' must NOT hit primary.""" - reg = _registry_with_slots("primary", "img") - upstream = resolve_slot( - "/v1/chat/completions", - {"model": "sdxl-turbo", "messages": [{"role": "user", "content": "hi"}]}, - reg, - ) - assert upstream.name == "img" - - -def test_sd15_model_prefix_routes_to_img() -> None: - reg = _registry_with_slots("primary", "img") - upstream = resolve_slot( - "/v1/images/generations", - {"model": "sd-1.5-pruned-emaonly", "prompt": "x"}, - reg, - ) - assert upstream.name == "img" - - -def test_flux_model_prefix_routes_to_img() -> None: - reg = _registry_with_slots("primary", "img") - upstream = resolve_slot( - "/v1/images/generations", - {"model": "Flux-2-Klein-9B-GGUF", "prompt": "x"}, - reg, - ) - assert upstream.name == "img" - - -def test_chat_model_id_still_routes_to_primary() -> None: - """The image rules must not regress chat routing.""" - reg = _registry_with_slots("primary", "img") - upstream = resolve_slot( - "/v1/chat/completions", - {"model": "qwen3-4b", "messages": []}, - reg, - ) - assert upstream.name == "primary" - - -def test_image_path_without_img_slot_raises_typed_error() -> None: - """Path pin selects 'img', missing 'img' upstream → typed legacy error.""" - reg = _registry_with_slots("primary") - with pytest.raises(LegacyResolutionFailed) as exc: - resolve_slot("/v1/images/generations", {"model": "sdxl-turbo"}, reg) - assert exc.value.code == "dispatch.legacy_unresolved" diff --git a/tests/dispatcher/test_router.py b/tests/dispatcher/test_router.py index 9e939b7e..add3f171 100644 --- a/tests/dispatcher/test_router.py +++ b/tests/dispatcher/test_router.py @@ -1,11 +1,10 @@ """Unit tests for ``hal0.dispatcher.router.Dispatcher``. -Covers all four resolution paths from PLAN.md §3: +Covers all three resolution paths from PLAN.md §3 (#624 retired Tier-4): 1. registry — exact ModelRegistry binding 2. passthrough — upstream's cached /v1/models already has the id 3. cold-cache prefetch — fanout + recheck (Tier 2 timeout + Tier 3 single-flight) - 4. legacy fallback — path/name heuristics in proxy.py Plus the structured-envelope assertions for every ``dispatch.*`` error code (PLAN.md §5 Tier 1 — no silent swallowing). @@ -23,7 +22,6 @@ from starlette.requests import Request from hal0.api.middleware import error_codes -from hal0.dispatcher.proxy import LegacyResolutionFailed from hal0.dispatcher.router import ( Dispatcher, NoRouteFound, @@ -265,42 +263,49 @@ async def slow_fetch(_u: Upstream) -> list[str]: assert exc.value.code == "dispatch.no_route" -# ── 4. legacy fallback path ────────────────────────────────────────────────── +# ── 4. NoRouteFound when all three tiers miss (#624 retired legacy fallback) ── @pytest.mark.asyncio -async def test_legacy_fallback_routes_to_primary_when_nothing_else_matches() -> None: +async def test_no_route_when_nothing_matches_any_tier() -> None: + """When registry has no binding and no upstream advertises the model, + NoRouteFound is raised immediately (Tier-4 legacy fallback was retired in #624). + """ primary = make_slot("primary") upstreams = FakeUpstreamRegistry([primary]) models = FakeModelRegistry(routes={}) # no binding dispatcher = Dispatcher(upstream_registry=upstreams, model_registry=models) - call = await dispatcher.dispatch( - make_request(), - body={"model": "some-unknown-model"}, - ) - assert call.resolution_path == "legacy_slot:primary" - assert call.upstream_name == "primary" + with pytest.raises(NoRouteFound) as exc: + await dispatcher.dispatch( + make_request(), + body={"model": "some-unknown-model"}, + ) + assert exc.value.code == "dispatch.no_route" @pytest.mark.asyncio -async def test_legacy_fallback_routes_embeddings_to_embed_slot() -> None: +async def test_no_route_for_embeddings_path_without_registry_binding() -> None: + """#624: /embeddings path with no registry binding raises NoRouteFound. + Pre-#624 the legacy fallback would have picked the 'embed' slot by + path-pin; now the embed model must be in the registry. + """ embed = make_slot("embed", "http://127.0.0.1:8082/v1") upstreams = FakeUpstreamRegistry([embed]) - models = FakeModelRegistry(routes={}) + models = FakeModelRegistry(routes={}) # no binding, cache empty dispatcher = Dispatcher(upstream_registry=upstreams, model_registry=models) - call = await dispatcher.dispatch( - make_request(path="/v1/embeddings"), - body={"input": "hello"}, - ) - assert call.resolution_path == "legacy_slot:embed" - assert call.upstream_name == "embed" + with pytest.raises(NoRouteFound) as exc: + await dispatcher.dispatch( + make_request(path="/v1/embeddings"), + body={"input": "hello"}, + ) + assert exc.value.code == "dispatch.no_route" @pytest.mark.asyncio -async def test_legacy_fallback_with_no_primary_raises_typed_no_route() -> None: - """When even legacy resolution can't find a slot, raise typed NoRouteFound.""" +async def test_no_route_raises_typed_error_with_no_upstreams() -> None: + """NoRouteFound is raised when nothing is registered — same typed error.""" upstreams = FakeUpstreamRegistry([]) # nothing registered models = FakeModelRegistry(routes={}) @@ -308,7 +313,6 @@ async def test_legacy_fallback_with_no_primary_raises_typed_no_route() -> None: with pytest.raises(NoRouteFound) as exc: await dispatcher.dispatch(make_request(), body={"model": "anything"}) assert exc.value.code == "dispatch.no_route" - assert isinstance(exc.value.__cause__, LegacyResolutionFailed) # ── path defaults ──────────────────────────────────────────────────────────── @@ -423,8 +427,20 @@ def _capture(logger: Any, method_name: str, event_dict: dict[str, Any]) -> dict[ try: primary = make_slot("primary") upstreams = FakeUpstreamRegistry([primary]) - models = FakeModelRegistry(routes={}) - dispatcher = Dispatcher(upstream_registry=upstreams, model_registry=models) + # Give the registry a binding so Tier 1 resolves and fires the log. + # Provide is_online + cached_models so the slot appears online and the + # registry path emits a dispatch.decision log before returning. + models = FakeModelRegistry(routes={"anything": "primary"}) + + async def _always_online(_u: Any) -> bool: + return True + + dispatcher = Dispatcher( + upstream_registry=upstreams, + model_registry=models, + is_online=_always_online, + cached_models=lambda name: ["anything"] if name == "primary" else [], + ) await dispatcher.dispatch(make_request(), body={"model": "anything"}) finally: # Restore the dispatcher's prior cached bind (if any) before @@ -445,14 +461,19 @@ class _FakeSlotManager: """Minimal SlotManager surface for the forward() lazy-load gate. Tracks whether ``load`` was called and reports a fixed slot state via - ``_current_state`` so we can drive both the cold-miss and already-loaded - branches of ``_ensure_slot_loaded_backend_aware``. + ``state()`` (the public API introduced in #624) so we can drive both + the cold-miss and already-loaded branches of + ``_ensure_slot_loaded_backend_aware``. """ def __init__(self, state: Any) -> None: self._state = state self.load_calls: list[str] = [] + def state(self, name: str) -> Any: + return self._state + + # Keep the private alias so any lingering internal call doesn't crash. def _current_state(self, name: str) -> Any: return self._state diff --git a/tests/dispatcher/test_serving_integration.py b/tests/dispatcher/test_serving_integration.py index 926e91c3..ef875b6c 100644 --- a/tests/dispatcher/test_serving_integration.py +++ b/tests/dispatcher/test_serving_integration.py @@ -54,11 +54,16 @@ def serving(self, slot_name: str) -> _RecordingCtx: def in_flight_count(self, slot_name: str) -> int: return self._counts.get(slot_name, 0) + def state(self, _slot_name: str) -> SlotState: + # Public API introduced in #624 — Dispatcher now calls .state() + # instead of ._current_state(). Default READY so the existing + # serving-integration tests pass; tests for the gate construct + # the mock with the state they want to assert against. + return self._state + def _current_state(self, _slot_name: str) -> SlotState: - # Mirrors SlotManager._current_state — Dispatcher's swap-window - # gate calls this before forwarding. Default READY so the - # existing serving-integration tests pass; tests for the gate - # construct the mock with the state they want to assert against. + # Keep private alias for internal SlotManager code that still + # calls _current_state() directly. return self._state async def recover_evicted_slot(self, slot_name: str) -> None: diff --git a/tests/slots/test_manager.py b/tests/slots/test_manager.py index 7429f3c0..9b31b0d8 100644 --- a/tests/slots/test_manager.py +++ b/tests/slots/test_manager.py @@ -779,3 +779,40 @@ async def test_update_config_ctx_size_alias_wins_over_stale_context_size( assert "context_size = 32768" in cfg_text assert "4096" not in cfg_text assert "ctx_size = " not in cfg_text + + +# ── SlotManager.state() public API (#624) ──────────────────────────────────── + + +def test_state_returns_offline_for_unknown_slot(tmp_hal0_home: str) -> None: + """state() returns OFFLINE when no record exists (no file, no cache).""" + sm = SlotManager() + assert sm.state("nonexistent-slot") == SlotState.OFFLINE + + +async def test_state_returns_current_after_transition(tmp_hal0_home: str) -> None: + """state() reflects the slot state after a _transition call.""" + sm = SlotManager() + await sm._transition( + "primary", + SlotState.IDLE, + model_id="qwen3-4b-q4_k_m", + port=8081, + extra={"backend": "vulkan", "provider": "lemonade"}, + force=True, + ) + assert sm.state("primary") == SlotState.IDLE + + +async def test_state_agrees_with_current_state(tmp_hal0_home: str) -> None: + """state() and _current_state() return the same value (public delegates to private).""" + sm = SlotManager() + await sm._transition( + "primary", + SlotState.READY, + model_id="qwen3-4b-q4_k_m", + port=8081, + extra={"backend": "vulkan", "provider": "lemonade"}, + force=True, + ) + assert sm.state("primary") == sm._current_state("primary")