diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 72d1167d..5eb3d805 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -93,19 +93,39 @@ is no longer valid — primarily for FLM model-tag namespace drift.
   doesn't import HTTP client code, doesn't know about models other than
   via the registry, and doesn't make assumptions about backends beyond
   the provider ABC.
+- **Two orthogonal routing axes — do not conflate them.**
+  - `Dispatcher.dispatch()` is the **transport axis**: given a model id
+    (or path default), it resolves which upstream URL to forward to using
+    three ordered tiers — registry lookup → passthrough on warm caches →
+    cold-cache prefetch.  If all three miss, it raises `NoRouteFound`.
+    It does not know about capability types or slot selection policy.
+  - `SlotManager.route_for_request()` (via `omni_router`) is the
+    **capability axis**: given a capability label (chat / embed / asr /
+    tts / image) it selects *which slot* is the right destination, then
+    calls `dispatch()` for the actual forwarding.  It runs before
+    `dispatch()` and is what multi-modal "omni" requests use to fan out
+    across capability groups.  Do **not** remove or bypass it —
+    `omni_router` / `route_for_request` are verified load-bearing for
+    omni's tool selection and dispatch (ADR-0022).
+  The legacy Tier-4 path/name heuristics (`dispatcher/proxy.py`) that
+  blurred these axes were retired in #624.  Image-gen and embed models
+  must now have explicit registry bindings so `dispatch()` handles them
+  via Tier 1.
 - **Dispatcher is HTTP-only.** It does not start/stop slots. It reads
-  slot status from the slot manager and routes requests. If a slot is
-  offline, it returns a structured error; restarting is a separate API
-  call.
-- **Providers are stateless.** Each provider (`LlamaServerProvider`,
-  `FLMProvider`, `MoonshineProvider`, `KokoroProvider`,
-  `ComfyUIProvider`) is a class with `build_env()`, `start_cmd()`,
-  `health()`, `infer()`. They don't hold connection state, don't manage
-  systemd, and don't share globals. One provider per backend type.
-  `FLMProvider` additionally probes `flm list -j` inside the toolbox
+  slot status from the slot manager (`SlotManager.state()`) and routes
+  requests. If a slot is offline, it returns a structured error;
+  restarting is a separate API call.
+- **Providers are stateless.** Each live provider (`LlamaServerProvider`,
+  `FLMProvider`, `ComfyUIProvider`) is a class with `build_env()`,
+  `start_cmd()`, `health()`, `infer()`. They don't hold connection state,
+  don't manage systemd, and don't share globals. One provider per backend
+  type. `FLMProvider` additionally probes `flm list -j` inside the toolbox
   image to advertise its own model-tag namespace
   (`share/flm/model_list.json`) — it does **not** run arbitrary GGUFs
-  from the registry.
+  from the registry.  `LemonadeProvider` is the primary slot lifecycle
+  driver (v0.2+); `LlamaServerProvider` and `FLMProvider` are retained for
+  Vulkan and NPU slots respectively.  `MoonshineProvider` and
+  `KokoroProvider` were retired in #620 — lemond serves STT/TTS natively.
 - **The registry is the only source of truth for "what models exist."**
   Atomic TOML files under `/var/lib/hal0/registry/`. mtime-cached. Slot
   configs reference model IDs from the registry; if a model is deleted,
diff --git a/src/hal0/dispatcher/proxy.py b/src/hal0/dispatcher/proxy.py
deleted file mode 100644
index 5f54d0d7..00000000
--- a/src/hal0/dispatcher/proxy.py
+++ /dev/null
@@ -1,132 +0,0 @@
-"""Legacy proxy fallback for the dispatcher.
-
-Path + model-name heuristics that route a request to a slot when the
-registry has nothing to say.  Ported from haloai ``lib/proxy.py`` and kept
-during v1.0 as the last-resort step in :class:`Dispatcher.dispatch`.
-
-PLAN.md §3 marks this for absorption into ``router.py`` post-v0.2.  Do not
-delete it until then — the v1 dispatcher resolution order explicitly ends
-in "legacy fallback" and operator muscle memory ("slot named coding-1m"
-addressing) depends on these heuristics.
-
-Port target: haloai ``lib/proxy.py`` (``resolve_slot`` only — the streaming
-forwarder lives in :mod:`hal0.dispatcher.router`).
-"""
-
-from __future__ import annotations
-
-from typing import Any
-
-from hal0.errors import Hal0Error
-from hal0.upstreams.registry import Upstream, UpstreamRegistry
-
-# NOTE: revisit in Phase 5 — absorb into router.py after Dispatcher is stable.
-
-# Path fragments that pin a request to a specific slot regardless of model.
-# Mirrors haloai lib/proxy.py:51-58 (embeddings + rerank both target embed).
-_EMBED_PATHS = ("/embeddings", "/rerank")
-
-# Path fragments that pin a request to the image-gen slot (ComfyUI). The
-# OpenAI shape is `/v1/images/generations` — when that hits the legacy
-# fallback we don't want it routed to the chat slot.
-_IMAGE_PATHS = ("/images/generations", "/images/edits", "/images/variations")
-
-# Substrings in the model name that pin to known slot roles.  Order matters:
-# the ":" (FLM tag-style id) check runs before the bare-name substring checks
-# so that "qwen3.5:embed" still routes to the NPU rather than to embed.
-_EMBED_NAME_HINTS = ("embed", "rerank")
-
-# Model id prefixes that pin to the image-gen slot. Curated catalogue uses
-# these prefixes (sdxl-turbo, sd-1.5-..., flux-*). Anything matching
-# these in the bare-model lookup goes to the `img` slot before legacy slot
-# name resolution kicks in.
-_IMAGE_NAME_PREFIXES = ("sdxl", "sd-1.5", "sd15", "flux")
-
-
-class LegacyResolutionFailed(Hal0Error):
-    """Raised when the legacy path/name heuristics find no slot to serve a request.
-
-    Carries a ``dispatch.legacy_unresolved`` code so the structured error
-    envelope distinguishes "nothing in registry AND nothing in legacy
-    fallback" from "registry binding pointed at an unknown upstream."
-    """
-
-    code = "dispatch.legacy_unresolved"
-    status = 404
-
-
-def resolve_slot(  # TIER1
-    path: str,
-    body: dict[str, Any] | None,
-    upstreams: UpstreamRegistry,
-) -> Upstream:
-    """Resolve a request to a slot Upstream using path+name heuristics.
-
-    Mirrors haloai ``lib/proxy.py:resolve_slot`` but returns a typed
-    :class:`Upstream` (or raises a typed error) instead of the old
-    ``(slot_name, port)`` tuple.
-
-    Resolution rules (in order):
-      1. ``/embeddings`` or ``/rerank`` in path → ``embed`` slot.
-      2. ``/images/...`` in path → ``img`` slot (ComfyUI).
-      3. Model id contains ``:`` (FLM tag-style) → ``npu`` slot.
-      4. Model id starts with ``sdxl``/``sd-1.5``/``sd15``/``flux`` → ``img`` slot.
-      5. Model id contains ``embed`` or ``rerank`` substring → ``embed`` slot.
-      6. Model id exactly matches a registered slot upstream name (other
-         than ``primary``) → that slot.
-      7. Fallback → ``primary`` slot.
-
-    Args:
-        path:       The original request path (e.g. "/v1/chat/completions").
-        body:       Parsed JSON body dict (may be None for GETs).
-        upstreams:  Registry to resolve slot names against.
-
-    Returns:
-        An :class:`Upstream` representing the slot to forward to.
-
-    Raises:
-        LegacyResolutionFailed: If the heuristics select a slot name but no
-            matching slot Upstream is registered.  Carries a
-            ``dispatch.legacy_unresolved`` code via the typed Hal0Error envelope.
-    """
-    candidate: str | None = None
-
-    # Rule 1 — path-based pin (embeddings/rerank).
-    if any(frag in path for frag in _EMBED_PATHS):
-        candidate = "embed"
-    # Rule 2 — image-generation path pins to the img slot.
-    elif any(frag in path for frag in _IMAGE_PATHS):
-        candidate = "img"
-    elif body:
-        model = body.get("model", "")
-        if isinstance(model, str) and model:
-            m = model.lower()
-            # Rule 3 — FLM tag format "name:tag" routes to NPU.
-            if ":" in model:
-                candidate = "npu"
-            # Rule 4 — image-gen model id prefix pin (sdxl-/sd-1.5-/flux-).
-            elif any(m.startswith(prefix) for prefix in _IMAGE_NAME_PREFIXES):
-                candidate = "img"
-            # Rule 5 — name-substring pin (embed/rerank).
-            elif any(hint in m for hint in _EMBED_NAME_HINTS):
-                candidate = "embed"
-            else:
-                # Rule 6 — explicit slot-name addressing.
-                slot_match = upstreams.get(m)
-                if slot_match is not None and slot_match.kind == "slot" and m != "primary":
-                    candidate = m
-
-    # Rule 7 — fallback default slot.
-    if candidate is None:
-        candidate = "primary"
-
-    upstream = upstreams.get(candidate)
-    if upstream is None or upstream.kind != "slot":
-        raise LegacyResolutionFailed(
-            f"legacy fallback selected slot {candidate!r} but no matching slot upstream is registered",
-            details={"slot": candidate, "path": path},
-        )
-    return upstream
-
-
-__all__ = ["LegacyResolutionFailed", "resolve_slot"]
diff --git a/src/hal0/dispatcher/router.py b/src/hal0/dispatcher/router.py
index 4b4d1983..d106a228 100644
--- a/src/hal0/dispatcher/router.py
+++ b/src/hal0/dispatcher/router.py
@@ -17,9 +17,10 @@
      Tier 2), then re-check passthrough.  The prefetch fanout is wrapped
      in :class:`SingleFlightGroup` (Tier 3) so 100 concurrent identical
      prefetches share a single upstream call.
-  4. **legacy fallback** — :func:`hal0.dispatcher.proxy.resolve_slot`
-     path-and-name heuristics from haloai ``lib/proxy.py``.  Kept until
-     v0.2.
+
+If all three tiers miss, :class:`NoRouteFound` is raised immediately.
+The legacy path/name heuristics (``dispatcher/proxy.py``) were retired
+in #624 — image-gen and embed models must have explicit registry bindings.
 
 Decision logging: every routing decision emits one structured log line
 to journald with ``SYSLOG_IDENTIFIER=hal0-dispatch`` (PLAN.md §5 Tier 2),
@@ -43,7 +44,6 @@
 import structlog
 from fastapi.responses import Response, StreamingResponse
 
-from hal0.dispatcher.proxy import LegacyResolutionFailed, resolve_slot
 from hal0.dispatcher.single_flight import SingleFlightGroup
 from hal0.errors import Hal0Error
 from hal0.upstreams.registry import Upstream, UpstreamRegistry
@@ -544,52 +544,13 @@ async def dispatch(
                     self._log_decision(call, t0, cache_state="prefetched")
                     return call
 
-        # ── Step 4: legacy heuristics ────────────────────────────────────
-        try:  # TIER1 — narrow exception handling; log + re-raise typed errors
-            slot_upstream = resolve_slot(path, body, self._upstreams)
-        except LegacyResolutionFailed as exc:
-            # Bubble the typed error up after logging the decision point.
-            log.warning(
-                "legacy fallback exhausted",
-                model=model_id,
-                path=path,
-                error=exc.message,
-            )
-            raise NoRouteFound(
-                f"model {model_id!r} not found in registry, no upstream advertised it, "
-                f"and legacy slot resolution failed",
-                details={"model": model_id, "path": path, "legacy_error": exc.message},
-            ) from exc
-        except Hal0Error:
-            # Typed errors are caller-meaningful: re-raise unchanged.
-            raise
-        except Exception as exc:  # TIER1 — was: silent swallow at haloai dispatcher.py:291
-            log.warning(
-                "legacy fallback raised unexpectedly",
-                model=model_id,
-                path=path,
-                error=str(exc),
-                error_type=type(exc).__name__,
-            )
-            raise NoRouteFound(
-                f"model {model_id!r}: legacy slot resolution raised {type(exc).__name__}",
-                details={"model": model_id, "path": path, "error": str(exc)},
-            ) from exc
-
-        call = UpstreamCall(
-            upstream_name=slot_upstream.name,
-            target_url=_join_url(slot_upstream.url, path),
-            headers=self._build_headers(request, slot_upstream),
-            body=_remap_model(body, original_model),
-            streaming=streaming,
-            method=method,
-            resolved_model=original_model or model_id,
-            requested_model=original_model,
-            resolution_path=f"legacy_slot:{slot_upstream.name}",
-            slot_name=_slot_name_of(slot_upstream),
+        # Tiers 1-3 exhausted — no route found.  The legacy path/name heuristics
+        # (proxy.resolve_slot) were retired in #624; image-gen and embed models
+        # must have explicit registry bindings.
+        raise NoRouteFound(
+            f"model {model_id!r} not found in registry and no upstream advertised it",
+            details={"model": model_id, "path": path},
         )
-        self._log_decision(call, t0, cache_state="legacy")
-        return call
 
     async def forward(self, call: UpstreamCall) -> Response:
         """Execute the HTTP forward and return a FastAPI Response.
@@ -666,7 +627,7 @@ async def _ensure_slot_loaded_backend_aware(self, call: UpstreamCall) -> None:
 
         assert self._slot_manager is not None  # narrowed by forward()
         slot_name = call.slot_name
-        current = self._slot_manager._current_state(slot_name)
+        current = self._slot_manager.state(slot_name)
         if current in (SlotState.READY, SlotState.SERVING, SlotState.IDLE):
             # Model is already loaded under whatever backend it loaded with;
             # nothing to do. (A declared≠actual drift is surfaced by the
@@ -701,7 +662,7 @@ def _check_slot_ready_for_dispatch(self, call: UpstreamCall) -> None:
         from hal0.slots.state import SlotState
 
         assert self._slot_manager is not None  # narrowed by caller
-        current = self._slot_manager._current_state(call.slot_name)
+        current = self._slot_manager.state(call.slot_name)
         if current in (SlotState.READY, SlotState.SERVING, SlotState.IDLE):
             return
 
diff --git a/src/hal0/slots/manager.py b/src/hal0/slots/manager.py
index 84489549..7d254685 100644
--- a/src/hal0/slots/manager.py
+++ b/src/hal0/slots/manager.py
@@ -304,6 +304,18 @@ def _ensure_known(self, name: str) -> None:
 
     # ── state machine ────────────────────────────────────────────────────────
 
+    def state(self, name: str) -> SlotState:
+        """Return the current :class:`SlotState` for *name* (public API).
+
+        Reads from the in-memory cache first; falls back to the on-disk
+        ``state.json`` if the slot has not been seen yet this process.
+        Returns :attr:`SlotState.OFFLINE` when no state record exists.
+
+        External callers such as the Dispatcher should call this method;
+        only internal manager code should use the private ``_current_state``.
+        """
+        return self._current_state(name)
+
     def _current_state(self, name: str) -> SlotState:
         rec = self._states.get(name)
         if rec is None:
diff --git a/tests/api/test_v1_chat_slot_alias.py b/tests/api/test_v1_chat_slot_alias.py
index be2440c9..e6677907 100644
--- a/tests/api/test_v1_chat_slot_alias.py
+++ b/tests/api/test_v1_chat_slot_alias.py
@@ -167,39 +167,6 @@ async def test_rewrite_is_noop_without_slot_manager() -> None:
     assert body["model"] == "primary"
 
 
-# ── dispatcher / proxy non-regression ───────────────────────────────────────
-
-
-def test_resolve_slot_primary_still_falls_through_to_lemonade() -> None:
-    """``resolve_slot`` keeps the ``m != "primary"`` carve-out: a chat
-    request that reaches the legacy fallback selects ``primary`` and
-    (absent a real primary slot upstream) raises the typed legacy error,
-    which the dispatcher converts to NoRouteFound → lemonade fall-through.
-    No per-slot chat upstream is matched."""
-    from hal0.dispatcher.proxy import LegacyResolutionFailed, resolve_slot
-    from hal0.upstreams.registry import Upstream, UpstreamRegistry
-
-    reg = UpstreamRegistry()
-    # Only the composite hal0 upstream exists (no per-slot chat upstreams).
-    reg.upsert(
-        Upstream(
-            name="hal0",
-            kind="slot",
-            url="http://127.0.0.1:8080/v1",
-            slot_name=None,
-            auth_style="none",
-        )
-    )
-    with pytest.raises(LegacyResolutionFailed):
-        # model id form — not an alias, not a registered slot name → falls
-        # to the "primary" default which has no slot upstream → legacy error.
-        resolve_slot(
-            "/v1/chat/completions",
-            {"model": "hermes-4-14b-q5km", "messages": []},
-            reg,
-        )
-
-
 def _patch_alias(monkeypatch: pytest.MonkeyPatch) -> None:
     async def _fake(_sm: Any) -> dict[str, str]:
         return {
diff --git a/tests/dispatcher/test_composite_dispatch.py b/tests/dispatcher/test_composite_dispatch.py
index 8057c4bb..5b612e96 100644
--- a/tests/dispatcher/test_composite_dispatch.py
+++ b/tests/dispatcher/test_composite_dispatch.py
@@ -70,6 +70,9 @@ class _OfflineSlotManager:
     def __init__(self) -> None:
         self.serving_calls: list[str] = []
 
+    def state(self, _slot_name: str) -> SlotState:
+        return SlotState.OFFLINE
+
     def _current_state(self, _slot_name: str) -> SlotState:
         return SlotState.OFFLINE
 
diff --git a/tests/dispatcher/test_image_routing.py b/tests/dispatcher/test_image_routing.py
deleted file mode 100644
index 7da0264a..00000000
--- a/tests/dispatcher/test_image_routing.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""Routing rules for image-gen requests in the legacy fallback proxy.
-
-These tests cover the new ``/v1/images/*`` path pin + the SDXL/SD/Flux
-model-id prefix pin Team K added to ``hal0.dispatcher.proxy.resolve_slot``.
-The chat / embed / NPU rules are intentionally not re-tested here — they
-have coverage in :mod:`tests.dispatcher.test_router`.
-"""
-
-from __future__ import annotations
-
-import pytest
-
-from hal0.dispatcher.proxy import LegacyResolutionFailed, resolve_slot
-from hal0.upstreams.registry import Upstream, UpstreamRegistry
-
-
-def _registry_with_slots(*names: str) -> UpstreamRegistry:
-    reg = UpstreamRegistry()
-    for n in names:
-        reg.upsert(
-            Upstream(
-                name=n,
-                kind="slot",
-                url="http://127.0.0.1:8186/v1",
-                slot_name=n,
-                auth_style="none",
-            )
-        )
-    return reg
-
-
-def test_images_generations_path_routes_to_img_slot() -> None:
-    reg = _registry_with_slots("primary", "img")
-    upstream = resolve_slot("/v1/images/generations", {"model": "sdxl-turbo"}, reg)
-    assert upstream.name == "img"
-
-
-def test_sdxl_model_id_routes_to_img_even_without_image_path() -> None:
-    """A bare /v1/chat/completions with model='sdxl-turbo' must NOT hit primary."""
-    reg = _registry_with_slots("primary", "img")
-    upstream = resolve_slot(
-        "/v1/chat/completions",
-        {"model": "sdxl-turbo", "messages": [{"role": "user", "content": "hi"}]},
-        reg,
-    )
-    assert upstream.name == "img"
-
-
-def test_sd15_model_prefix_routes_to_img() -> None:
-    reg = _registry_with_slots("primary", "img")
-    upstream = resolve_slot(
-        "/v1/images/generations",
-        {"model": "sd-1.5-pruned-emaonly", "prompt": "x"},
-        reg,
-    )
-    assert upstream.name == "img"
-
-
-def test_flux_model_prefix_routes_to_img() -> None:
-    reg = _registry_with_slots("primary", "img")
-    upstream = resolve_slot(
-        "/v1/images/generations",
-        {"model": "Flux-2-Klein-9B-GGUF", "prompt": "x"},
-        reg,
-    )
-    assert upstream.name == "img"
-
-
-def test_chat_model_id_still_routes_to_primary() -> None:
-    """The image rules must not regress chat routing."""
-    reg = _registry_with_slots("primary", "img")
-    upstream = resolve_slot(
-        "/v1/chat/completions",
-        {"model": "qwen3-4b", "messages": []},
-        reg,
-    )
-    assert upstream.name == "primary"
-
-
-def test_image_path_without_img_slot_raises_typed_error() -> None:
-    """Path pin selects 'img', missing 'img' upstream → typed legacy error."""
-    reg = _registry_with_slots("primary")
-    with pytest.raises(LegacyResolutionFailed) as exc:
-        resolve_slot("/v1/images/generations", {"model": "sdxl-turbo"}, reg)
-    assert exc.value.code == "dispatch.legacy_unresolved"
diff --git a/tests/dispatcher/test_router.py b/tests/dispatcher/test_router.py
index 9e939b7e..add3f171 100644
--- a/tests/dispatcher/test_router.py
+++ b/tests/dispatcher/test_router.py
@@ -1,11 +1,10 @@
 """Unit tests for ``hal0.dispatcher.router.Dispatcher``.
 
-Covers all four resolution paths from PLAN.md §3:
+Covers all three resolution paths from PLAN.md §3 (#624 retired Tier-4):
 
     1. registry            — exact ModelRegistry binding
     2. passthrough         — upstream's cached /v1/models already has the id
     3. cold-cache prefetch — fanout + recheck (Tier 2 timeout + Tier 3 single-flight)
-    4. legacy fallback     — path/name heuristics in proxy.py
 
 Plus the structured-envelope assertions for every ``dispatch.*`` error code
 (PLAN.md §5 Tier 1 — no silent swallowing).
@@ -23,7 +22,6 @@
 from starlette.requests import Request
 
 from hal0.api.middleware import error_codes
-from hal0.dispatcher.proxy import LegacyResolutionFailed
 from hal0.dispatcher.router import (
     Dispatcher,
     NoRouteFound,
@@ -265,42 +263,49 @@ async def slow_fetch(_u: Upstream) -> list[str]:
     assert exc.value.code == "dispatch.no_route"
 
 
-# ── 4. legacy fallback path ──────────────────────────────────────────────────
+# ── 4. NoRouteFound when all three tiers miss (#624 retired legacy fallback) ──
 
 
 @pytest.mark.asyncio
-async def test_legacy_fallback_routes_to_primary_when_nothing_else_matches() -> None:
+async def test_no_route_when_nothing_matches_any_tier() -> None:
+    """When registry has no binding and no upstream advertises the model,
+    NoRouteFound is raised immediately (Tier-4 legacy fallback was retired in #624).
+    """
     primary = make_slot("primary")
     upstreams = FakeUpstreamRegistry([primary])
     models = FakeModelRegistry(routes={})  # no binding
 
     dispatcher = Dispatcher(upstream_registry=upstreams, model_registry=models)
-    call = await dispatcher.dispatch(
-        make_request(),
-        body={"model": "some-unknown-model"},
-    )
-    assert call.resolution_path == "legacy_slot:primary"
-    assert call.upstream_name == "primary"
+    with pytest.raises(NoRouteFound) as exc:
+        await dispatcher.dispatch(
+            make_request(),
+            body={"model": "some-unknown-model"},
+        )
+    assert exc.value.code == "dispatch.no_route"
 
 
 @pytest.mark.asyncio
-async def test_legacy_fallback_routes_embeddings_to_embed_slot() -> None:
+async def test_no_route_for_embeddings_path_without_registry_binding() -> None:
+    """#624: /embeddings path with no registry binding raises NoRouteFound.
+    Pre-#624 the legacy fallback would have picked the 'embed' slot by
+    path-pin; now the embed model must be in the registry.
+    """
     embed = make_slot("embed", "http://127.0.0.1:8082/v1")
     upstreams = FakeUpstreamRegistry([embed])
-    models = FakeModelRegistry(routes={})
+    models = FakeModelRegistry(routes={})  # no binding, cache empty
 
     dispatcher = Dispatcher(upstream_registry=upstreams, model_registry=models)
-    call = await dispatcher.dispatch(
-        make_request(path="/v1/embeddings"),
-        body={"input": "hello"},
-    )
-    assert call.resolution_path == "legacy_slot:embed"
-    assert call.upstream_name == "embed"
+    with pytest.raises(NoRouteFound) as exc:
+        await dispatcher.dispatch(
+            make_request(path="/v1/embeddings"),
+            body={"input": "hello"},
+        )
+    assert exc.value.code == "dispatch.no_route"
 
 
 @pytest.mark.asyncio
-async def test_legacy_fallback_with_no_primary_raises_typed_no_route() -> None:
-    """When even legacy resolution can't find a slot, raise typed NoRouteFound."""
+async def test_no_route_raises_typed_error_with_no_upstreams() -> None:
+    """With nothing registered at all, dispatch() still raises the typed NoRouteFound."""
     upstreams = FakeUpstreamRegistry([])  # nothing registered
     models = FakeModelRegistry(routes={})
 
@@ -308,7 +313,6 @@ async def test_legacy_fallback_with_no_primary_raises_typed_no_route() -> None:
     with pytest.raises(NoRouteFound) as exc:
         await dispatcher.dispatch(make_request(), body={"model": "anything"})
     assert exc.value.code == "dispatch.no_route"
-    assert isinstance(exc.value.__cause__, LegacyResolutionFailed)
 
 
 # ── path defaults ────────────────────────────────────────────────────────────
@@ -423,8 +427,20 @@ def _capture(logger: Any, method_name: str, event_dict: dict[str, Any]) -> dict[
     try:
         primary = make_slot("primary")
         upstreams = FakeUpstreamRegistry([primary])
-        models = FakeModelRegistry(routes={})
-        dispatcher = Dispatcher(upstream_registry=upstreams, model_registry=models)
+        # Give the registry a binding so Tier 1 resolves and fires the log.
+        # Provide is_online + cached_models so the slot appears online and the
+        # registry path emits a dispatch.decision log before returning.
+        models = FakeModelRegistry(routes={"anything": "primary"})
+
+        async def _always_online(_u: Any) -> bool:
+            return True
+
+        dispatcher = Dispatcher(
+            upstream_registry=upstreams,
+            model_registry=models,
+            is_online=_always_online,
+            cached_models=lambda name: ["anything"] if name == "primary" else [],
+        )
         await dispatcher.dispatch(make_request(), body={"model": "anything"})
     finally:
         # Restore the dispatcher's prior cached bind (if any) before
@@ -445,14 +461,19 @@ class _FakeSlotManager:
     """Minimal SlotManager surface for the forward() lazy-load gate.
 
     Tracks whether ``load`` was called and reports a fixed slot state via
-    ``_current_state`` so we can drive both the cold-miss and already-loaded
-    branches of ``_ensure_slot_loaded_backend_aware``.
+    ``state()`` (the public API introduced in #624) so we can drive both
+    the cold-miss and already-loaded branches of
+    ``_ensure_slot_loaded_backend_aware``.
     """
 
     def __init__(self, state: Any) -> None:
         self._state = state
         self.load_calls: list[str] = []
 
+    def state(self, name: str) -> Any:
+        return self._state
+
+    # Keep the private alias so any lingering internal call doesn't crash.
     def _current_state(self, name: str) -> Any:
         return self._state
 
diff --git a/tests/dispatcher/test_serving_integration.py b/tests/dispatcher/test_serving_integration.py
index 926e91c3..ef875b6c 100644
--- a/tests/dispatcher/test_serving_integration.py
+++ b/tests/dispatcher/test_serving_integration.py
@@ -54,11 +54,16 @@ def serving(self, slot_name: str) -> _RecordingCtx:
     def in_flight_count(self, slot_name: str) -> int:
         return self._counts.get(slot_name, 0)
 
+    def state(self, _slot_name: str) -> SlotState:
+        # Public API introduced in #624 — Dispatcher now calls .state()
+        # instead of ._current_state().  Default READY so the existing
+        # serving-integration tests pass; tests for the gate construct
+        # the mock with the state they want to assert against.
+        return self._state
+
     def _current_state(self, _slot_name: str) -> SlotState:
-        # Mirrors SlotManager._current_state — Dispatcher's swap-window
-        # gate calls this before forwarding.  Default READY so the
-        # existing serving-integration tests pass; tests for the gate
-        # construct the mock with the state they want to assert against.
+        # Keep the private alias so any caller that still reaches for
+        # _current_state() directly keeps working against this mock.
         return self._state
 
     async def recover_evicted_slot(self, slot_name: str) -> None:
diff --git a/tests/slots/test_manager.py b/tests/slots/test_manager.py
index 7429f3c0..9b31b0d8 100644
--- a/tests/slots/test_manager.py
+++ b/tests/slots/test_manager.py
@@ -779,3 +779,40 @@ async def test_update_config_ctx_size_alias_wins_over_stale_context_size(
     assert "context_size = 32768" in cfg_text
     assert "4096" not in cfg_text
     assert "ctx_size = " not in cfg_text
+
+
+# ── SlotManager.state() public API (#624) ────────────────────────────────────
+
+
+def test_state_returns_offline_for_unknown_slot(tmp_hal0_home: str) -> None:
+    """state() returns OFFLINE when no record exists (no file, no cache)."""
+    sm = SlotManager()
+    assert sm.state("nonexistent-slot") == SlotState.OFFLINE
+
+
+async def test_state_returns_current_after_transition(tmp_hal0_home: str) -> None:
+    """state() reflects the slot state after a _transition call."""
+    sm = SlotManager()
+    await sm._transition(
+        "primary",
+        SlotState.IDLE,
+        model_id="qwen3-4b-q4_k_m",
+        port=8081,
+        extra={"backend": "vulkan", "provider": "lemonade"},
+        force=True,
+    )
+    assert sm.state("primary") == SlotState.IDLE
+
+
+async def test_state_agrees_with_current_state(tmp_hal0_home: str) -> None:
+    """state() and _current_state() return the same value (public delegates to private)."""
+    sm = SlotManager()
+    await sm._transition(
+        "primary",
+        SlotState.READY,
+        model_id="qwen3-4b-q4_k_m",
+        port=8081,
+        extra={"backend": "vulkan", "provider": "lemonade"},
+        force=True,
+    )
+    assert sm.state("primary") == sm._current_state("primary")