stefanko-ch · stefanko-ch · May 27, 2026 · May 24, 2026 · May 24, 2026 · May 27, 2026
diff --git a/docs/stacks/litellm.md b/docs/stacks/litellm.md
@@ -104,14 +104,15 @@ All three must be non-empty or the deploy aborts (no silent auth-bypass).
 
 ### Ollama integration
 
-The LiteLLM compose joins the external `ollama-internal` network so it can reach `http://ollama:11434` directly without going through the public CF Tunnel route — fast and private. **The Ollama stack MUST be enabled** for LiteLLM to start: the compose declares `external: true` + `name: ollama-internal` on that network, and Docker will refuse to start the LiteLLM container if the network doesn't exist (error: `network ollama-internal not found`).
+The LiteLLM compose joins the external `ollama-internal` network so it can reach `http://ollama:11434` directly without going through the public CF Tunnel route — fast and private.
 
-If you want LiteLLM without Ollama (e.g. real-providers-only setup), you need TWO changes:
+**LiteLLM works whether or not Ollama is enabled.** The deploy pipeline (`compose_runner.run_compose_up`) pre-creates the `ollama-internal` network idempotently before `docker compose up` whenever `litellm` or `ollama` is in the enabled list. When Ollama is also enabled, its own compose joins the same network by name (`name: ollama-internal` pinned on both sides), and the `ollama` hostname in `http://ollama:11434` resolves across stacks. When Ollama is NOT enabled, LiteLLM still starts cleanly — only requests routed to a model whose `api_base` points at Ollama will fail.
 
-1. Remove the `ollama-internal` network declaration AND the `ollama-internal:` entry under `litellm.networks:` in `stacks/litellm/docker-compose.yml`
-2. Remove the `gpt-3.5-turbo` → Ollama route from `stacks/litellm/config.yaml.template` (otherwise the proxy serves the model name but routes to an unreachable backend)
+If you want to run LiteLLM with real-provider keys only (no Ollama), the only edit needed is:
 
-Removing just the config route without the network change still results in container start failure.
+- Remove or comment out the `gpt-3.5-turbo` → Ollama route in `stacks/litellm/config.yaml.template` (otherwise the proxy serves the model name but the routed request to `http://ollama:11434` fails with a connection error). Add real-provider entries in its place — see "Option B" above.
+
+No edits to `stacks/litellm/docker-compose.yml` are required for the no-Ollama case.
 
 ### Persistence
 

diff --git a/src/nexus_deploy/compose_runner.py b/src/nexus_deploy/compose_runner.py
@@ -149,6 +149,7 @@ def render_remote_script(
     leaves: list[str],
     dify_storage_prep: bool = False,
     metabase_storage_prep: bool = False,
+    ollama_internal_network_prep: bool = False,
     stacks_dir: str = _REMOTE_STACKS_DIR,
     global_env: str = _REMOTE_GLOBAL_ENV,
 ) -> str:
@@ -197,6 +198,34 @@ def render_remote_script(
 chown -R 1001:1001 /mnt/nexus-data/dify/storage /mnt/nexus-data/dify/plugins
 """
 
+    # `ollama-internal` is the cross-stack bridge LiteLLM uses to reach
+    # the Ollama container by service name when both stacks are enabled.
+    # Both compose files (stacks/ollama, stacks/litellm) declare the
+    # network as `external: true` so neither owns the lifecycle —
+    # without a pre-existing network of that name, `docker compose up`
+    # aborts with "network ollama-internal declared as external, but
+    # could not be found" BEFORE the container is even created. We
+    # pre-create here, idempotently, whenever either stack is enabled.
+    #
+    # The symmetric `external: true` design (vs. having Ollama own the
+    # network as compose-managed) avoids a subtle race in the joint
+    # LiteLLM+Ollama case: parallel `docker compose up` for both
+    # parents would otherwise have one project try to create a network
+    # the other one expects to find, and Compose's tolerance for
+    # pre-existing networks varies by version.
+    #
+    # A bare `docker network create` fails when the network already
+    # exists, so it runs behind an inspect-guard (`inspect` exits 0 if
+    # the network exists, non-zero if not) via a short-circuit `||`.
+    # The `--label` lets ops tell apart nexus-managed networks from
+    # operator-created ones when troubleshooting.
+    ollama_internal_block = ""
+    if ollama_internal_network_prep:
+        ollama_internal_block = """
+docker network inspect ollama-internal >/dev/null 2>&1 || \\
+    docker network create --label managed-by=nexus-stack ollama-internal
+"""
+
     metabase_block = ""
     if metabase_storage_prep:
         # Metabase runs as uid 2000 (since v0.46 official image) and
@@ -234,7 +263,7 @@ def render_remote_script(
 
 PARENTS=({parents_q})
 LEAVES=({leaves_q})
-{dify_block}{metabase_block}
+{ollama_internal_block}{dify_block}{metabase_block}
 STARTED=0
 FAILED=0
 PIDS=()
@@ -332,6 +361,7 @@ def run_compose_up(
     host: str = "nexus",
     dify_storage_prep: bool | None = None,
     metabase_storage_prep: bool | None = None,
+    ollama_internal_network_prep: bool | None = None,
     script_runner: ScriptRunner | None = None,
 ) -> ComposeUpResult:
     """Render → exec → parse.
@@ -366,12 +396,18 @@ def run_compose_up(
     actual_metabase = (
         metabase_storage_prep if metabase_storage_prep is not None else "metabase" in enabled
     )
+    actual_ollama_internal = (
+        ollama_internal_network_prep
+        if ollama_internal_network_prep is not None
+        else ("litellm" in enabled or "ollama" in enabled)
+    )
 
     script = render_remote_script(
         parents=parents,
         leaves=leaves,
         dify_storage_prep=actual_dify,
         metabase_storage_prep=actual_metabase,
+        ollama_internal_network_prep=actual_ollama_internal,
     )
 
     run_script = script_runner or (lambda s: _remote.ssh_run_script(s, host=host))

diff --git a/stacks/litellm/docker-compose.yml b/stacks/litellm/docker-compose.yml
@@ -111,12 +111,15 @@ networks:
     driver: bridge
   # Cross-stack join into the ollama stack's internal bridge so
   # LiteLLM can reach `ollama:11434` directly without going through
-  # the public CF Tunnel route. Both sides pin the network to the
-  # global name `ollama-internal` (NOT the default project-prefixed
-  # `ollama_ollama-internal`) so the external reference here finds
-  # the actual created network. Requires the ollama stack to be
-  # enabled — if not, LiteLLM will fail to start with "network
-  # ollama-internal not found"; disable LiteLLM or enable Ollama.
+  # the public CF Tunnel route. Both stacks (here and stacks/ollama)
+  # declare this network as `external: true` + `name: ollama-internal`;
+  # the deploy pipeline's compose_runner pre-creates the network
+  # whenever either stack is enabled, so neither compose project owns
+  # the network's lifecycle. LiteLLM-alone: pre-create fires →
+  # external reference resolves → operator wires real-provider keys via
+  # config.yaml. Joint LiteLLM + Ollama: pre-create fires once →
+  # both stacks join the existing network → `http://ollama:11434`
+  # resolves cross-stack with no race on creation.
   ollama-internal:
     external: true
     name: ollama-internal
diff --git a/stacks/ollama/docker-compose.yml b/stacks/ollama/docker-compose.yml
@@ -75,8 +75,12 @@ networks:
   app-network:
     external: true
   ollama-internal:
-    # Pin the global name (NOT project-prefixed to `ollama_ollama-internal`)
-    # so cross-stack consumers like LiteLLM can declare
-    # `external: true` + `name: ollama-internal` and reliably join it.
-    driver: bridge
+    # Treated as external on both sides (here and in stacks/litellm). The
+    # deploy pipeline pre-creates `ollama-internal` whenever either stack
+    # is enabled (see compose_runner.render_remote_script's
+    # `ollama_internal_network_prep` block), so network ownership lives
+    # outside both compose projects. This keeps the joint Ollama+LiteLLM
+    # deployment deterministic — neither compose project tries to "own"
+    # the network and race on creation.
+    external: true
     name: ollama-internal
diff --git a/tests/unit/test_compose_runner.py b/tests/unit/test_compose_runner.py
@@ -426,6 +426,136 @@ def capture(script: str) -> subprocess.CompletedProcess[str]:
     assert "/mnt/nexus-data/metabase" not in captured_script["script"]
 
 
+# ---------------------------------------------------------------------------
+# ollama-internal cross-stack network prep (shared by ollama + litellm)
+# ---------------------------------------------------------------------------
+
+
+def test_render_ollama_internal_network_prep_only_when_flagged() -> None:
+    """Both `stacks/ollama` and `stacks/litellm` declare `ollama-internal`
+    as an `external: true` network. Without the network already created,
+    `docker compose up` aborts BEFORE the container is created. The
+    pre-compose block creates the network idempotently when either stack
+    is enabled, and is omitted otherwise."""
+    without = _render_default(ollama_internal_network_prep=False)
+    assert "ollama-internal" not in without
+    assert "docker network create" not in without
+
+    with_prep = _render_default(ollama_internal_network_prep=True)
+    assert "docker network inspect ollama-internal" in with_prep
+    assert "docker network create --label managed-by=nexus-stack ollama-internal" in with_prep
+
+
+def test_render_ollama_internal_network_prep_is_idempotent() -> None:
+    """The inspect-then-create guard short-circuits if the network
+    already exists. A bare `docker network create` would fail with
+    a non-zero exit on the second deploy under `set -euo pipefail`
+    and abort the entire compose-up loop.
+
+    Tests the full inspect→create chain as one contiguous expression
+    rather than just `||` presence: a future refactor that splits
+    the guard into two separate statements (e.g. `inspect; if [ $?
+    -ne 0 ]; then create; fi`) would lose the `||` exemption from
+    `set -e`: a failing `inspect` would abort the script before
+    `create` runs. Whitespace is normalised so the test isn't brittle
+    to backslash-newline continuations bash treats as one logical line.
+    """
+    script = _render_default(ollama_internal_network_prep=True)
+    # Normalise whitespace AND strip the bash line-continuation
+    # backslash (`\` followed by newline) so the substring matcher
+    # doesn't depend on the renderer's exact line-wrap choice.
+    normalised = " ".join(script.replace("\\\n", " ").split())
+    assert (
+        "docker network inspect ollama-internal >/dev/null 2>&1 || "
+        "docker network create --label managed-by=nexus-stack ollama-internal"
+    ) in normalised
+
+
+def test_run_compose_up_network_prep_default_when_litellm_in_enabled() -> None:
+    """ollama_internal_network_prep defaults to True iff 'litellm' OR
+    'ollama' is in enabled. Mirrors the dify/metabase storage-prep
+    default semantics."""
+    captured_script: dict[str, str] = {}
+
+    def capture(script: str) -> subprocess.CompletedProcess[str]:
+        captured_script["script"] = script
+        return subprocess.CompletedProcess(
+            args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr=""
+        )
+
+    run_compose_up(["jupyter", "litellm"], script_runner=capture)
+    assert "docker network inspect ollama-internal" in captured_script["script"]
+
+
+def test_run_compose_up_network_prep_default_when_ollama_in_enabled() -> None:
+    """Ollama-only deployment (LiteLLM disabled) still needs the
+    pre-create because ollama's own compose declares the network as
+    `external: true` — symmetric ownership with litellm."""
+    captured_script: dict[str, str] = {}
+
+    def capture(script: str) -> subprocess.CompletedProcess[str]:
+        captured_script["script"] = script
+        return subprocess.CompletedProcess(
+            args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr=""
+        )
+
+    run_compose_up(["jupyter", "ollama"], script_runner=capture)
+    assert "docker network inspect ollama-internal" in captured_script["script"]
+
+
+def test_run_compose_up_network_prep_renders_once_in_joint_case() -> None:
+    """Joint LiteLLM + Ollama deployment: only one pre-create block,
+    not duplicated. The `inspect || create` guard is idempotent at
+    runtime regardless, but rendering the block twice would be a
+    silent code smell — confirms the inference doesn't double-add."""
+    captured_script: dict[str, str] = {}
+
+    def capture(script: str) -> subprocess.CompletedProcess[str]:
+        captured_script["script"] = script
+        return subprocess.CompletedProcess(
+            args=["ssh"], returncode=0, stdout="RESULT started=2 failed=0", stderr=""
+        )
+
+    run_compose_up(["litellm", "ollama"], script_runner=capture)
+    assert (
+        captured_script["script"].count(
+            "docker network create --label managed-by=nexus-stack ollama-internal"
+        )
+        == 1
+    )
+
+
+def test_run_compose_up_network_prep_omitted_when_neither_in_enabled() -> None:
+    """Neither litellm nor ollama → no network-prep block (also no
+    spurious network created on stacks that don't need it)."""
+    captured_script: dict[str, str] = {}
+
+    def capture(script: str) -> subprocess.CompletedProcess[str]:
+        captured_script["script"] = script
+        return subprocess.CompletedProcess(
+            args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr=""
+        )
+
+    run_compose_up(["jupyter"], script_runner=capture)
+    assert "ollama-internal" not in captured_script["script"]
+
+
+def test_run_compose_up_network_prep_explicit_override_beats_enabled_inference() -> None:
+    """Caller can force ollama_internal_network_prep=False even when
+    'litellm' or 'ollama' is in enabled — operator escape hatch if the
+    network handling needs to be deferred to a different mechanism."""
+    captured_script: dict[str, str] = {}
+
+    def capture(script: str) -> subprocess.CompletedProcess[str]:
+        captured_script["script"] = script
+        return subprocess.CompletedProcess(
+            args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr=""
+        )
+
+    run_compose_up(["litellm"], script_runner=capture, ollama_internal_network_prep=False)
+    assert "ollama-internal" not in captured_script["script"]
+
+
 # ---------------------------------------------------------------------------
 # CLI integration
 # ---------------------------------------------------------------------------