diff --git a/docs/stacks/litellm.md b/docs/stacks/litellm.md index d2d89457..31d9eeae 100644 --- a/docs/stacks/litellm.md +++ b/docs/stacks/litellm.md @@ -104,14 +104,15 @@ All three must be non-empty or the deploy aborts (no silent auth-bypass). ### Ollama integration -The LiteLLM compose joins the external `ollama-internal` network so it can reach `http://ollama:11434` directly without going through the public CF Tunnel route — fast and private. **The Ollama stack MUST be enabled** for LiteLLM to start: the compose declares `external: true` + `name: ollama-internal` on that network, and Docker will refuse to start the LiteLLM container if the network doesn't exist (error: `network ollama-internal not found`). +The LiteLLM compose joins the external `ollama-internal` network so it can reach `http://ollama:11434` directly without going through the public CF Tunnel route — fast and private. -If you want LiteLLM without Ollama (e.g. real-providers-only setup), you need TWO changes: +**LiteLLM works whether or not Ollama is enabled.** The deploy pipeline (`compose_runner.run_compose_up`) pre-creates the `ollama-internal` network idempotently before `docker compose up` when `litellm` or `ollama` is in the enabled list. When Ollama is also enabled, its own compose joins the same network by name (`name: ollama-internal` pinned on both sides), and the cross-stack DNS lookup `http://ollama:11434` resolves. When Ollama is NOT enabled, LiteLLM still starts cleanly — only requests routed to a model whose `api_base` points at Ollama will fail. -1. Remove the `ollama-internal` network declaration AND the `ollama-internal:` entry under `litellm.networks:` in `stacks/litellm/docker-compose.yml` -2. 
Remove the `gpt-3.5-turbo` → Ollama route from `stacks/litellm/config.yaml.template` (otherwise the proxy serves the model name but routes to an unreachable backend) +If you want to wire LiteLLM to real providers only (no Ollama), the only edit needed is: -Removing just the config route without the network change still results in container start failure. +- Remove or comment out the `gpt-3.5-turbo` → Ollama route in `stacks/litellm/config.yaml.template` (otherwise the proxy serves the model name but the routed request to `http://ollama:11434` fails with a connection error). Add real-provider entries in its place — see "Option B" above. + +No edits to `stacks/litellm/docker-compose.yml` are required for the no-Ollama case. ### Persistence diff --git a/src/nexus_deploy/compose_runner.py b/src/nexus_deploy/compose_runner.py index 711d6035..8647e9b0 100644 --- a/src/nexus_deploy/compose_runner.py +++ b/src/nexus_deploy/compose_runner.py @@ -149,6 +149,7 @@ def render_remote_script( leaves: list[str], dify_storage_prep: bool = False, metabase_storage_prep: bool = False, + ollama_internal_network_prep: bool = False, stacks_dir: str = _REMOTE_STACKS_DIR, global_env: str = _REMOTE_GLOBAL_ENV, ) -> str: @@ -197,6 +198,34 @@ def render_remote_script( chown -R 1001:1001 /mnt/nexus-data/dify/storage /mnt/nexus-data/dify/plugins """ + # `ollama-internal` is the cross-stack bridge LiteLLM uses to reach + # the Ollama container by service name when both stacks are enabled. + # Both compose files (stacks/ollama, stacks/litellm) declare the + # network as `external: true` so neither owns the lifecycle — + # without a pre-existing network of that name, `docker compose up` + # aborts with "network ollama-internal declared as external, but + # could not be found" BEFORE the container is even created. We + # pre-create here, idempotently, whenever either stack is enabled. + # + # The symmetric `external: true` design (vs. 
having Ollama own the + # network as compose-managed) avoids a subtle race in the joint + # LiteLLM+Ollama case: parallel `docker compose up` for both + # parents would otherwise have one project try to create a network + # the other one expects to find, and Compose's tolerance for + # pre-existing networks varies by version. + # + # `docker network create --label` is idempotent enough for our + # purposes via the inspect-guard (exit 0 if exists, exit 1 if + # not — wrapped in a short-circuit `||`). The label lets ops + # tell apart nexus-managed networks from operator-created ones + # when troubleshooting. + ollama_internal_block = "" + if ollama_internal_network_prep: + ollama_internal_block = """ +docker network inspect ollama-internal >/dev/null 2>&1 || \\ + docker network create --label managed-by=nexus-stack ollama-internal +""" + metabase_block = "" if metabase_storage_prep: # Metabase runs as uid 2000 (since v0.46 official image) and @@ -234,7 +263,7 @@ def render_remote_script( PARENTS=({parents_q}) LEAVES=({leaves_q}) -{dify_block}{metabase_block} +{ollama_internal_block}{dify_block}{metabase_block} STARTED=0 FAILED=0 PIDS=() @@ -332,6 +361,7 @@ def run_compose_up( host: str = "nexus", dify_storage_prep: bool | None = None, metabase_storage_prep: bool | None = None, + ollama_internal_network_prep: bool | None = None, script_runner: ScriptRunner | None = None, ) -> ComposeUpResult: """Render → exec → parse. 
@@ -366,12 +396,18 @@ def run_compose_up( actual_metabase = ( metabase_storage_prep if metabase_storage_prep is not None else "metabase" in enabled ) + actual_ollama_internal = ( + ollama_internal_network_prep + if ollama_internal_network_prep is not None + else ("litellm" in enabled or "ollama" in enabled) + ) script = render_remote_script( parents=parents, leaves=leaves, dify_storage_prep=actual_dify, metabase_storage_prep=actual_metabase, + ollama_internal_network_prep=actual_ollama_internal, ) run_script = script_runner or (lambda s: _remote.ssh_run_script(s, host=host)) diff --git a/stacks/litellm/docker-compose.yml b/stacks/litellm/docker-compose.yml index 4571aa35..0e8cfabb 100644 --- a/stacks/litellm/docker-compose.yml +++ b/stacks/litellm/docker-compose.yml @@ -111,12 +111,15 @@ networks: driver: bridge # Cross-stack join into the ollama stack's internal bridge so # LiteLLM can reach `ollama:11434` directly without going through - # the public CF Tunnel route. Both sides pin the network to the - # global name `ollama-internal` (NOT the default project-prefixed - # `ollama_ollama-internal`) so the external reference here finds - # the actual created network. Requires the ollama stack to be - # enabled — if not, LiteLLM will fail to start with "network - # ollama-internal not found"; disable LiteLLM or enable Ollama. + # the public CF Tunnel route. Both stacks (here and stacks/ollama) + # declare this network as `external: true` + `name: ollama-internal`; + # the deploy pipeline's compose_runner pre-creates the network + # whenever either stack is enabled, so neither compose project owns + # the network's lifecycle. LiteLLM-alone: pre-create fires → + # external reference resolves → operator wires real-provider keys via + # config.yaml. Joint LiteLLM + Ollama: pre-create fires once → + # both stacks join the existing network → `http://ollama:11434` + # resolves cross-stack with no race on creation. 
ollama-internal: external: true name: ollama-internal diff --git a/stacks/ollama/docker-compose.yml b/stacks/ollama/docker-compose.yml index 820310c3..2f5e21d8 100644 --- a/stacks/ollama/docker-compose.yml +++ b/stacks/ollama/docker-compose.yml @@ -75,8 +75,12 @@ networks: app-network: external: true ollama-internal: - # Pin the global name (NOT project-prefixed to `ollama_ollama-internal`) - # so cross-stack consumers like LiteLLM can declare - # `external: true` + `name: ollama-internal` and reliably join it. - driver: bridge + # Treated as external on both sides (here and in stacks/litellm). The + # deploy pipeline pre-creates `ollama-internal` whenever either stack + # is enabled (see compose_runner.render_remote_script's + # `ollama_internal_network_prep` block), so network ownership lives + # outside both compose projects. This keeps the joint Ollama+LiteLLM + # deployment deterministic — neither compose project tries to "own" + # the network and race on creation. + external: true name: ollama-internal diff --git a/tests/unit/test_compose_runner.py b/tests/unit/test_compose_runner.py index fca3dc82..79988732 100644 --- a/tests/unit/test_compose_runner.py +++ b/tests/unit/test_compose_runner.py @@ -426,6 +426,136 @@ def capture(script: str) -> subprocess.CompletedProcess[str]: assert "/mnt/nexus-data/metabase" not in captured_script["script"] +# --------------------------------------------------------------------------- +# ollama-internal cross-stack network prep (shared by ollama + litellm) +# --------------------------------------------------------------------------- + + +def test_render_ollama_internal_network_prep_only_when_flagged() -> None: + """Both `stacks/ollama` and `stacks/litellm` declare `ollama-internal` + as an `external: true` network. Without the network already created, + `docker compose up` aborts BEFORE the container is created. 
The + pre-compose block creates the network idempotently when either stack + is enabled (or omitted otherwise).""" + without = _render_default(ollama_internal_network_prep=False) + assert "ollama-internal" not in without + assert "docker network create" not in without + + with_prep = _render_default(ollama_internal_network_prep=True) + assert "docker network inspect ollama-internal" in with_prep + assert "docker network create --label managed-by=nexus-stack ollama-internal" in with_prep + + +def test_render_ollama_internal_network_prep_is_idempotent() -> None: + """The inspect-then-create guard short-circuits if the network + already exists. A bare `docker network create` would fail with + a non-zero exit on the second deploy under `set -euo pipefail` + and abort the entire compose-up loop. + + Tests the full inspect→create chain as one contiguous expression + rather than just `||` presence: a future refactor that splits + the guard into two unrelated statements (e.g. `inspect; if [ $? + -ne 0 ]; then create; fi`) would lose the short-circuit semantics + under `set -e` and silently break idempotency. Whitespace is + normalised so the test isn't brittle to backslash-newline + continuation tweaks bash treats as one logical line. + """ + script = _render_default(ollama_internal_network_prep=True) + # Normalise whitespace AND strip the bash line-continuation + # backslash (`\` followed by newline) so the substring matcher + # doesn't depend on the renderer's exact line-wrap choice. + normalised = " ".join(script.replace("\\\n", " ").split()) + assert ( + "docker network inspect ollama-internal >/dev/null 2>&1 || " + "docker network create --label managed-by=nexus-stack ollama-internal" + ) in normalised + + +def test_run_compose_up_network_prep_default_when_litellm_in_enabled() -> None: + """ollama_internal_network_prep defaults to True iff 'litellm' OR + 'ollama' is in enabled. 
Mirrors the dify/metabase storage-prep + default semantics.""" + captured_script: dict[str, str] = {} + + def capture(script: str) -> subprocess.CompletedProcess[str]: + captured_script["script"] = script + return subprocess.CompletedProcess( + args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr="" + ) + + run_compose_up(["jupyter", "litellm"], script_runner=capture) + assert "docker network inspect ollama-internal" in captured_script["script"] + + +def test_run_compose_up_network_prep_default_when_ollama_in_enabled() -> None: + """Ollama-only deployment (LiteLLM disabled) still needs the + pre-create because ollama's own compose declares the network as + `external: true` — symmetric ownership with litellm.""" + captured_script: dict[str, str] = {} + + def capture(script: str) -> subprocess.CompletedProcess[str]: + captured_script["script"] = script + return subprocess.CompletedProcess( + args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr="" + ) + + run_compose_up(["jupyter", "ollama"], script_runner=capture) + assert "docker network inspect ollama-internal" in captured_script["script"] + + +def test_run_compose_up_network_prep_renders_once_in_joint_case() -> None: + """Joint LiteLLM + Ollama deployment: only one pre-create block, + not duplicated. 
The `inspect || create` guard is idempotent at + runtime regardless, but rendering the block twice would be a + silent code smell — confirms the inference doesn't double-add.""" + captured_script: dict[str, str] = {} + + def capture(script: str) -> subprocess.CompletedProcess[str]: + captured_script["script"] = script + return subprocess.CompletedProcess( + args=["ssh"], returncode=0, stdout="RESULT started=2 failed=0", stderr="" + ) + + run_compose_up(["litellm", "ollama"], script_runner=capture) + assert ( + captured_script["script"].count( + "docker network create --label managed-by=nexus-stack ollama-internal" + ) + == 1 + ) + + +def test_run_compose_up_network_prep_omitted_when_neither_in_enabled() -> None: + """Neither litellm nor ollama → no network-prep block (also no + spurious network created on stacks that don't need it).""" + captured_script: dict[str, str] = {} + + def capture(script: str) -> subprocess.CompletedProcess[str]: + captured_script["script"] = script + return subprocess.CompletedProcess( + args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr="" + ) + + run_compose_up(["jupyter"], script_runner=capture) + assert "ollama-internal" not in captured_script["script"] + + +def test_run_compose_up_network_prep_explicit_override_beats_enabled_inference() -> None: + """Caller can force ollama_internal_network_prep=False even when + 'litellm' or 'ollama' is in enabled — operator escape hatch if the + network handling needs to be deferred to a different mechanism.""" + captured_script: dict[str, str] = {} + + def capture(script: str) -> subprocess.CompletedProcess[str]: + captured_script["script"] = script + return subprocess.CompletedProcess( + args=["ssh"], returncode=0, stdout="RESULT started=1 failed=0", stderr="" + ) + + run_compose_up(["litellm"], script_runner=capture, ollama_internal_network_prep=False) + assert "ollama-internal" not in captured_script["script"] + + # 
--------------------------------------------------------------------------- # CLI integration # ---------------------------------------------------------------------------