# ModelRelay docker-compose environment
# Copy this file: cp .env.example .env
# Then edit the values below before running: docker compose up -d
# ── Required ────────────────────────────────────────────────────────
# Shared secret that workers present when connecting to the proxy.
# Generate one with: openssl rand -hex 32
WORKER_SECRET=change-me-to-a-strong-random-secret
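# One way to fill this in without editing by hand (a sketch; assumes GNU sed,
# so on macOS use `sed -i ''` instead of `sed -i`):
#   sed -i "s|^WORKER_SECRET=.*|WORKER_SECRET=$(openssl rand -hex 32)|" .env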
# ── Proxy settings ──────────────────────────────────────────────────
# Host port the proxy is published on (container always binds 8080 internally).
LISTEN_PORT=8080
# Maximum number of requests that can wait in the queue.
MAX_QUEUE_LEN=100
# Seconds a request will wait in the queue for a worker before timing out.
QUEUE_TIMEOUT_SECS=30
# Seconds allowed for the full request lifecycle (queue + inference).
REQUEST_TIMEOUT_SECS=300
# Rust log level for the proxy (trace, debug, info, warn, error).
PROXY_LOG_LEVEL=info
# ── Worker 1 ────────────────────────────────────────────────────────
# URL of the local model server that worker 1 forwards requests to.
WORKER1_BACKEND_URL=http://host.docker.internal:8000
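# Note: host.docker.internal resolves out of the box on Docker Desktop
# (macOS/Windows). On Linux (Docker Engine 20.10+), map it to the host
# gateway in each worker service in docker-compose.yml:
#   extra_hosts:
#     - "host.docker.internal:host-gateway"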
# Comma-separated model names this worker advertises.
WORKER1_MODELS=llama3.2:3b
# Maximum concurrent requests worker 1 will handle.
WORKER1_MAX_CONCURRENCY=1
# ── Worker 2 ────────────────────────────────────────────────────────
# URL of the local model server that worker 2 forwards requests to.
WORKER2_BACKEND_URL=http://host.docker.internal:8001
# Comma-separated model names this worker advertises.
WORKER2_MODELS=codellama:7b
# Maximum concurrent requests worker 2 will handle.
WORKER2_MAX_CONCURRENCY=1
# Rust log level for all workers (trace, debug, info, warn, error).
WORKER_LOG_LEVEL=info
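# ── Sanity check ────────────────────────────────────────────────────
# After editing, you can verify these values were picked up before starting:
#   docker compose config    # prints the compose file with env substituted
#   docker compose up -d && docker compose ps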