6 changes: 6 additions & 0 deletions chatterboxtts/Chart.yaml
@@ -0,0 +1,6 @@
apiVersion: v2
name: chatterboxtts
description: Chatterbox text-to-speech API for Olares
type: application
version: "1.0.25"
appVersion: "gpu-cu128"
117 changes: 117 additions & 0 deletions chatterboxtts/OlaresManifest.yaml
@@ -0,0 +1,117 @@
olaresManifest.version: "0.11.0"
olaresManifest.type: app
metadata:
  name: chatterboxtts
  description: Private text-to-speech API powered by Resemble AI Chatterbox.
  icon: https://avatars.githubusercontent.com/u/21249137?s=200&v=4
  appid: chatterboxtts
  title: Chatterbox TTS
  version: "1.0.25"
  categories:
    - AI
    - Utilities
    - Developer Tools
permission:
  appData: true
  appCache: true
  userData:
    - Home
spec:
  versionName: "gpu-cu128"
  fullDescription: |
    Chatterbox TTS exposes a simple HTTP API for speech synthesis on Olares.

    Features
    - Text-to-speech generation over HTTP
    - OpenAI-style `/v1/audio/speech` endpoint
    - Optional reference voice cloning
    - Persistent Hugging Face, Torch, and reference voice storage
    - CUDA-oriented deployment for NVIDIA GPU hosts

    Endpoints
    - `GET /`
      - Returns service metadata, configured model, and docs path.
    - `GET /health`
      - Returns runtime status, CUDA availability, and detected GPU count.
    - `GET /v1/models`
      - Lists the exposed model ids: `turbo`, `english`, and `multilingual`.
    - `POST /tts`
      - JSON API for direct synthesis.
      - Request body:
        - `text` string, required
        - `language` string, optional, multilingual mode only
        - `audio_format` one of `wav`, `flac`, `ogg`
        - `reference_voice` file name from `/data/reference-voices`, optional
    - `POST /v1/audio/speech`
      - OpenAI-style speech endpoint.
      - Request body:
        - `model` string, optional, one of `turbo`, `english`, `multilingual`
        - `input` string, required
        - `voice` string, optional reference voice file name
        - `response_format` one of `wav`, `flac`, `ogg`
        - `language` string, optional
    - `POST /v1/audio/speech/upload`
      - Multipart endpoint for ad hoc reference audio upload.
      - Form fields:
        - `input` text, required
        - `response_format` one of `wav`, `flac`, `ogg`
        - `language` string, optional
        - `reference_audio` file upload, optional

    Example requests
    - Direct synthesis:
      - `curl -X POST http://chatterboxtts-svc:8000/tts -H "Content-Type: application/json" -d '{"text":"Hello from Olares","audio_format":"wav"}' --output speech.wav`
    - OpenAI-style synthesis:
      - `curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech -H "Content-Type: application/json" -d '{"model":"turbo","input":"This is a CUDA-backed speech request.","response_format":"wav"}' --output speech.wav`
    - Uploaded reference voice:
      - `curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech/upload -F "input=Hej från Olares" -F "language=sv" -F "reference_audio=@./reference.wav" --output speech.wav`

    Notes
    - This package targets `amd64` Olares nodes with NVIDIA GPU support.
    - The first synthesis request downloads model assets into the app data volume.
    - The published GHCR image is public, so no registry credentials are required.
  developer: progress44
  website: https://github.com/resemble-ai/chatterbox
  sourceCode: https://github.com/progress44/rpi-system
  submitter: progress44
  doc: https://github.com/resemble-ai/chatterbox
  license:
    - text: MIT
      url: https://github.com/resemble-ai/chatterbox/blob/main/LICENSE
  locale:
    - en-US
  requiredMemory: 12Gi
  limitedMemory: 12Gi
  requiredDisk: 5Gi
  limitedDisk: 40Gi
  requiredCpu: 4
  limitedCpu: 4
  requiredGpu: 12Gi
  limitedGpu: 16Gi
  supportArch:
    - amd64
options:
  apiTimeout: 0
  dependencies:
    - name: olares
      type: system
      version: ">=1.12.1-0"
envs:
  - envName: OLARES_USER_HUGGINGFACE_TOKEN
    required: false
    applyOnChange: true
    valueFrom:
      envName: OLARES_USER_HUGGINGFACE_TOKEN
  - envName: OLARES_USER_HUGGINGFACE_SERVICE
    required: false
    applyOnChange: true
    valueFrom:
      envName: OLARES_USER_HUGGINGFACE_SERVICE
entrances:
  - name: chatterboxtts
    port: 8000
    host: chatterboxtts-svc
    title: Chatterbox TTS
    icon: https://avatars.githubusercontent.com/u/21249137?s=200&v=4
    authLevel: internal
    openMethod: window
138 changes: 138 additions & 0 deletions chatterboxtts/README.md
@@ -0,0 +1,138 @@
# Chatterbox TTS for Olares

This package deploys the published image:

- `ghcr.io/progress44/rpi-system-chatterbox-tts:latest`

The app expects an `amd64` Olares node with NVIDIA GPU support and pulls the
configured public image directly.

The chart is configured with `image.pullPolicy: Always`, so pods will always
attempt to pull the current `latest` tag on start instead of reusing a cached
node image.

## API

Base service inside the cluster:

- `http://chatterboxtts-svc:8000`

Endpoints:

- `GET /`
- Service metadata and docs path.
- `GET /health`
- Health status, CUDA visibility, and GPU count.
- `GET /v1/models`
- Available model ids.
- `POST /tts`
- Native JSON synthesis endpoint.
- `POST /v1/audio/speech`
- OpenAI-compatible speech endpoint.
- `POST /v1/audio/speech/upload`
- Multipart endpoint with uploaded reference audio.

## Request examples

Native JSON request:

```bash
curl -X POST http://chatterboxtts-svc:8000/tts \
-H "Content-Type: application/json" \
-d '{"text":"Hello from Olares","audio_format":"wav"}' \
--output speech.wav
```

OpenAI-style request:

```bash
curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{"model":"turbo","input":"This is a CUDA-backed speech request.","response_format":"wav"}' \
--output speech.wav
```

OpenAI-style multilingual request:

```bash
curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{"model":"multilingual","input":"Hej fran Olares","language":"sv","response_format":"wav"}' \
--output speech-sv.wav
```

Reference voice upload:

```bash
curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech/upload \
-F "input=Hej från Olares" \
-F "language=sv" \
-F "reference_audio=@./reference.wav" \
--output speech.wav
```

## Notes

- `/v1/audio/speech` respects the request `model` value and can switch between
`turbo`, `english`, and `multilingual` within the same deployment.
- `CHATTERBOX_MODEL` remains the default model for endpoints that do not accept
a `model` field.
- `language` only applies when the selected model is multilingual.
- `reference_voice` in JSON requests must refer to a file already present in
`/data/reference-voices`.
- The first synthesis request downloads model files and warms the runtime.
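
As a concrete variant of the `reference_voice` note above, the JSON body can be
kept in a file and posted with `-d @`. `my-voice.wav` is a placeholder name; it
must already exist in `/data/reference-voices` inside the pod:

```bash
# Write the request body to a file so it can be inspected before sending.
# "my-voice.wav" is a placeholder for a file already in /data/reference-voices.
cat > /tmp/tts-cloned.json <<'EOF'
{
  "text": "Hello from Olares",
  "audio_format": "wav",
  "reference_voice": "my-voice.wav"
}
EOF

# The service host only resolves inside the cluster, so fall back to a notice
# when running this from elsewhere.
curl -X POST http://chatterboxtts-svc:8000/tts \
  -H "Content-Type: application/json" \
  -d @/tmp/tts-cloned.json \
  --output speech-cloned.wav \
  || echo "chatterboxtts-svc unreachable from this machine"
```

If the named file is missing from `/data/reference-voices`, expect the service
to reject the request rather than fall back to the default voice.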

## Registry auth

The published GHCR image is public, so this package does not require registry
credentials or image pull secrets during installation.

For Hugging Face downloads, the chart maps Olares system env values
`OLARES_USER_HUGGINGFACE_SERVICE` and `OLARES_USER_HUGGINGFACE_TOKEN` into the
container as `HF_ENDPOINT` and `HF_TOKEN`.

The container is configured to run as a non-root user so it can satisfy
Olares admission policy for public registries.

On Olares, the pod runs as UID/GID `1000` so the mounted `userspace.appData`
paths remain writable without a privileged init container.

Recommended sizing for this package:

- CPU: 4
- RAM: 12Gi
- VRAM: 12Gi to 16Gi

The runtime also disables the numba JIT (`NUMBA_DISABLE_JIT=1`) and redirects
its cache directory to avoid a known `librosa`/`numba` startup failure in this
containerized deployment.

The image also pins `numba==0.61.2` and `llvmlite==0.44.0` so the Chatterbox
import path stays compatible with the deployed Python stack.

The chart keeps `reference-voices`, Hugging Face cache, and Torch cache under
`userspace.appData` for persistence.

On Olares with HAMI, the chart now requests one GPU and sets
`nvidia.com/gpumem: 12288` for both requests and limits.
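
In container terms, that corresponds to a resource block along these lines (a
sketch only; the exact values file layout and key names in this chart may
differ, but HAMI consumes the `nvidia.com/gpu` and `nvidia.com/gpumem`
extended resources shown here):

```yaml
resources:
  requests:
    nvidia.com/gpu: "1"
    nvidia.com/gpumem: "12288"
  limits:
    nvidia.com/gpu: "1"
    nvidia.com/gpumem: "12288"
```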

## Operational logging

Every request now includes an `X-Request-Id` response header. Use that id to
correlate client failures with pod logs.
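
One way to capture the id for correlation is to dump response headers next to
the audio output (a sketch; run it from inside the cluster, where
`chatterboxtts-svc` resolves):

```bash
# Start from an empty header capture so the grep below always has a target.
: > /tmp/tts-headers.txt

# -D saves response headers; the || branch covers running this off-cluster.
curl -sS -D /tmp/tts-headers.txt \
  -X POST http://chatterboxtts-svc:8000/tts \
  -H "Content-Type: application/json" \
  -d '{"text":"correlation probe","audio_format":"wav"}' \
  --output /tmp/probe.wav \
  || echo "chatterboxtts-svc unreachable from this machine"

# Read the request id back for use in log searches (case-insensitive match).
grep -i '^x-request-id:' /tmp/tts-headers.txt || true
```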

Tail logs:

```bash
kubectl logs -n chatterboxtts-progress44 deploy/chatterboxtts -f
```

The service logs:

- request start and completion
- synthesis success with endpoint, model family, language, format, text length,
reference source, output bytes, and duration
- synthesis failures with request id and traceback
- startup configuration, including model, device, CUDA visibility, and cache
directories

The service logs metadata only and does not log full request text.
1 change: 1 addition & 0 deletions chatterboxtts/owners
@@ -0,0 +1 @@
progress44
25 changes: 25 additions & 0 deletions chatterboxtts/templates/configmap.yaml
@@ -0,0 +1,25 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: chatterboxtts-config
  namespace: {{ .Release.Namespace }}
data:
  CHATTERBOX_MODEL: {{ .Values.tts.model | quote }}
  CHATTERBOX_DEVICE: {{ .Values.tts.device | quote }}
  CHATTERBOX_ENABLE_DOCS: {{ .Values.tts.enableDocs | quote }}
  CHATTERBOX_MAX_TEXT_LENGTH: {{ .Values.tts.maxTextLength | quote }}
  CHATTERBOX_DEFAULT_LANGUAGE: {{ .Values.tts.defaultLanguage | quote }}
  CHATTERBOX_DEFAULT_AUDIO_FORMAT: {{ .Values.tts.defaultAudioFormat | quote }}
  CHATTERBOX_REF_VOICE_DIR: /data/reference-voices
  HF_HOME: /data/huggingface
  HF_ENDPOINT: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_SERVICE | default "https://huggingface.co/" | quote }}
  HF_TOKEN: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_TOKEN | default "" | quote }}
  HUGGING_FACE_HUB_TOKEN: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_TOKEN | default "" | quote }}
  HF_HUB_CACHE: /data/huggingface/hub
  TRANSFORMERS_CACHE: /data/huggingface/transformers
  TORCH_HOME: /data/torch
  NUMBA_CACHE_DIR: /tmp/numba
  NUMBA_DISABLE_JIT: "1"
  NVIDIA_VISIBLE_DEVICES: {{ .Values.tts.nvidiaVisibleDevices | quote }}
  NVIDIA_DRIVER_CAPABILITIES: {{ .Values.tts.nvidiaDriverCapabilities | quote }}