From 5d83c87bca794b61d66cc10c742c5df76db007f4 Mon Sep 17 00:00:00 2001 From: Ani Sinanaj Date: Sun, 15 Mar 2026 16:02:47 +0100 Subject: [PATCH 1/3] add chatterbox tts app in olares --- chatterboxtts/Chart.yaml | 6 ++ chatterboxtts/OlaresManifest.yaml | 117 +++++++++++++++++++++ chatterboxtts/README.md | 134 ++++++++++++++++++++++++ chatterboxtts/owners | 1 + chatterboxtts/templates/configmap.yaml | 25 +++++ chatterboxtts/templates/deployment.yaml | 127 ++++++++++++++++++++++ chatterboxtts/values.yaml | 17 +++ 7 files changed, 427 insertions(+) create mode 100644 chatterboxtts/Chart.yaml create mode 100644 chatterboxtts/OlaresManifest.yaml create mode 100644 chatterboxtts/README.md create mode 100644 chatterboxtts/owners create mode 100644 chatterboxtts/templates/configmap.yaml create mode 100644 chatterboxtts/templates/deployment.yaml create mode 100644 chatterboxtts/values.yaml diff --git a/chatterboxtts/Chart.yaml b/chatterboxtts/Chart.yaml new file mode 100644 index 00000000..bdcab445 --- /dev/null +++ b/chatterboxtts/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: chatterboxtts +description: Chatterbox text-to-speech API for Olares +type: application +version: "1.0.19" +appVersion: "gpu-cu128" diff --git a/chatterboxtts/OlaresManifest.yaml b/chatterboxtts/OlaresManifest.yaml new file mode 100644 index 00000000..806eed5e --- /dev/null +++ b/chatterboxtts/OlaresManifest.yaml @@ -0,0 +1,117 @@ +olaresManifest.version: "0.11.0" +olaresManifest.type: app +metadata: + name: chatterboxtts + description: Private text-to-speech API powered by Resemble AI Chatterbox. + icon: https://avatars.githubusercontent.com/u/21249137?s=200&v=4 + appid: chatterboxtts + title: Chatterbox TTS + version: "1.0.19" + categories: + - AI + - Utilities + - Developer Tools +permission: + appData: true + appCache: true + userData: + - Home +spec: + versionName: "gpu-cu128" + fullDescription: | + Chatterbox TTS exposes a simple HTTP API for speech synthesis on Olares. 
+ + Features + - Text-to-speech generation over HTTP + - OpenAI-style `/v1/audio/speech` endpoint + - Optional reference voice cloning + - Persistent Hugging Face, Torch, and reference voice storage + - CUDA-oriented deployment for NVIDIA GPU hosts + + Endpoints + - `GET /` + - Returns service metadata, configured model, and docs path. + - `GET /health` + - Returns runtime status, CUDA availability, and detected GPU count. + - `GET /v1/models` + - Lists the exposed model ids: `turbo`, `english`, and `multilingual`. + - `POST /tts` + - JSON API for direct synthesis. + - Request body: + - `text` string, required + - `language` string, optional, multilingual mode only + - `audio_format` one of `wav`, `flac`, `ogg` + - `reference_voice` file name from `/data/reference-voices`, optional + - `POST /v1/audio/speech` + - OpenAI-style speech endpoint. + - Request body: + - `model` string, optional, one of `turbo`, `english`, `multilingual` + - `input` string, required + - `voice` string, optional reference voice file name + - `response_format` one of `wav`, `flac`, `ogg` + - `language` string, optional + - `POST /v1/audio/speech/upload` + - Multipart endpoint for ad hoc reference audio upload. 
+ - Form fields: + - `input` text, required + - `response_format` one of `wav`, `flac`, `ogg` + - `language` string, optional + - `reference_audio` file upload, optional + + Example requests + - Direct synthesis: + - `curl -X POST http://chatterboxtts-svc:8000/tts -H "Content-Type: application/json" -d '{"text":"Hello from Olares","audio_format":"wav"}' --output speech.wav` + - OpenAI-style synthesis: + - `curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech -H "Content-Type: application/json" -d '{"model":"turbo","input":"This is a CUDA-backed speech request.","response_format":"wav"}' --output speech.wav` + - Uploaded reference voice: + - `curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech/upload -F "input=Hej från Olares" -F "language=sv" -F "reference_audio=@./reference.wav" --output speech.wav` + + Notes + - This package targets `amd64` Olares nodes with NVIDIA GPU support. + - The first synthesis request downloads model assets into the app data volume. + - The published GHCR image is public, so no registry credentials are required. 
+ developer: progress44 + website: https://github.com/resemble-ai/chatterbox + sourceCode: https://github.com/progress44/rpi-system + submitter: progress44 + doc: https://github.com/resemble-ai/chatterbox + license: + - text: MIT + url: https://github.com/resemble-ai/chatterbox/blob/main/LICENSE + locale: + - en-US + requiredMemory: 12Gi + limitedMemory: 12Gi + requiredDisk: 5Gi + limitedDisk: 40Gi + requiredCpu: 4 + limitedCpu: 4 + requiredGpu: 12Gi + limitedGpu: 16Gi + supportArch: + - amd64 +options: + apiTimeout: 0 + dependencies: + - name: olares + type: system + version: ">=1.12.1-0" +envs: + - envName: OLARES_USER_HUGGINGFACE_TOKEN + required: false + applyOnChange: true + valueFrom: + envName: OLARES_USER_HUGGINGFACE_TOKEN + - envName: OLARES_USER_HUGGINGFACE_SERVICE + required: false + applyOnChange: true + valueFrom: + envName: OLARES_USER_HUGGINGFACE_SERVICE +entrances: + - name: chatterboxtts + port: 8000 + host: chatterboxtts-svc + title: Chatterbox TTS + icon: https://avatars.githubusercontent.com/u/21249137?s=200&v=4 + authLevel: internal + openMethod: window diff --git a/chatterboxtts/README.md b/chatterboxtts/README.md new file mode 100644 index 00000000..51eb9284 --- /dev/null +++ b/chatterboxtts/README.md @@ -0,0 +1,134 @@ +# Chatterbox TTS for Olares + +This package deploys the published image: + +- `ghcr.io/progress44/rpi-system-chatterbox-tts:latest` + +The app expects an NVIDIA-capable `amd64` Olares node and uses the configured +public image directly. + +## API + +Base service inside the cluster: + +- `http://chatterboxtts-svc:8000` + +Endpoints: + +- `GET /` + - Service metadata and docs path. +- `GET /health` + - Health status, CUDA visibility, and GPU count. +- `GET /v1/models` + - Available model ids. +- `POST /tts` + - Native JSON synthesis endpoint. +- `POST /v1/audio/speech` + - OpenAI-compatible speech endpoint. +- `POST /v1/audio/speech/upload` + - Multipart endpoint with uploaded reference audio. 
+ +## Request examples + +Native JSON request: + +```bash +curl -X POST http://chatterboxtts-svc:8000/tts \ + -H "Content-Type: application/json" \ + -d '{"text":"Hello from Olares","audio_format":"wav"}' \ + --output speech.wav +``` + +OpenAI-style request: + +```bash +curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech \ + -H "Content-Type: application/json" \ + -d '{"model":"turbo","input":"This is a CUDA-backed speech request.","response_format":"wav"}' \ + --output speech.wav +``` + +OpenAI-style multilingual request: + +```bash +curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech \ + -H "Content-Type: application/json" \ + -d '{"model":"multilingual","input":"Hej från Olares","language":"sv","response_format":"wav"}' \ + --output speech-sv.wav +``` + +Reference voice upload: + +```bash +curl -X POST http://chatterboxtts-svc:8000/v1/audio/speech/upload \ + -F "input=Hej från Olares" \ + -F "language=sv" \ + -F "reference_audio=@./reference.wav" \ + --output speech.wav +``` + +## Notes + +- `/v1/audio/speech` respects the request `model` value and can switch between + `turbo`, `english`, and `multilingual` within the same deployment. +- `CHATTERBOX_MODEL` remains the default model for endpoints that do not accept + a `model` field. +- `language` only applies when the selected model is multilingual. +- `reference_voice` in JSON requests must refer to a file already present in + `/data/reference-voices`. +- The first synthesis request downloads model files and warms the runtime. + +## Registry auth + +The published GHCR image is public, so this package does not require registry +credentials or image pull secrets during installation. + +For Hugging Face downloads, the chart maps Olares system env values +`OLARES_USER_HUGGINGFACE_SERVICE` and `OLARES_USER_HUGGINGFACE_TOKEN` into the +container as `HF_ENDPOINT` and `HF_TOKEN`. + +The container is configured to run as a non-root user so it can satisfy +Olares admission policy for public registries. 
+ +On Olares, the pod runs as UID/GID `1000` so the mounted `userspace.appData` +paths remain writable without a privileged init container. + +Recommended sizing for this package: + +- CPU: 4 +- RAM: 12Gi +- VRAM: 12Gi to 16Gi + +The runtime also disables numba JIT compilation (`NUMBA_DISABLE_JIT=1`) to avoid +a known `librosa/numba` startup failure in this containerized deployment. + +The image also pins `numba==0.61.2` and `llvmlite==0.44.0` so the Chatterbox +import path stays compatible with the deployed Python stack. + +The chart keeps `reference-voices`, Hugging Face cache, and Torch cache under +`userspace.appData` for persistence. + +On Olares with HAMI, the chart now requests one GPU and sets +`nvidia.com/gpumem: 12288` for both requests and limits. + +## Operational logging + +Every request now includes an `X-Request-Id` response header. Use that id to +correlate client failures with pod logs. + +Tail logs: + +```bash +kubectl logs -n chatterboxtts-progress44 deploy/chatterboxtts -f +``` + +The service logs: + +- request start and completion +- synthesis success with endpoint, model family, language, format, text length, + reference source, output bytes, and duration +- synthesis failures with request id and traceback +- startup configuration, including model, device, CUDA visibility, and cache + directories + +The service logs metadata only and does not log full request text. 
diff --git a/chatterboxtts/owners b/chatterboxtts/owners new file mode 100644 index 00000000..37cdc648 --- /dev/null +++ b/chatterboxtts/owners @@ -0,0 +1 @@ +progress44 diff --git a/chatterboxtts/templates/configmap.yaml b/chatterboxtts/templates/configmap.yaml new file mode 100644 index 00000000..a49aa036 --- /dev/null +++ b/chatterboxtts/templates/configmap.yaml @@ -0,0 +1,25 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatterboxtts-config + namespace: {{ .Release.Namespace }} +data: + CHATTERBOX_MODEL: {{ .Values.tts.model | quote }} + CHATTERBOX_DEVICE: {{ .Values.tts.device | quote }} + CHATTERBOX_ENABLE_DOCS: {{ .Values.tts.enableDocs | quote }} + CHATTERBOX_MAX_TEXT_LENGTH: {{ .Values.tts.maxTextLength | quote }} + CHATTERBOX_DEFAULT_LANGUAGE: {{ .Values.tts.defaultLanguage | quote }} + CHATTERBOX_DEFAULT_AUDIO_FORMAT: {{ .Values.tts.defaultAudioFormat | quote }} + CHATTERBOX_REF_VOICE_DIR: /data/reference-voices + HF_HOME: /data/huggingface + HF_ENDPOINT: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_SERVICE | default "https://huggingface.co/" | quote }} + HF_TOKEN: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_TOKEN | default "" | quote }} + HUGGING_FACE_HUB_TOKEN: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_TOKEN | default "" | quote }} + HF_HUB_CACHE: /data/huggingface/hub + TRANSFORMERS_CACHE: /data/huggingface/transformers + TORCH_HOME: /data/torch + NUMBA_CACHE_DIR: /tmp/numba + NUMBA_DISABLE_JIT: "1" + NVIDIA_VISIBLE_DEVICES: {{ .Values.tts.nvidiaVisibleDevices | quote }} + NVIDIA_DRIVER_CAPABILITIES: {{ .Values.tts.nvidiaDriverCapabilities | quote }} diff --git a/chatterboxtts/templates/deployment.yaml b/chatterboxtts/templates/deployment.yaml new file mode 100644 index 00000000..6e622d9f --- /dev/null +++ b/chatterboxtts/templates/deployment.yaml @@ -0,0 +1,127 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + app: chatterboxtts + annotations: + 
applications.app.bytetrade.io/gpu-inject: "true" +spec: + replicas: 1 + selector: + matchLabels: + app: chatterboxtts + strategy: + type: Recreate + template: + metadata: + labels: + app: chatterboxtts + spec: + securityContext: + fsGroup: 1000 + runAsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + containers: + - name: chatterboxtts + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + envFrom: + - configMapRef: + name: chatterboxtts-config + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + resources: + requests: + cpu: "4" + memory: 12Gi + nvidia.com/gpu: "1" + nvidia.com/gpumem: "12288" + limits: + cpu: "4" + memory: 12Gi + nvidia.com/gpu: "1" + nvidia.com/gpumem: "12288" + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 20 + timeoutSeconds: 5 + volumeMounts: + - name: huggingface + mountPath: /data/huggingface + - name: torch + mountPath: /data/torch + - name: reference-voices + mountPath: /data/reference-voices + securityContext: + allowPrivilegeEscalation: false + runAsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + volumes: + - name: huggingface + hostPath: + {{- if .Values.sysVersion }} + {{- if semverCompare ">=1.12.3-0" (toString .Values.sysVersion) }} + path: "{{ .Values.userspace.appData }}/huggingface" + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/huggingface" + {{- end }} + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/huggingface" + {{- end }} + type: DirectoryOrCreate + - name: torch + hostPath: + {{- if .Values.sysVersion }} + {{- if semverCompare ">=1.12.3-0" (toString .Values.sysVersion) }} + path: "{{ .Values.userspace.appData }}/torch" + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name 
}}/torch" + {{- end }} + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/torch" + {{- end }} + type: DirectoryOrCreate + - name: reference-voices + hostPath: + {{- if .Values.sysVersion }} + {{- if semverCompare ">=1.12.3-0" (toString .Values.sysVersion) }} + path: "{{ .Values.userspace.appData }}/reference-voices" + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/reference-voices" + {{- end }} + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/reference-voices" + {{- end }} + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Service +metadata: + name: chatterboxtts-svc + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: chatterboxtts + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: http diff --git a/chatterboxtts/values.yaml b/chatterboxtts/values.yaml new file mode 100644 index 00000000..8da91807 --- /dev/null +++ b/chatterboxtts/values.yaml @@ -0,0 +1,17 @@ +image: + repository: ghcr.io/progress44/rpi-system-chatterbox-tts + tag: latest + pullPolicy: IfNotPresent + +service: + port: 8000 + +tts: + model: turbo + device: cuda + enableDocs: "true" + maxTextLength: "4000" + defaultLanguage: en + defaultAudioFormat: wav + nvidiaVisibleDevices: all + nvidiaDriverCapabilities: compute,utility From 12a31413c4d95fe400312e3d44ec362b42b5f1dc Mon Sep 17 00:00:00 2001 From: Ani Sinanaj Date: Sun, 15 Mar 2026 20:21:58 +0100 Subject: [PATCH 2/3] update chart --- chatterboxtts/Chart.yaml | 2 +- chatterboxtts/OlaresManifest.yaml | 2 +- chatterboxtts/README.md | 4 ++++ chatterboxtts/values.yaml | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/chatterboxtts/Chart.yaml b/chatterboxtts/Chart.yaml index bdcab445..4b85999c 100644 --- a/chatterboxtts/Chart.yaml +++ b/chatterboxtts/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: chatterboxtts description: Chatterbox text-to-speech API for Olares type: application -version: 
"1.0.19" +version: "1.0.25" appVersion: "gpu-cu128" diff --git a/chatterboxtts/OlaresManifest.yaml b/chatterboxtts/OlaresManifest.yaml index 806eed5e..9ef43690 100644 --- a/chatterboxtts/OlaresManifest.yaml +++ b/chatterboxtts/OlaresManifest.yaml @@ -6,7 +6,7 @@ metadata: icon: https://avatars.githubusercontent.com/u/21249137?s=200&v=4 appid: chatterboxtts title: Chatterbox TTS - version: "1.0.19" + version: "1.0.25" categories: - AI - Utilities diff --git a/chatterboxtts/README.md b/chatterboxtts/README.md index 51eb9284..c040d46d 100644 --- a/chatterboxtts/README.md +++ b/chatterboxtts/README.md @@ -7,6 +7,10 @@ This package deploys the published image: The app expects an NVIDIA-capable `amd64` Olares node and uses the configured public image directly. +The chart is configured with `image.pullPolicy: Always`, so pods will always +attempt to pull the current `latest` tag on start instead of reusing a cached +node image. + ## API Base service inside the cluster: diff --git a/chatterboxtts/values.yaml b/chatterboxtts/values.yaml index 8da91807..340521f0 100644 --- a/chatterboxtts/values.yaml +++ b/chatterboxtts/values.yaml @@ -1,7 +1,7 @@ image: repository: ghcr.io/progress44/rpi-system-chatterbox-tts tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always service: port: 8000 From a4aba901464665fedfd3b30ecddffdeb00340736 Mon Sep 17 00:00:00 2001 From: Ani Sinanaj Date: Tue, 24 Mar 2026 00:52:53 +0200 Subject: [PATCH 3/3] add kb whisper large olares app --- kbwhisperlarge/Chart.yaml | 6 ++ kbwhisperlarge/OlaresManifest.yaml | 96 +++++++++++++++++++++++ kbwhisperlarge/README.md | 35 +++++++++ kbwhisperlarge/owners | 1 + kbwhisperlarge/templates/configmap.yaml | 20 +++++ kbwhisperlarge/templates/deployment.yaml | 99 ++++++++++++++++++++++++ kbwhisperlarge/values.yaml | 16 ++++ 7 files changed, 273 insertions(+) create mode 100644 kbwhisperlarge/Chart.yaml create mode 100644 kbwhisperlarge/OlaresManifest.yaml create mode 100644 kbwhisperlarge/README.md create mode 
100644 kbwhisperlarge/owners create mode 100644 kbwhisperlarge/templates/configmap.yaml create mode 100644 kbwhisperlarge/templates/deployment.yaml create mode 100644 kbwhisperlarge/values.yaml diff --git a/kbwhisperlarge/Chart.yaml b/kbwhisperlarge/Chart.yaml new file mode 100644 index 00000000..4e26502c --- /dev/null +++ b/kbwhisperlarge/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: kbwhisperlarge +description: Olares app for KBLab kb-whisper-large transcription API +type: application +version: "1.0.0" +appVersion: "1.0.0" diff --git a/kbwhisperlarge/OlaresManifest.yaml b/kbwhisperlarge/OlaresManifest.yaml new file mode 100644 index 00000000..8c99c714 --- /dev/null +++ b/kbwhisperlarge/OlaresManifest.yaml @@ -0,0 +1,96 @@ +olaresManifest.version: "0.11.0" +olaresManifest.type: app +metadata: + name: kbwhisperlarge + description: Private transcription API powered by KBLab kb-whisper-large. + icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg + appid: kbwhisperlarge + title: KB Whisper Large + version: "1.0.0" + categories: + - AI + - Utilities + - Developer Tools +permission: + appData: true + appCache: true + userData: + - Home +spec: + versionName: "gpu-cu128" + fullDescription: | + KB Whisper Large exposes a transcription API on Olares using the model: + + - `KBLab/kb-whisper-large` + + Endpoints + - `GET /` + - Returns service metadata and model initialization state. + - `GET /health` + - Returns runtime health and model loading status. + - `GET /v1/models` + - Returns the configured model id. + - `POST /v1/audio/transcriptions` + - OpenAI-style transcription endpoint using multipart form-data. + - Request fields: + - `file` (required) + - `model` (must be `KBLab/kb-whisper-large`) + - `language` (optional) + - `prompt` (optional) + - `response_format` (`json`, `verbose_json`, or `text`) + - `temperature` (optional) + - `POST /transcribe` + - Simple alias that returns `{ "text": "..." }`. 
+ + Example request + - `curl -X POST http://kbwhisperlarge-svc:8000/v1/audio/transcriptions -F "model=KBLab/kb-whisper-large" -F "file=@./sample.wav" -F "language=sv"` + + Notes + - First request may be slow while model weights are pulled into app data. + - Hugging Face cache is persisted at `userspace.appData/huggingface`. + - This package targets `amd64` Olares nodes with NVIDIA GPU support. + developer: progress44 + website: https://huggingface.co/KBLab/kb-whisper-large + sourceCode: https://github.com/progress44/rpi-system + submitter: progress44 + doc: https://huggingface.co/KBLab/kb-whisper-large + license: + - text: Apache-2.0 + url: https://huggingface.co/KBLab/kb-whisper-large + locale: + - en-US + requiredMemory: 12Gi + limitedMemory: 12Gi + requiredDisk: 5Gi + limitedDisk: 40Gi + requiredCpu: 4 + limitedCpu: 4 + requiredGpu: 12Gi + limitedGpu: 16Gi + supportArch: + - amd64 +options: + apiTimeout: 0 + dependencies: + - name: olares + type: system + version: ">=1.12.1-0" +envs: + - envName: OLARES_USER_HUGGINGFACE_TOKEN + required: false + applyOnChange: true + valueFrom: + envName: OLARES_USER_HUGGINGFACE_TOKEN + - envName: OLARES_USER_HUGGINGFACE_SERVICE + required: false + applyOnChange: true + valueFrom: + envName: OLARES_USER_HUGGINGFACE_SERVICE +entrances: + - name: kbwhisperlarge + port: 8000 + host: kbwhisperlarge-svc + title: KB Whisper Large + icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg + authLevel: internal + openMethod: window diff --git a/kbwhisperlarge/README.md b/kbwhisperlarge/README.md new file mode 100644 index 00000000..6bfa9b1b --- /dev/null +++ b/kbwhisperlarge/README.md @@ -0,0 +1,35 @@ +# KB Whisper Large for Olares + +This package deploys the published image: + +- `ghcr.io/progress44/rpi-system-kb-whisper-large:latest` + +The app exposes OpenAI-compatible transcription with +`KBLab/kb-whisper-large` at: + +- `http://kbwhisperlarge-svc:8000` + +## Endpoints + +- `GET /` +- `GET /health` +- `GET 
/v1/models` +- `POST /v1/audio/transcriptions` +- `POST /transcribe` + +## Request example + +```bash +curl -X POST http://kbwhisperlarge-svc:8000/v1/audio/transcriptions \ + -F "model=KBLab/kb-whisper-large" \ + -F "file=@./sample.wav" \ + -F "language=sv" \ + -F "response_format=json" +``` + +## Notes + +- The first request may be slower while the model downloads and caches. +- Hugging Face cache persists under `userspace.appData`. +- Use Olares env variables `OLARES_USER_HUGGINGFACE_TOKEN` and + `OLARES_USER_HUGGINGFACE_SERVICE` if needed for your environment. diff --git a/kbwhisperlarge/owners b/kbwhisperlarge/owners new file mode 100644 index 00000000..37cdc648 --- /dev/null +++ b/kbwhisperlarge/owners @@ -0,0 +1 @@ +progress44 diff --git a/kbwhisperlarge/templates/configmap.yaml b/kbwhisperlarge/templates/configmap.yaml new file mode 100644 index 00000000..ae616e24 --- /dev/null +++ b/kbwhisperlarge/templates/configmap.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kbwhisperlarge-config + namespace: {{ .Release.Namespace }} +data: + WHISPER_MODEL_ID: {{ .Values.whisper.modelId | quote }} + WHISPER_DEVICE: {{ .Values.whisper.device | quote }} + WHISPER_DEFAULT_LANGUAGE: {{ .Values.whisper.defaultLanguage | quote }} + WHISPER_MAX_UPLOAD_SIZE_MB: {{ .Values.whisper.maxUploadSizeMb | quote }} + WHISPER_ENABLE_DOCS: {{ .Values.whisper.enableDocs | quote }} + HF_ENDPOINT: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_SERVICE | default "https://huggingface.co/" | quote }} + HF_TOKEN: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_TOKEN | default "" | quote }} + HUGGING_FACE_HUB_TOKEN: {{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_TOKEN | default "" | quote }} + HF_HOME: /data/huggingface + HF_HUB_CACHE: /data/huggingface/hub + TRANSFORMERS_CACHE: /data/huggingface/transformers + NVIDIA_VISIBLE_DEVICES: {{ .Values.whisper.nvidiaVisibleDevices | quote }} + NVIDIA_DRIVER_CAPABILITIES: {{ .Values.whisper.nvidiaDriverCapabilities | quote }} 
diff --git a/kbwhisperlarge/templates/deployment.yaml b/kbwhisperlarge/templates/deployment.yaml new file mode 100644 index 00000000..10382d0b --- /dev/null +++ b/kbwhisperlarge/templates/deployment.yaml @@ -0,0 +1,99 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + app: kbwhisperlarge + annotations: + applications.app.bytetrade.io/gpu-inject: "true" +spec: + replicas: 1 + selector: + matchLabels: + app: kbwhisperlarge + strategy: + type: Recreate + template: + metadata: + labels: + app: kbwhisperlarge + spec: + securityContext: + fsGroup: 1000 + runAsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + containers: + - name: kbwhisperlarge + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + envFrom: + - configMapRef: + name: kbwhisperlarge-config + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + resources: + requests: + cpu: "4" + memory: 12Gi + nvidia.com/gpu: "1" + nvidia.com/gpumem: "12288" + limits: + cpu: "4" + memory: 12Gi + nvidia.com/gpu: "1" + nvidia.com/gpumem: "12288" + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 45 + periodSeconds: 20 + timeoutSeconds: 5 + volumeMounts: + - name: huggingface + mountPath: /data/huggingface + securityContext: + allowPrivilegeEscalation: false + runAsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + volumes: + - name: huggingface + hostPath: + {{- if .Values.sysVersion }} + {{- if semverCompare ">=1.12.3-0" (toString .Values.sysVersion) }} + path: "{{ .Values.userspace.appData }}/huggingface" + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/huggingface" + {{- end }} + {{- else }} + path: "{{ .Values.userspace.appData }}/{{ .Release.Name }}/huggingface" + 
{{- end }} + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Service +metadata: + name: kbwhisperlarge-svc + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: kbwhisperlarge + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: http diff --git a/kbwhisperlarge/values.yaml b/kbwhisperlarge/values.yaml new file mode 100644 index 00000000..d3f5e034 --- /dev/null +++ b/kbwhisperlarge/values.yaml @@ -0,0 +1,16 @@ +image: + repository: ghcr.io/progress44/rpi-system-kb-whisper-large + tag: latest + pullPolicy: Always + +service: + port: 8000 + +whisper: + modelId: KBLab/kb-whisper-large + device: cuda + defaultLanguage: sv + maxUploadSizeMb: "200" + enableDocs: "true" + nvidiaVisibleDevices: all + nvidiaDriverCapabilities: compute,utility