From fe73fbf78db6bed65853ae1a7e9acacbe7cd6c67 Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 00:57:32 -0400
Subject: [PATCH 1/8] Improve error handling, log ONNX providers, and add chat
 proxy errors

- Add info-level logging of available ONNX Runtime providers in Kokoro and Piper model managers to aid debugging.
- Introduce a custom `APIProxyError` and global FastAPI handler for chat proxy failures, returning structured JSON errors with error IDs, hints, and optional debug info.
- Validate audio file presence early in Gradio UI and STT tab handlers, raising clear exceptions if missing.
- Log the Gradio UI access URL at startup for user convenience.
---
 .../executors/kokoro/model_manager.py         |  2 +
 src/speaches/executors/piper/model_manager.py |  1 +
 src/speaches/main.py                          | 30 +++++++-
 src/speaches/routers/chat.py                  | 68 ++++++++++++++++++-
 src/speaches/ui/tabs/audio_chat.py            |  3 +
 src/speaches/ui/tabs/stt.py                   |  6 ++
 6 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/src/speaches/executors/kokoro/model_manager.py b/src/speaches/executors/kokoro/model_manager.py
index 3f29e3e4..4c0b3bde 100644
--- a/src/speaches/executors/kokoro/model_manager.py
+++ b/src/speaches/executors/kokoro/model_manager.py
@@ -25,6 +25,8 @@ def _load_fn(self, model_id: str) -> Kokoro:
             get_available_providers()
         )  # HACK: `get_available_providers` is an unknown symbol (on MacOS at least)
         available_providers = available_providers - ORT_PROVIDERS_BLACKLIST
+        # Log the providers that remain after blacklisting, to aid debugging of provider selection.
+        logger.info(f"Available ONNX Runtime providers: {available_providers}")
         if "TensorrtExecutionProvider" in available_providers:
             available_providers.remove("TensorrtExecutionProvider")
         inf_sess = InferenceSession(model_files.model, providers=list(available_providers))
diff --git a/src/speaches/executors/piper/model_manager.py b/src/speaches/executors/piper/model_manager.py
index 88c93c1b..7ea8a338 100644
--- a/src/speaches/executors/piper/model_manager.py
+++ b/src/speaches/executors/piper/model_manager.py
@@ -34,6 +34,7 @@ def _load_fn(self, model_id: str) -> PiperVoice:
             get_available_providers()
         )  # HACK: `get_available_providers` is an unknown symbol (on MacOS at least)
         available_providers = available_providers - ORT_PROVIDERS_BLACKLIST
+        logger.info(f"Available ONNX Runtime providers: {available_providers}")
         inf_sess = InferenceSession(model_files.model, providers=list(available_providers))
         conf = PiperConfig.from_dict(json.loads(model_files.config.read_text()))
         return PiperVoice(session=inf_sess, config=conf)
diff --git a/src/speaches/main.py b/src/speaches/main.py
index 17f50005..1b2de97f 100644
--- a/src/speaches/main.py
+++ b/src/speaches/main.py
@@ -8,11 +8,14 @@
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from starlette.responses import RedirectResponse
+from fastapi.responses import JSONResponse
+import uuid
 
 from speaches.dependencies import ApiKeyDependency, get_config
 from speaches.logger import setup_logger
 from speaches.routers.chat import (
     router as chat_router,
+    APIProxyError,
 )
 from speaches.routers.misc import (
     router as misc_router,
@@ -64,6 +67,23 @@ def create_app() -> FastAPI:
 
     app = FastAPI(dependencies=dependencies, openapi_tags=TAGS_METADATA)
 
+    # Register global exception handler for APIProxyError
+    @app.exception_handler(APIProxyError)
+    async def api_proxy_error_handler(request, exc: APIProxyError):  # turns APIProxyError into a structured JSON response
+        error_id = str(uuid.uuid4())  # same ID is written to the log and returned to the client
+        logger.exception(f"[{error_id}] {exc.message}")
+        content = {
+            "detail": exc.message,
+            "hint": exc.hint,
+            "suggested_fixes": exc.suggestions,
+            "error_id": error_id,
+        }
+        import os
+        log_level = os.getenv("SPEACHES_LOG_LEVEL", "INFO").upper()
+        if log_level == "DEBUG" and exc.debug:  # debug payload is only included when SPEACHES_LOG_LEVEL is DEBUG
+            content["debug"] = exc.debug
+        return JSONResponse(status_code=exc.status_code, content=content)
+
     app.include_router(chat_router)
     app.include_router(stt_router)
     app.include_router(models_router)
@@ -91,6 +111,14 @@ def create_app() -> FastAPI:
 
         from speaches.ui.app import create_gradio_demo
 
-        app = gr.mount_gradio_app(app, create_gradio_demo(config), path="/")
+        app = gr.mount_gradio_app(app, create_gradio_demo(config), path="")
+
+        logger = logging.getLogger("speaches.main")
+        host = getattr(config, "host", "localhost")
+        port = getattr(config, "port", 8000)
+        display_host = "localhost" if host == "0.0.0.0" else host
+        url = f"http://{display_host}:{port}/"
+        logger.info(f"\n\nTo view the gradio web ui of speaches open your browser and visit:\n\n{url}\n\n")
+
 
     return app
diff --git a/src/speaches/routers/chat.py b/src/speaches/routers/chat.py
index da6fffed..8171f0fd 100644
--- a/src/speaches/routers/chat.py
+++ b/src/speaches/routers/chat.py
@@ -222,6 +222,16 @@ async def __aiter__(self) -> AsyncGenerator[ChatCompletionChunk]:
                 yield chunk
 
 
+# Custom exception for API proxy errors
+class APIProxyError(Exception):
+    def __init__(self, message, status_code=500, hint=None, suggestions=None, debug=None):
+        self.message = message
+        self.status_code = status_code
+        self.hint = hint
+        self.suggestions = suggestions or []
+        self.debug = debug
+
+
 # TODO: maybe propagate 400 errors
 
 
@@ -276,7 +286,63 @@ async def handle_completions(  # noqa: C901
     try:
         chat_completion = await chat_completion_client.create(**proxied_body.model_dump(exclude_defaults=True))
     except openai.APIStatusError as e:
-        return Response(content=e.message, status_code=e.status_code)
+        error_message = (
+            "Failed to communicate with the language model API. "
+            "This may be due to an invalid API key, incorrect endpoint, or network issues. "
+            "See the debug field for more details."
+        )
+        error_info = {  # diagnostic fields, read via getattr so a missing attribute yields None instead of raising
+            "openai_status_code": getattr(e, "status_code", None),
+            "openai_error_type": getattr(getattr(e, "error", None), "type", None),
+            "openai_error_code": getattr(getattr(e, "error", None), "code", None),
+            "openai_error_message": getattr(getattr(e, "error", None), "message", None),
+            "openai_request_id": getattr(getattr(e, "response", None), "headers", {}).get("x-request-id", None),
+            "endpoint": getattr(chat_completion_client, "endpoint", None),
+            "timestamp": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
+            "exception_type": type(e).__name__,
+        }
+        logger.exception(
+            f"Speaches API proxy error: {error_message} | "
+            f"status_code={error_info['openai_status_code']}, "
+            f"error_type={error_info['openai_error_type']}, "
+            f"request_id={error_info['openai_request_id']}, "
+            f"endpoint={error_info['endpoint']}"
+        )
+        raise APIProxyError(
+            error_message,
+            status_code=e.status_code,
+            hint="Verify your API key, endpoint URL, and network connection.",
+            suggestions=[
+                "Double-check your API key for typos or expiration.",
+                "Ensure the endpoint URL matches your Speaches server configuration.",
+                "Test your internet connection.",
+                "If the error persists, visit https://github.com/speaches-ai/speaches/issues or contact support with the error ID."
+            ],
+            debug=error_info
+        ) from e
+    except Exception as e:
+        error_message = (
+            "An unexpected internal error occurred while processing your request. "
+            "Please try again. If the problem continues, contact support and provide the error details in the debug field."
+        )
+        error_info = {
+            "exception_type": type(e).__name__,
+            "exception_message": str(e),
+            "timestamp": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
+        }
+        logger.exception(
+            f"Speaches unexpected error: {error_message} | exception_type={error_info['exception_type']}"
+        )
+        raise APIProxyError(
+            error_message,
+            status_code=500,
+            hint="Try again or contact support with the error details.",
+            suggestions=[
+                "Retry your request.",
+                "If the error persists, visit https://github.com/speaches-ai/speaches/issues or contact support and provide the debug information."
+            ],
+            debug=error_info
+        ) from e
     if isinstance(chat_completion, AsyncStream):
 
         async def inner() -> AsyncGenerator[str]:
diff --git a/src/speaches/ui/tabs/audio_chat.py b/src/speaches/ui/tabs/audio_chat.py
index c2af3e54..9973c2f2 100644
--- a/src/speaches/ui/tabs/audio_chat.py
+++ b/src/speaches/ui/tabs/audio_chat.py
@@ -59,6 +59,9 @@ def gradio_message_to_openai_message(gradio_message: GradioMessage) -> ChatCompl
         content.append(ChatCompletionContentPartTextParam(text=gradio_message["text"], type="text"))
 
     for file_path in gradio_message["files"]:
+        if not file_path:
+            msg = "No audio file provided in gradio_message_to_openai_message (audio_chat.py). Please record or upload audio."
+            raise ValueError(msg)
         content.append(  # noqa: PERF401
             ChatCompletionContentPartInputAudioParam(
                 input_audio=InputAudio(
diff --git a/src/speaches/ui/tabs/stt.py b/src/speaches/ui/tabs/stt.py
index 9dc533ef..49e2b45c 100644
--- a/src/speaches/ui/tabs/stt.py
+++ b/src/speaches/ui/tabs/stt.py
@@ -25,6 +25,9 @@ async def update_whisper_model_dropdown(request: gr.Request) -> gr.Dropdown:
     async def audio_task(
         http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
     ) -> str:
+        if not file_path:
+            msg = "No audio file provided in audio_task (stt.py). Please record or upload audio."
+            raise ValueError(msg)
         with Path(file_path).open("rb") as file:  # noqa: ASYNC230
             response = await http_client.post(
                 endpoint,
@@ -59,6 +62,9 @@ async def streaming_audio_task(
     async def whisper_handler(
         file_path: str, model: str, task: str, temperature: float, stream: bool, request: gr.Request
     ) -> AsyncGenerator[str, None]:
+        if not file_path:
+            msg = "No audio file provided in whisper_handler (stt.py). Please record or upload audio."
+            raise ValueError(msg)
         http_client = http_client_from_gradio_req(request, config)
         endpoint = TRANSCRIPTION_ENDPOINT if task == "transcribe" else TRANSLATION_ENDPOINT
 

From 812613320aa6ae8fe98609e16332b17df2d3dd49 Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 00:57:57 -0400
Subject: [PATCH 2/8] .gitignore add macos specific files

---
 .gitignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index e58215dd..329c687a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,5 +170,8 @@ tests/data
 .secrets
 .ruff_cache
 .aider*
-.DS_Store
 speaches_debug
+
+# OS-specific
+Thumbs.db
+.DS_Store

From df720313d73c0a2025cdb780e9a0be137aabc474 Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 00:59:34 -0400
Subject: [PATCH 3/8] pyproject.toml dep updates, onnxruntime 1.22.0 macos
 apple silicon big speedup

---
 pyproject.toml | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6bd38b34..ab610c95 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,13 +6,13 @@ requires-python = ">=3.12,<3.13"
 dependencies = [
     "ctranslate2>=4.5.0",
     "fastapi>=0.115.6",
-    "faster-whisper>=1.1.1",
-    "huggingface-hub[hf-transfer]>=0.25.1",
-    "kokoro-onnx[gpu]>=0.4.5,<0.5.0",
+    "faster-whisper==1.1.1",
+    "huggingface-hub[hf-transfer]==0.33.1",
+    "kokoro-onnx[gpu]==0.4.9",
     "numpy>=2.1.1",
     "piper-phonemize ; sys_platform == 'linux'",
     "piper-tts>=1.2.0 ; sys_platform == 'linux'",
-    "pydantic-settings>=2.5.2",
+    "pydantic-settings==2.10.1",
     "pydantic>=2.10.0",
     "python-multipart>=0.0.10",
     "sounddevice>=0.5.1",
@@ -23,6 +23,7 @@ dependencies = [
     "cachetools>=5.5.1",
     "httpx-ws>=0.7.1",
     "aiortc>=1.10.1",
+    "onnxruntime==1.22.0",
 ]
 
 [project.optional-dependencies]
@@ -32,14 +33,14 @@ dev = [
     "pytest-antilru>=2.0.0",
     "mkdocs-material>=9.5.39",
     "mkdocstrings[python]>=0.26.1",
-    "pytest-asyncio>=0.24.0",
+    "pytest-asyncio==1.0.0",
     "pytest-xdist>=3.6.1",
     "pytest>=8.3.3",
     "ruff>=0.9.1",
     "srt>=3.5.3",
     "webvtt-py>=0.5.1",
     "pre-commit>=4.0.1",
-    "pytest-mock>=3.14.0",
+    "pytest-mock==3.14.1",
     "mkdocs-render-swagger-plugin>=0.1.2",
     "mdx-truly-sane-lists>=1.3",
     "datamodel-code-generator>=0.26.5",
@@ -49,6 +50,9 @@ ui = [
     "httpx>=0.27.2",
     "httpx-sse>=0.4.0",
     "openai>=1.60.0",
+    "av==14.4.0",
+    "typer==0.16.0",
+    "websockets==15.0.1",
 ]
 opentelemetry = [
     "opentelemetry-distro>=0.50b0",

From 26fa1cf2658d4bab013aff94f74abb6e874601a0 Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 01:34:24 -0400
Subject: [PATCH 4/8] Improve APIProxyError: actionable suggestions, docs, and
 UTC timestamp

- Added a detailed docstring and clarified parameter order for `APIProxyError`.
- Updated type annotations for clarity and maintainability.
- Default error suggestions are now more concrete and actionable for users and developers.
- Switched to a timezone-aware ISO 8601 timestamp for error traceability.
- No changes outside error handling and error message formatting.
---
 src/speaches/main.py         |   2 +-
 src/speaches/routers/chat.py |  11 +---
 src/speaches/ui/tabs/stt.py  | 104 +++++++++++++++++++++--------------
 src/speaches/utils.py        |  59 ++++++++++++++++++++
 4 files changed, 124 insertions(+), 52 deletions(-)
 create mode 100644 src/speaches/utils.py

diff --git a/src/speaches/main.py b/src/speaches/main.py
index 1b2de97f..e48901b9 100644
--- a/src/speaches/main.py
+++ b/src/speaches/main.py
@@ -15,7 +15,6 @@
 from speaches.logger import setup_logger
 from speaches.routers.chat import (
     router as chat_router,
-    APIProxyError,
 )
 from speaches.routers.misc import (
     router as misc_router,
@@ -38,6 +37,7 @@
 from speaches.routers.vad import (
     router as vad_router,
 )
+from speaches.utils import APIProxyError
 
 # https://swagger.io/docs/specification/v3_0/grouping-operations-with-tags/
 # https://fastapi.tiangolo.com/tutorial/metadata/#metadata-for-tags
diff --git a/src/speaches/routers/chat.py b/src/speaches/routers/chat.py
index 8171f0fd..f0a5fe12 100644
--- a/src/speaches/routers/chat.py
+++ b/src/speaches/routers/chat.py
@@ -39,6 +39,7 @@
 from speaches.types.chat import (
     CompletionCreateParamsBase as OpenAICompletionCreateParamsBase,
 )
+from speaches.utils import APIProxyError
 
 # Resources:
 # - https://platform.openai.com/docs/guides/audio
@@ -222,16 +223,6 @@ async def __aiter__(self) -> AsyncGenerator[ChatCompletionChunk]:
                 yield chunk
 
 
-# Custom exception for API proxy errors
-class APIProxyError(Exception):
-    def __init__(self, message, status_code=500, hint=None, suggestions=None, debug=None):
-        self.message = message
-        self.status_code = status_code
-        self.hint = hint
-        self.suggestions = suggestions or []
-        self.debug = debug
-
-
 # TODO: maybe propagate 400 errors
 
 
diff --git a/src/speaches/ui/tabs/stt.py b/src/speaches/ui/tabs/stt.py
index 49e2b45c..6041d929 100644
--- a/src/speaches/ui/tabs/stt.py
+++ b/src/speaches/ui/tabs/stt.py
@@ -7,6 +7,10 @@
 
 from speaches.config import Config
 from speaches.ui.utils import http_client_from_gradio_req, openai_client_from_gradio_req
+from speaches.utils import APIProxyError, format_api_proxy_error
+import logging
+
+logger = logging.getLogger(__name__)
 
 TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
 TRANSLATION_ENDPOINT = "/v1/audio/translations"
@@ -25,56 +29,74 @@ async def update_whisper_model_dropdown(request: gr.Request) -> gr.Dropdown:
     async def audio_task(
         http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
     ) -> str:
-        if not file_path:
-            msg = "No audio file provided in audio_task (stt.py). Please record or upload audio."
-            raise ValueError(msg)
-        with Path(file_path).open("rb") as file:  # noqa: ASYNC230
-            response = await http_client.post(
-                endpoint,
-                files={"file": file},
-                data={
-                    "model": model,
-                    "response_format": "text",
-                    "temperature": temperature,
-                },
-            )
-
-        response.raise_for_status()
-        return response.text
+        try:
+            if not file_path:
+                msg = "No audio file provided in audio_task (stt.py). Please record or upload audio."
+                raise APIProxyError(msg, suggestions=["Please record or upload an audio file."])
+            with Path(file_path).open("rb") as file:  # noqa: ASYNC230
+                response = await http_client.post(
+                    endpoint,
+                    files={"file": file},
+                    data={
+                        "model": model,
+                        "response_format": "text",
+                        "temperature": temperature,
+                    },
+                )
+            response.raise_for_status()
+            return response.text
+        except Exception as e:
+            logger.exception("STT audio_task error")
+            if not isinstance(e, APIProxyError):
+                e = APIProxyError(str(e))
+            return format_api_proxy_error(e, context="audio_task")
 
     async def streaming_audio_task(
         http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
     ) -> AsyncGenerator[str, None]:
-        with Path(file_path).open("rb") as file:  # noqa: ASYNC230
-            kwargs = {
-                "files": {"file": file},
-                "data": {
-                    "response_format": "text",
-                    "temperature": temperature,
-                    "model": model,
-                    "stream": True,
-                },
-            }
-            async with aconnect_sse(http_client, "POST", endpoint, **kwargs) as event_source:
-                async for event in event_source.aiter_sse():
-                    yield event.data
+        try:
+            with Path(file_path).open("rb") as file:  # noqa: ASYNC230
+                kwargs = {
+                    "files": {"file": file},
+                    "data": {
+                        "response_format": "text",
+                        "temperature": temperature,
+                        "model": model,
+                        "stream": True,
+                    },
+                }
+                async with aconnect_sse(http_client, "POST", endpoint, **kwargs) as event_source:
+                    async for event in event_source.aiter_sse():
+                        yield event.data
+        except Exception as e:
+            logger.exception("STT streaming error")
+            if not isinstance(e, APIProxyError):
+                e = APIProxyError(str(e))
+            yield format_api_proxy_error(e, context="streaming_audio_task")
 
     async def whisper_handler(
         file_path: str, model: str, task: str, temperature: float, stream: bool, request: gr.Request
     ) -> AsyncGenerator[str, None]:
-        if not file_path:
-            msg = "No audio file provided in whisper_handler (stt.py). Please record or upload audio."
-            raise ValueError(msg)
-        http_client = http_client_from_gradio_req(request, config)
-        endpoint = TRANSCRIPTION_ENDPOINT if task == "transcribe" else TRANSLATION_ENDPOINT
+        try:
+            if not file_path:
+                msg = "No audio file provided in whisper_handler (stt.py). Please record or upload audio."
+                raise APIProxyError(msg, suggestions=["Please record or upload an audio file."])
+            http_client = http_client_from_gradio_req(request, config)
+            endpoint = TRANSCRIPTION_ENDPOINT if task == "transcribe" else TRANSLATION_ENDPOINT
 
-        if stream:
-            previous_transcription = ""
-            async for transcription in streaming_audio_task(http_client, file_path, endpoint, temperature, model):
-                previous_transcription += transcription
-                yield previous_transcription
-        else:
-            yield await audio_task(http_client, file_path, endpoint, temperature, model)
+            if stream:
+                previous_transcription = ""
+                async for transcription in streaming_audio_task(http_client, file_path, endpoint, temperature, model):
+                    previous_transcription += transcription
+                    yield previous_transcription
+            else:
+                result = await audio_task(http_client, file_path, endpoint, temperature, model)
+                yield result
+        except Exception as e:
+            logger.exception("STT handler error")
+            if not isinstance(e, APIProxyError):
+                e = APIProxyError(str(e))
+            yield format_api_proxy_error(e, context="whisper_handler")
 
     with gr.Tab(label="Speech-to-Text") as tab:
         audio = gr.Audio(type="filepath")
diff --git a/src/speaches/utils.py b/src/speaches/utils.py
new file mode 100644
index 00000000..8abc55ab
--- /dev/null
+++ b/src/speaches/utils.py
@@ -0,0 +1,59 @@
+import os
+import uuid
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+class APIProxyError(Exception):
+    """Exception for structured, actionable API or proxy errors.
+
+    Args:
+        message: Human-readable error message (also passed to Exception, so str(exc) returns it).
+        hint: Short actionable hint for the user.
+        suggestions: List of actionable suggestions for the user.
+        status_code: HTTP status code (default 500).
+        debug: Optional debug info (stack trace, request ID, etc.).
+        error_id: Unique error ID for traceability.
+        timestamp: When the error occurred (ISO 8601, UTC).
+    """
+
+    def __init__(
+        self,
+        message: str,
+        hint: Optional[str] = None,
+        suggestions: Optional[list[str]] = None,
+        status_code: int = 500,
+        debug: Any = None,
+        error_id: Optional[str] = None,
+        timestamp: Optional[str] = None,
+    ) -> None:
+        super().__init__(message)  # populate Exception.args so str(exc) and tracebacks show the message
+        self.message = message
+        self.hint = hint
+        self.suggestions = suggestions or []
+        self.status_code = status_code
+        self.debug = debug
+        self.error_id = error_id or uuid.uuid4().hex
+        self.timestamp = timestamp or datetime.now(timezone.utc).isoformat()
+
+def format_api_proxy_error(exc: "APIProxyError", context: str = "") -> str:
+    debug_mode = os.environ.get("SPEACHES_LOG_LEVEL", "").lower() == "debug"
+    user_message = (
+        f"An error occurred: {exc.message} "
+        f"(Error ID: {exc.error_id}). Please try again or contact support."
+    )
+    suggestions = exc.suggestions or [
+        "Double-check your input data and file format (e.g., ensure audio files are WAV/MP3 and not corrupted).",
+        "Verify your API key and endpoint configuration in the settings.",
+        "Check your internet/network connection.",
+        "If the error persists, restart the application or server.",
+        "Contact support with the error ID and debug info if available."
+    ]
+    debug_info = (
+        f"Debug: {exc.debug}\nContext: {context}\nTimestamp: {exc.timestamp}"
+        if debug_mode and exc.debug else ""
+    )
+    return (
+        f"[ERROR] {user_message}\n"
+        f"Suggestions: {', '.join(suggestions)}"
+        + (f"\n{debug_info}" if debug_info else "")
+    )

From a402fa0a09b3bd7bfc8da8669fa8c44b3a2bc58d Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 16:11:24 -0400
Subject: [PATCH 5/8] main.py slightly cleaner printout

---
 src/speaches/main.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/speaches/main.py b/src/speaches/main.py
index e48901b9..394cdc9c 100644
--- a/src/speaches/main.py
+++ b/src/speaches/main.py
@@ -114,11 +114,10 @@ async def api_proxy_error_handler(request, exc: APIProxyError):
         app = gr.mount_gradio_app(app, create_gradio_demo(config), path="")
 
         logger = logging.getLogger("speaches.main")
-        host = getattr(config, "host", "localhost")
-        port = getattr(config, "port", 8000)
-        display_host = "localhost" if host == "0.0.0.0" else host
-        url = f"http://{display_host}:{port}/"
-        logger.info(f"\n\nTo view the gradio web ui of speaches open your browser and visit:\n\n{url}\n\n")
-
+        if config.host and config.port:
+            display_host = "localhost" if config.host in ("0.0.0.0", "127.0.0.1") else config.host
+            url = f"http://{display_host}:{config.port}/"
+            logger.info(f"\n\nTo view the gradio web ui of speaches open your browser and visit:\n\n{url}\n\n")
+        # If host or port is missing, do not print a possibly incorrect URL.
 
     return app

From f99c2146cf4d2c9135b3674a298648b9fe64909e Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 20:26:14 -0400
Subject: [PATCH 6/8] speaches-cli main.py prep for tracked move to src/cli.py,
 see next commit

---
 .../speaches-cli/src/speaches_cli/main.py     | 191 +++++++++++++++---
 1 file changed, 163 insertions(+), 28 deletions(-)

diff --git a/packages/speaches-cli/src/speaches_cli/main.py b/packages/speaches-cli/src/speaches_cli/main.py
index 05e4cea2..b03c2d0f 100644
--- a/packages/speaches-cli/src/speaches_cli/main.py
+++ b/packages/speaches-cli/src/speaches_cli/main.py
@@ -1,63 +1,198 @@
+# speaches/cli.py
+"""
+This module defines the main command-line interface for the Speaches application,
+powered by Typer. It serves as a unified entry point for both running the API
+server and interacting with it as a client.
+
+After installation, you can use it like this:
+  - `speaches serve` to start the API server (requires [server] extra).
+  - `speaches model ls` to list available models on a running server.
+  - `speaches --help` for a full list of commands.
+"""
 import json
 import os
+from typing import Optional
 
 import httpx
 import typer
-
-app = typer.Typer()
-registry_app = typer.Typer()
-model_app = typer.Typer()
-audio_app = typer.Typer()
-audio_speech_app = typer.Typer()
-
+# `import uvicorn` is now moved inside the `serve` command.
+
+# Create the main Typer application
+app = typer.Typer(
+    name="speaches",
+    help="A unified tool to serve and interact with the Speaches API.",
+    add_completion=False, # Can be enabled for more advanced shell completion
+)
+
+# --- Server Command (`speaches serve`) ---
+
+@app.command()
+def serve(
+    host: Optional[str] = typer.Option(
+        None, "--host", help="Host to bind the server to. Overrides environment variables."
+    ),
+    port: Optional[int] = typer.Option(
+        None, "--port", help="Port to bind the server to. Overrides environment variables."
+    ),
+    ssl_keyfile: Optional[str] = typer.Option(
+        None, "--ssl-keyfile", help="Path to the SSL key file. Overrides environment variables."
+    ),
+    ssl_certfile: Optional[str] = typer.Option(
+        None, "--ssl-certfile", help="Path to the SSL certificate file. Overrides environment variables."
+    ),
+):
+    """
+    Starts the Speaches FastAPI server using Uvicorn.
+
+    This command loads configuration from environment variables (e.g., SPEACHES_HOST,
+    UVICORN_HOST) and .env files. Command-line options provided here will take the
+    highest precedence.
+
+    NOTE: This command requires the 'server' extras to be installed.
+    Install with: `pip install 'speaches[server]'` or `uv tool install 'speaches[server]'`
+    """
+    try:
+        # Lazy import: Uvicorn is only imported when `serve` is called.
+        import uvicorn
+    except ImportError:
+        typer.secho(
+            "Error: Uvicorn is not installed. The 'serve' command requires it.",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.echo("To install the server dependencies, please run:")
+        typer.secho("  pip install 'speaches[server]'", fg=typer.colors.CYAN)
+        typer.echo("or, if using uv:")
+        typer.secho("  uv tool install 'speaches[server]'", fg=typer.colors.CYAN)
+        raise typer.Exit(code=1)
+
+    from speaches.dependencies import get_config
+    from speaches.main import create_app
+
+    # Load configuration from Pydantic. This respects the established precedence rules.
+    config = get_config()
+
+    # CLI options take final precedence over everything.
+    # This allows for maximum flexibility during runtime.
+    final_host = host or config.resolved_host
+    final_port = port or config.resolved_port
+    final_ssl_keyfile = ssl_keyfile or config.ssl_keyfile
+    final_ssl_certfile = ssl_certfile or config.ssl_certfile
+
+    # Create the FastAPI app instance.
+    fastapi_app = create_app()
+
+    # Bridge the gap between the runner and the app: populate app.state.
+    # This ensures the lifespan events have 100% accurate info.
+    fastapi_app.state.server_host = final_host
+    fastapi_app.state.server_port = final_port
+    fastapi_app.state.server_is_ssl = bool(final_ssl_keyfile and final_ssl_certfile)
+
+    # Run the Uvicorn server programmatically.
+    uvicorn.run(
+        fastapi_app,
+        host=final_host,
+        port=final_port,
+        ssl_keyfile=final_ssl_keyfile,
+        ssl_certfile=final_ssl_certfile,
+        log_level=config.log_level.lower(),
+    )
+
+
+# --- Client Commands (`speaches model`, `speaches registry`) ---
+
+# Client-side configuration and helper
 SPEACHES_BASE_URL = os.getenv("SPEACHES_BASE_URL", "http://localhost:8000")
 SPEACHES_OPENAI_BASE_URL = SPEACHES_BASE_URL + "/v1"
-client = httpx.Client(base_url=SPEACHES_BASE_URL, timeout=httpx.Timeout(None))
-
 MODELS_URL = f"{SPEACHES_OPENAI_BASE_URL}/models"
 REGISTRY_URL = f"{SPEACHES_OPENAI_BASE_URL}/registry"
 
+try:
+    client = httpx.Client(base_url=SPEACHES_BASE_URL, timeout=httpx.Timeout(None))
+except httpx.InvalidURL:
+    typer.secho(f"Error: Invalid SPEACHES_BASE_URL: '{SPEACHES_BASE_URL}'", fg=typer.colors.RED)
+    raise typer.Exit(code=1)
+
 
 def dump_response(response: httpx.Response) -> None:
+    """Pretty-prints a JSON response or prints raw text."""
+    if response.status_code >= 400:
+        typer.secho(f"Error: Received status code {response.status_code}", fg=typer.colors.RED)
+
     if response.headers.get("Content-Type") == "application/json":
-        data = response.json()
-        print(json.dumps(data, indent=2))
+        try:
+            data = response.json()
+            print(json.dumps(data, indent=2))
+        except json.JSONDecodeError:
+            typer.echo("Received non-JSON response:")
+            typer.echo(response.text)
     else:
-        print(response.text)
+        typer.echo(response.text)
+
+    if response.status_code >= 400:
+        raise typer.Exit(code=1)
 
+# Create sub-typers for command organization
+registry_app = typer.Typer(help="Interact with the model registry.")
+model_app = typer.Typer(help="Manage local models on a running server.")
+audio_app = typer.Typer() # Retaining for future use
+audio_speech_app = typer.Typer() # Retaining for future use
 
+# Add the sub-commands to the main app
+app.add_typer(registry_app, name="registry")
+app.add_typer(model_app, name="model")
+
+# Define the client commands
 @registry_app.command("ls")
-def registry_ls(task: str | None = None) -> None:
+def registry_ls(task: Optional[str] = typer.Option(None, help="Filter registry by task type.")):
+    """Lists all available models in the public registry."""
     params: dict[str, str] = {}
     if task is not None:
         params["task"] = task
-    response = client.get(REGISTRY_URL, params=params)
-    dump_response(response)
+    try:
+        response = client.get(REGISTRY_URL, params=params)
+        dump_response(response)
+    except httpx.ConnectError:
+        typer.secho(f"Error: Connection to {SPEACHES_BASE_URL} failed. Is the server running?", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
 
 
 @model_app.command("ls")
-def models_ls(task: str | None = None) -> None:
+def models_ls(task: Optional[str] = typer.Option(None, help="Filter local models by task type.")):
+    """Lists locally downloaded and available models."""
     params: dict[str, str] = {}
     if task is not None:
         params["task"] = task
-    response = client.get(MODELS_URL, params=params)
-    dump_response(response)
+    try:
+        response = client.get(MODELS_URL, params=params)
+        dump_response(response)
+    except httpx.ConnectError:
+        typer.secho(f"Error: Connection to {SPEACHES_BASE_URL} failed. Is the server running?", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
 
 
 @model_app.command("rm")
-def model_rm(model_id: str) -> None:
-    response = client.delete(f"{MODELS_URL}/{model_id}")
-    dump_response(response)
+def model_rm(model_id: str = typer.Argument(..., help="The ID of the model to remove (e.g., 'Systran/faster-whisper-large-v3').")):
+    """Removes (unloads) a model from memory."""
+    try:
+        response = client.delete(f"{MODELS_URL}/{model_id}")
+        dump_response(response)
+    except httpx.ConnectError:
+        typer.secho(f"Error: Connection to {SPEACHES_BASE_URL} failed. Is the server running?", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
 
 
 @model_app.command("download")
-def model_download(model_id: str) -> None:
-    response = client.post(f"{MODELS_URL}/{model_id}")
-    dump_response(response)
+def model_download(model_id: str = typer.Argument(..., help="The ID of the model to download (e.g., 'Systran/faster-whisper-large-v3').")):
+    """Downloads a model from the registry to the local cache."""
+    try:
+        response = client.post(f"{MODELS_URL}/{model_id}")
+        dump_response(response)
+    except httpx.ConnectError:
+        typer.secho(f"Error: Connection to {SPEACHES_BASE_URL} failed. Is the server running?", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
 
 
-app.add_typer(registry_app, name="registry")
-app.add_typer(model_app, name="model")
-
+# This check is useful if you ever want to run this script directly for debugging
 if __name__ == "__main__":
-    app()
+    app()
\ No newline at end of file

From 426fda623af32da81ea216f55788b94a58b76000 Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 20:32:52 -0400
Subject: [PATCH 7/8] feat(cli): single `speaches` cli command for serve and
 client

This commit unifies the server and client into a single `speaches` executable to create a more intuitive user experience, similar to tools like `ollama`.

Key Changes:

- **Renamed `speaches` cli command:** The `speaches-cli` CLI command is now simply `speaches`.
- **New `speaches serve` command:** The `speaches` CLI now includes a `serve` command as the primary entry point for running the FastAPI server.
- **Client Integration:** All commands from the former `speaches-cli` (e.g., `model ls`, `registry ls`) are now sub-commands of the main `speaches` executable.
- **Unified Packaging:**
  - The `packages/speaches-cli` directory has been removed.
  - Dependencies are consolidated into the root `pyproject.toml`.
  - A `[project.scripts]` entry now creates the `speaches` executable.
- **Robust Config Resolution:** `config.py` is updated to resolve host/port with a clear precedence (`SPEACHES_*` > `UVICORN_*` > default), ensuring backwards compatibility.
- **Modernized App Lifecycle:** `main.py` now uses the `lifespan` context manager for startup logic, replacing the deprecated `@on_event` decorator.

The direct `uvicorn speaches.main:create_app` command remains fully functional.
---
 packages/speaches-cli/.python-version         |   1 -
 packages/speaches-cli/README.md               |   0
 packages/speaches-cli/pyproject.toml          |  20 --
 .../speaches-cli/src/speaches_cli/__init__.py |   4 -
 pyproject.toml                                |  28 ++-
 .../main.py => src/speaches/cli.py            |   0
 src/speaches/config.py                        | 101 +++++++---
 src/speaches/main.py                          | 184 +++++++++++-------
 8 files changed, 213 insertions(+), 125 deletions(-)
 delete mode 100644 packages/speaches-cli/.python-version
 delete mode 100644 packages/speaches-cli/README.md
 delete mode 100644 packages/speaches-cli/pyproject.toml
 delete mode 100644 packages/speaches-cli/src/speaches_cli/__init__.py
 rename packages/speaches-cli/src/speaches_cli/main.py => src/speaches/cli.py (100%)

diff --git a/packages/speaches-cli/.python-version b/packages/speaches-cli/.python-version
deleted file mode 100644
index e4fba218..00000000
--- a/packages/speaches-cli/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.12
diff --git a/packages/speaches-cli/README.md b/packages/speaches-cli/README.md
deleted file mode 100644
index e69de29b..00000000
diff --git a/packages/speaches-cli/pyproject.toml b/packages/speaches-cli/pyproject.toml
deleted file mode 100644
index 7adbbe27..00000000
--- a/packages/speaches-cli/pyproject.toml
+++ /dev/null
@@ -1,20 +0,0 @@
-[project]
-name = "speaches-cli"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-authors = [
-    { name = "Fedir Zadniprovskyi", email = "github.g1k56@simplelogin.com" }
-]
-requires-python = "==3.12.*"
-dependencies = [
-    "httpx>=0.27.2",
-    "typer>=0.12.5",
-]
-
-[project.scripts]
-speaches-cli = "speaches_cli.main:app"
-
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
diff --git a/packages/speaches-cli/src/speaches_cli/__init__.py b/packages/speaches-cli/src/speaches_cli/__init__.py
deleted file mode 100644
index ef43fff8..00000000
--- a/packages/speaches-cli/src/speaches_cli/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from speaches_cli.main import app
-
-if __name__ == "__main__":
-    app()
diff --git a/pyproject.toml b/pyproject.toml
index ab610c95..c84b3a85 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,5 @@
+# pyproject.toml
+
 [project]
 name = "speaches"
 version = "0.1.0"
@@ -17,16 +19,36 @@ dependencies = [
     "python-multipart>=0.0.10",
     "sounddevice>=0.5.1",
     "soundfile>=0.12.1",
-    "uvicorn>=0.30.6",
     "openai[realtime]>=1.63.0",
     "aiostream>=0.6.4",
     "cachetools>=5.5.1",
     "httpx-ws>=0.7.1",
     "aiortc>=1.10.1",
     "onnxruntime==1.22.0",
+
+    # Dependencies for the server
+    "uvicorn>=0.30.6",
+
+    # Dependencies for the unified CLI tool
+    "httpx>=0.27.2",
+    "typer>=0.12.5", # CORRECTED: Removed '[all]' which is no longer a valid extra for this version
+
+    # Dependencies for the ui
+    "gradio>=5.13.0",
+    "httpx>=0.27.2",
+    "httpx-sse>=0.4.0",
+    "openai>=1.60.0",
+    "av==14.4.0",
+    "typer==0.16.0",
+    "websockets==15.0.1",
 ]
 
+# This entry point is now correct because we moved cli.py
+[project.scripts]
+speaches = "speaches.cli:app"
+
 [project.optional-dependencies]
+
 dev = [
     "anyio>=4.4.0",
     "basedpyright>=1.26.0",
@@ -45,6 +67,7 @@ dev = [
     "mdx-truly-sane-lists>=1.3",
     "datamodel-code-generator>=0.26.5",
 ]
+
 ui = [
     "gradio>=5.13.0",
     "httpx>=0.27.2",
@@ -165,6 +188,3 @@ piper-phonemize = [
 name = "piper-tts"
 version = "1.2.0"
 requires-dist = ["piper-phonemize"]
-
-[tool.uv.workspace]
-members = ["packages/speaches-cli"]
diff --git a/packages/speaches-cli/src/speaches_cli/main.py b/src/speaches/cli.py
similarity index 100%
rename from packages/speaches-cli/src/speaches_cli/main.py
rename to src/speaches/cli.py
diff --git a/src/speaches/config.py b/src/speaches/config.py
index 99310292..23ecedb0 100644
--- a/src/speaches/config.py
+++ b/src/speaches/config.py
@@ -1,65 +1,109 @@
+# speaches/config.py
 from typing import Literal
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, SecretStr, field_validator, ValidationInfo
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
+# --- Constants (unchanged) ---
 SAMPLES_PER_SECOND = 16000
 SAMPLE_WIDTH = 2
 BYTES_PER_SECOND = SAMPLES_PER_SECOND * SAMPLE_WIDTH
 # 2 BYTES = 16 BITS = 1 SAMPLE
 # 1 SECOND OF AUDIO = 32000 BYTES = 16000 SAMPLES
 
-
 type Device = Literal["cpu", "cuda", "auto"]
-
 # https://github.com/OpenNMT/CTranslate2/blob/master/docs/quantization.md#quantize-on-model-conversion
 type Quantization = Literal[
     "int8", "int8_float16", "int8_bfloat16", "int8_float32", "int16", "float16", "bfloat16", "float32", "default"
 ]
 
-
+# --- Nested Config Models ---
 class WhisperConfig(BaseModel):
-    """See https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py#L599."""
+    """Configuration for the faster-whisper model.
 
-    inference_device: Device = "auto"
-    device_index: int | list[int] = 0
-    compute_type: Quantization = "default"  # TODO: should this even be a configuration option?
-    cpu_threads: int = 0
-    num_workers: int = 1
-    ttl: int = Field(default=300, ge=-1)
+    See: https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py#L599.
+    """
+
+    inference_device: Device = Field(default="auto", description="The device to use for inference ('cpu', 'cuda', 'auto').")
+    device_index: int | list[int] = Field(default=0, description="A list of device IDs to use for inference.")
+    compute_type: Quantization = Field(default="default", description="The quantization type to use for the model.")
+    cpu_threads: int = Field(default=0, description="Number of threads to use when running on CPU (0 = auto).")
+    num_workers: int = Field(default=1, description="The number of workers to use for parallel transcription.")
+    ttl: int = Field(default=300, ge=-1, description="Time in seconds until the model is unloaded if unused. -1: never unload; 0: unload immediately.")
     """
     Time in seconds until the model is unloaded if it is not being used.
     -1: Never unload the model.
     0: Unload the model immediately after usage.
     """
-    use_batched_mode: bool = False
+    use_batched_mode: bool = Field(default=False, description="Whether to use batch mode for inference. This may become the default in the future.")
     """
     Whether to use batch mode(introduced in 1.1.0 `faster-whisper` release) for inference. This will likely become the default in the future and the configuration option will be removed.
     """
 
-
+# --- Main Config Class ---
 # TODO: document `alias` behaviour within the docstring
 class Config(BaseSettings):
-    """Configuration for the application. Values can be set via environment variables.
+    """Defines the application's configuration settings. Values can be set via environment variables.
 
     Pydantic will automatically handle mapping uppercased environment variables to the corresponding fields.
     To populate nested, the environment should be prefixed with the nested field name and an underscore. For example,
-    the environment variable `LOG_LEVEL` will be mapped to `log_level`, `WHISPER__INFERENCE_DEVICE`(note the double underscore) to `whisper.inference_device`, to set quantization to int8, use `WHISPER__COMPUTE_TYPE=int8`, etc.
-    """
+    the environment variable `SPEACHES_LOG_LEVEL` will be mapped to `log_level`, `SPEACHES_WHISPER__INFERENCE_DEVICE`(note the double underscore)
+    to `whisper.inference_device`, to set quantization to int8, use `SPEACHES_WHISPER__COMPUTE_TYPE=int8`, etc.
+
+    Values are loaded from environment variables or a .env file.
+    The system uses a clear precedence for host and port settings:
+    1. SPEACHES_HOST / SPEACHES_PORT (highest priority)
+    2. UVICORN_HOST / UVICORN_PORT (fallback for compatibility)
+    3. Default value in the code (lowest priority)
 
-    model_config = SettingsConfigDict(env_nested_delimiter="__")
+    For other settings, use the `SPEACHES_` prefix. For nested models like `whisper`,
+    use a double underscore delimiter (e.g., `SPEACHES_WHISPER__INFERENCE_DEVICE=cpu`).
+    """
 
-    api_key: SecretStr | None = None
+    model_config = SettingsConfigDict(
+        env_prefix="speaches_",
+        env_nested_delimiter="__",
+        env_file=".env",
+        env_file_encoding="utf-8"
+    )
+
+    # We define fields for both namespaces
+    host: str | None = Field(default=None, description="Application-specific host. Overrides UVICORN_HOST.")
+    uvicorn_host: str = Field(default="0.0.0.0", alias="UVICORN_HOST", description="Standard Uvicorn host, used as a fallback.")
+
+    port: int | None = Field(default=None, description="Application-specific port. Overrides UVICORN_PORT.")
+    uvicorn_port: int = Field(default=34331, alias="UVICORN_PORT", description="Standard Uvicorn port, used as a fallback.")
+
+    # Final, resolved values used by the app; computed by the validators below from the fields above.
+    # `validate_default=True` is required: pydantic skips field validators when the default is used.
+    resolved_host: str = Field(default="0.0.0.0", validate_default=True)
+    resolved_port: int = Field(default=34331, validate_default=True)
+
+    @field_validator('resolved_host', mode='before')
+    @classmethod
+    def _resolve_host(cls, v, info: ValidationInfo) -> str:
+        """Computes the definitive host: `host` if set, otherwise `uvicorn_host`."""
+        # `info.data` holds the already-validated fields declared above this one.
+        return info.data.get('host') or info.data.get('uvicorn_host')
+
+    @field_validator('resolved_port', mode='before')
+    @classmethod
+    def _resolve_port(cls, v, info: ValidationInfo) -> int:
+        """Computes the definitive port: `port` if set, otherwise `uvicorn_port`."""
+        return info.data.get('port') or info.data.get('uvicorn_port')
+
+    # --- Other configuration fields (fully documented) ---
+    api_key: SecretStr | None = Field(default=None, description="If set, this API key will be required for all requests via the 'Authorization' header.")
     """
     If set, the API key will be required for all requests.
     """
-    log_level: str = "debug"
+
+    log_level: str = Field(default="debug", description="Logging level. One of: 'debug', 'info', 'warning', 'error', 'critical'.")
     """
     Logging level. One of: 'debug', 'info', 'warning', 'error', 'critical'.
     """
-    host: str = Field(alias="UVICORN_HOST", default="0.0.0.0")
-    port: int = Field(alias="UVICORN_PORT", default=8000)
-    allow_origins: list[str] | None = None
+
+    allow_origins: list[str] | None = Field(default=None, description="A list of origins that are allowed to make cross-site requests. Use '[\"*\"]' to allow all.")
     """
     https://docs.pydantic.dev/latest/concepts/pydantic_settings/#parsing-environment-variable-values
     Usage:
@@ -67,19 +111,20 @@ class Config(BaseSettings):
         `export ALLOW_ORIGINS='["*"]'`
     """
 
-    enable_ui: bool = True
+    enable_ui: bool = Field(default=True, description="Enable the Gradio web UI. Disable to reduce dependencies and improve startup time.")
     """
     Whether to enable the Gradio UI. You may want to disable this if you want to minimize the dependencies and slightly improve the startup time.
     """
 
-    whisper: WhisperConfig = WhisperConfig()
+    whisper: WhisperConfig = Field(default_factory=WhisperConfig)
+    loopback_host_url: str | None = Field(default=None, description="URL for the Gradio app to connect to the API. If unset, it uses the user's browser URL.")
 
-    loopback_host_url: str | None = None
     """
     If set this is the URL that the gradio app will use to connect to the API server hosting speaches.
     If not set the gradio app will use the url that the user connects to the gradio app on.
     """
-
     # TODO: document the below configuration options
-    chat_completion_base_url: str = "http://localhost:11434/v1"
-    chat_completion_api_key: SecretStr = SecretStr("cant-be-empty")
+    chat_completion_base_url: str = Field(default="http://localhost:11434/v1", description="The base URL for the chat completion API endpoint (e.g., Ollama).")
+    chat_completion_api_key: SecretStr = Field(default=SecretStr("not-required"), description="The API key for the chat completion service, if required.")
+    ssl_keyfile: str | None = Field(default=None, description="Path to the SSL private key file for enabling HTTPS.")
+    ssl_certfile: str | None = Field(default=None, description="Path to the SSL certificate file for enabling HTTPS.")
\ No newline at end of file
diff --git a/src/speaches/main.py b/src/speaches/main.py
index 394cdc9c..97baf0af 100644
--- a/src/speaches/main.py
+++ b/src/speaches/main.py
@@ -1,42 +1,28 @@
+# speaches/main.py
 from __future__ import annotations
 
 import logging
+import uuid
+from contextlib import asynccontextmanager
+from urllib.parse import urlparse
 
-from fastapi import (
-    FastAPI,
-)
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
-from starlette.responses import RedirectResponse
-from fastapi.responses import JSONResponse
-import uuid
+from starlette.requests import Request
+from starlette.responses import JSONResponse, RedirectResponse
 
+from speaches.config import Config
 from speaches.dependencies import ApiKeyDependency, get_config
 from speaches.logger import setup_logger
-from speaches.routers.chat import (
-    router as chat_router,
-)
-from speaches.routers.misc import (
-    router as misc_router,
-)
-from speaches.routers.models import (
-    router as models_router,
-)
-from speaches.routers.realtime.rtc import (
-    router as realtime_rtc_router,
-)
-from speaches.routers.realtime.ws import (
-    router as realtime_ws_router,
-)
-from speaches.routers.speech import (
-    router as speech_router,
-)
-from speaches.routers.stt import (
-    router as stt_router,
-)
-from speaches.routers.vad import (
-    router as vad_router,
-)
+from speaches.routers.chat import router as chat_router
+from speaches.routers.misc import router as misc_router
+from speaches.routers.models import router as models_router
+from speaches.routers.realtime.rtc import router as realtime_rtc_router
+from speaches.routers.realtime.ws import router as realtime_ws_router
+from speaches.routers.speech import router as speech_router
+from speaches.routers.stt import router as stt_router
+from speaches.routers.vad import router as vad_router
 from speaches.utils import APIProxyError
 
 # https://swagger.io/docs/specification/v3_0/grouping-operations-with-tags/
@@ -53,24 +39,25 @@
     },
 ]
 
+# --- Helper functions for clean app creation ---
 
-def create_app() -> FastAPI:
-    config = get_config()  # HACK
-    setup_logger(config.log_level)
-    logger = logging.getLogger(__name__)
-
-    logger.debug(f"Config: {config}")
-
-    dependencies = []
-    if config.api_key is not None:
-        dependencies.append(ApiKeyDependency)
-
-    app = FastAPI(dependencies=dependencies, openapi_tags=TAGS_METADATA)
+def _register_routers(app: FastAPI):
+    """Includes all the API routers in the FastAPI app."""
+    app.include_router(chat_router)
+    app.include_router(stt_router)
+    app.include_router(models_router)
+    app.include_router(misc_router)
+    app.include_router(realtime_rtc_router)
+    app.include_router(realtime_ws_router)
+    app.include_router(speech_router)
+    app.include_router(vad_router)
 
-    # Register global exception handler for APIProxyError
+def _register_exception_handlers(app: FastAPI):
+    """Registers global exception handlers."""
     @app.exception_handler(APIProxyError)
-    async def api_proxy_error_handler(request, exc: APIProxyError):
+    async def api_proxy_error_handler(request: Request, exc: APIProxyError):
         error_id = str(uuid.uuid4())
+        # Use the module-level logger from the top of the file
         logger.exception(f"[{{error_id}}] {exc.message}")
         content = {
             "detail": exc.message,
@@ -78,46 +65,107 @@ async def api_proxy_error_handler(request, exc: APIProxyError):
             "suggested_fixes": exc.suggestions,
             "error_id": error_id,
         }
+        # TODO: move this import to module level instead of importing inside the handler
         import os
         log_level = os.getenv("SPEACHES_LOG_LEVEL", "INFO").upper()
         if log_level == "DEBUG" and exc.debug:
             content["debug"] = exc.debug
         return JSONResponse(status_code=exc.status_code, content=content)
 
-    app.include_router(chat_router)
-    app.include_router(stt_router)
-    app.include_router(models_router)
-    app.include_router(misc_router)
-    app.include_router(realtime_rtc_router)
-    app.include_router(realtime_ws_router)
-    app.include_router(speech_router)
-    app.include_router(vad_router)
-
+def _mount_ui_and_static(app: FastAPI, config: Config):
+    """Mounts static files and the Gradio UI if enabled."""
     # HACK: move this elsewhere
     app.get("/v1/realtime", include_in_schema=False)(lambda: RedirectResponse(url="/v1/realtime/"))
     app.mount("/v1/realtime", StaticFiles(directory="realtime-console/dist", html=True))
 
+    if config.enable_ui:
+        import gradio as gr
+        from speaches.ui.app import create_gradio_demo
+        # Mount the Gradio app. The original `app = gr.mount...` reassignment is avoided.
+        gr.mount_gradio_app(app, create_gradio_demo(config), path="")
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Manage application startup and shutdown (replaces `@app.on_event("startup")`).
+
+    Startup computes the user-facing access URL, stores it on
+    `app.state.access_url`, adds it to the CORS allow-list when CORS middleware
+    is configured, and logs it if the UI is enabled. Code after `yield` runs on shutdown.
+    """
+    # --- STARTUP LOGIC ---
+    logger = logging.getLogger(__name__)
+    app_config = get_config()
+
+    # Prioritize runtime info from app.state (set by run.py), with a fallback to static config.
+    host = getattr(app.state, "server_host", app_config.resolved_host)
+    port = getattr(app.state, "server_port", app_config.resolved_port)
+    is_ssl = getattr(app.state, "server_is_ssl", bool(app_config.ssl_keyfile and app_config.ssl_certfile))
+
+    protocol = "https" if is_ssl else "http"
+    display_host = "localhost" if host in ("0.0.0.0", "127.0.0.1") else host
+    access_url = f"{protocol}://{display_host}:{port}"
+    app.state.access_url = access_url  # Store for potential later use
+
+    # Add the UI origin to the CORS allow-list. Starlette >= 0.35 stores the middleware
+    # keyword arguments on `.kwargs`; older releases exposed them as `.options`.
+    for middleware in app.user_middleware:
+        if middleware.cls == CORSMiddleware:
+            cors_options = getattr(middleware, "kwargs", None) or getattr(middleware, "options", {})
+            ui_origin = urlparse(access_url)._replace(path="", params="", query="", fragment="").geturl()
+            if ui_origin not in cors_options["allow_origins"]:
+                cors_options["allow_origins"].append(ui_origin)
+                logger.info(f"Dynamically added '{ui_origin}' to allowed CORS origins.")
+            break # Stop after finding the first CORS middleware
+
+    # Log the final, user-friendly message.
+    if app_config.enable_ui:
+        if host and port:
+            logger.info(f"\n\nTo view the gradio web ui of speaches open your browser and visit:\n\n{access_url}\n\n")
+        # If host or port is missing, do not print a possibly incorrect URL.
+        # Uses the resolved values: `app_config.host`/`port` are None unless SPEACHES_HOST/PORT are set.
+
+    yield
+    # --- SHUTDOWN LOGIC (if any) ---
+    logger.info("Speaches server shutting down.")
+
+
+# --- Main Application Factory ---
+
+def create_app() -> FastAPI:
+    config = get_config()  # HACK
+    setup_logger(config.log_level)
+    logger = logging.getLogger(__name__)
+
+    logger.debug(f"Config: {config}")
+
+    dependencies = []
+    if config.api_key is not None:
+        dependencies.append(ApiKeyDependency)
+
+    app = FastAPI(
+        dependencies=dependencies,
+        openapi_tags=TAGS_METADATA,
+        lifespan=lifespan # Use the modern lifespan manager
+    )
+
+    # Register global exception handler for APIProxyError
+    _register_exception_handlers(app)
+
+    # Include all API routers
+    _register_routers(app)
+
+    # Mount static files and the Gradio UI
+    _mount_ui_and_static(app, config)
+
+    # The original CORS middleware block, now corrected and fully preserved.
     if config.allow_origins is not None:
         app.add_middleware(
             CORSMiddleware,
-            allow_origins=config.allow_origins,
+            allow_origins=list(config.allow_origins), # Use a mutable list copy
             allow_credentials=True,
             allow_methods=["*"],
             allow_headers=["*"],
         )
 
-    if config.enable_ui:
-        import gradio as gr
-
-        from speaches.ui.app import create_gradio_demo
-
-        app = gr.mount_gradio_app(app, create_gradio_demo(config), path="")
-
-        logger = logging.getLogger("speaches.main")
-        if config.host and config.port:
-            display_host = "localhost" if config.host in ("0.0.0.0", "127.0.0.1") else config.host
-            url = f"http://{display_host}:{config.port}/"
-            logger.info(f"\n\nTo view the gradio web ui of speaches open your browser and visit:\n\n{url}\n\n")
-        # If host or port is missing, do not print a possibly incorrect URL.
-
     return app

From f5694aaeaa6e3491c8629cf9921c00bfc5528b6d Mon Sep 17 00:00:00 2001
From: Andrew Hundt <ATHundt@gmail.com>
Date: Thu, 26 Jun 2025 22:21:18 -0400
Subject: [PATCH 8/8] [docs] `speaches` unified server + user cli minimal
 documentation update

---
 README.md                     | 39 +++++++++++++++++++++++++++++++----
 contributing.md               | 28 ++++++++++++++++++++++---
 docs/installation.md          | 38 ++++++++++++++++++++++++++++++++--
 docs/usage/model-discovery.md |  8 +++----
 docs/usage/speech-to-text.md  |  6 +++---
 docs/usage/text-to-speech.md  |  6 +++---
 6 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 252985e3..ccb6c45c 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,43 @@
+# Speaches
+
 > [!NOTE]
 > This project was previously named `faster-whisper-server`. I've decided to change the name from `faster-whisper-server`, as the project has evolved to support more than just ASR.
 
-# Speaches
-
 `speaches` is an OpenAI API-compatible server supporting streaming transcription, translation, and speech generation. Speach-to-Text is powered by [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and for Text-to-Speech [piper](https://github.com/rhasspy/piper) and [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M) are used. This project aims to be Ollama, but for TTS/STT models.
 
 See the documentation for installation instructions and usage: [speaches.ai](https://speaches.ai/)
 
+## Quick Start
+
+Get a fully functional `speaches` server running in a few commands.
+
+### 1. Installation
+
+Install the `speaches` command-line tool and all its dependencies using `uv`. The default installation includes the web server and UI.
+
+```bash
+
+git clone https://github.com/speaches-ai/speaches.git
+cd speaches
+uv venv
+source .venv/bin/activate
+uv sync --all-extras --upgrade
+uv tool install .
+
+# Downloading a Text To Speech (TTS) model:
+uvx speaches model download speaches-ai/Kokoro-82M-v1.0-ONNX
+
+# Downloading a Speech To Text (STT) model:
+uvx speaches model download Systran/faster-distil-whisper-small.en
+
+# run the speaches server then open http://localhost:8000 in your web browser to try speaches
+speaches serve --host 0.0.0.0 --port 8000
+```
+
+Visit http://localhost:8000 in your web browser.
+
+The server will start, and the console will display the correct URL (e.g., `http://localhost:8000`) to access the Gradio web UI. Once the server is running, you can open a new terminal to use client commands like `speaches model ls`.
+
 ## Features:
 
 - OpenAI API compatible. All tools and SDKs that work with OpenAI's API should work with `speaches`.
@@ -19,8 +50,8 @@ See the documentation for installation instructions and usage: [speaches.ai](htt
 - Text-to-Speech via `kokoro`(Ranked #1 in the [TTS Arena](https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena)) and `piper` models.
 - GPU and CPU support.
 - [Deployable via Docker Compose / Docker](https://speaches.ai/installation/)
-- [Highly configurable](https://speaches.ai/usage/realtime-api)
-- [Realtime API](https://speaches.ai/configuration/)
+- [Highly configurable](https://speaches.ai/configuration/)
+- [Realtime API](https://speaches.ai/usage/realtime-api/)
 
 Please create an issue if you find a bug, have a question, or a feature suggestion.
 
diff --git a/contributing.md b/contributing.md
index 78ec5556..04f25a8c 100644
--- a/contributing.md
+++ b/contributing.md
@@ -1,3 +1,25 @@
-uv venv
-source .venv/bin/activate
-uv sync --all-extras
+### Development Environment Setup
+
+We use `uv` for fast and reliable dependency management. Follow these steps to set up your environment for contributing.
+
+1.  **Clone the Repository:**
+    ```bash
+    git clone https://github.com/speaches-ai/speaches.git
+    cd speaches
+    ```
+
+2.  **Create and Activate a Virtual Environment:**
+    Using a virtual environment is essential for isolating project dependencies.
+    ```bash
+    uv venv
+    source .venv/bin/activate
+    uv sync --all-extras --upgrade
+    ```
+
+3.  **Install All Dependencies in Editable Mode:**
+    The following command installs the `speaches` package itself, plus all optional dependencies required for development and running the full test suite. The `-e` flag (for "editable") links the installation to your source code, so you don't need to reinstall after making changes.
+    ```bash
+    uv pip install -e '.[dev]'
+    ```
+
+You are now set up for development. You can run the server with `speaches serve` and run the test suite with `pytest`.
\ No newline at end of file
diff --git a/docs/installation.md b/docs/installation.md
index 882aae29..003f397f 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -108,11 +108,45 @@ docker compose up --detach
 
 ## Python (requires Python 3.12+ and `uv` package manager)
 
+# Installation
+
+The `speaches` package is distributed as a single, "batteries-included" application. The standard installation provides all features, including the API server, web UI, and client tools.
+
+## For Users
+
+The recommended way to install `speaches` is as a command-line tool using `uv`. This installs the application and its dependencies into an isolated environment, making the `speaches` command available globally on your system.
+
 ```bash
 git clone https://github.com/speaches-ai/speaches.git
 cd speaches
 uv venv
 source .venv/bin/activate
-uv sync --all-extras
-uvicorn --factory --host 0.0.0.0 speaches.main:create_app
+uv sync --all-extras --upgrade
+uv tool install .
+speaches serve --host 0.0.0.0 --port 8000
 ```
+
+After installation, you can run the server with `speaches serve` or explore other commands with `speaches --help`.
+
+## For Developers (Contributing to Speaches)
+
+If you plan to contribute to the `speaches` project, you must install it in "editable" mode from a local clone of the repository. This setup links the `speaches` command directly to your source code, so your edits are reflected immediately without reinstalling.
+
+1.  **Clone the Repository:**
+    ```bash
+    git clone https://github.com/speaches-ai/speaches.git
+    cd speaches
+    ```
+
+2.  **Create and Activate a Virtual Environment:**
+    ```bash
+    uv venv
+    source .venv/bin/activate
+    ```
+
+3.  **Install in Editable Mode with Development Extras:**
+    This command installs the project along with all optional dependencies needed for running tests and other development tasks.
+    ```bash
+    uv pip install -e '.[dev]'
+    ```
+The `speaches` command is now available in your shell for development and testing.
diff --git a/docs/usage/model-discovery.md b/docs/usage/model-discovery.md
index a82488ce..0c0f7386 100644
--- a/docs/usage/model-discovery.md
+++ b/docs/usage/model-discovery.md
@@ -7,7 +7,7 @@ Before you can do anything useful with `speaches`, you'll need to want to downlo
 === "Speaches CLI"
 
     ```bash
-    uvx speaches-cli registry ls
+    uvx speaches registry ls
     ```
 
 === "cURL"
@@ -21,7 +21,7 @@ The above command will display a list of all available models. You can filter th
 === "Speaches CLI"
 
     ```bash
-    uvx speaches-cli registry ls --task automatic-speech-recognition
+    uvx speaches registry ls --task automatic-speech-recognition
     ```
 
 === "cURL"
@@ -37,7 +37,7 @@ You'll then want to download the model you want to use. You can do this by makin
 === "Speaches CLI"
 
     ```bash
-    uvx speaches-cli model download Systran/faster-distil-whisper-small.en
+    uvx speaches model download Systran/faster-distil-whisper-small.en
     ```
 
 === "cURL"
@@ -51,7 +51,7 @@ The downloaded model will now be included in the list of available models when y
 === "Speaches CLI"
 
     ```bash
-    uvx speaches-cli model ls
+    uvx speaches model ls
     ```
 
 === "cURL"
diff --git a/docs/usage/speech-to-text.md b/docs/usage/speech-to-text.md
index 3adfb28c..8e67c5a0 100644
--- a/docs/usage/speech-to-text.md
+++ b/docs/usage/speech-to-text.md
@@ -15,13 +15,13 @@ TODO: add a note about vad
 export SPEACHES_BASE_URL="http://localhost:8000"
 
 # Listing all available STT models
-uvx speaches-cli registry ls --task automatic-speech-recognition | jq '.data | [].id'
+uvx speaches registry ls --task automatic-speech-recognition | jq '.data[].id'
 
 # Downloading a Systran/faster-distil-whisper-small.en model
-uvx speaches-cli model download Systran/faster-distil-whisper-small.en
+uvx speaches model download Systran/faster-distil-whisper-small.en
 
 # Check that the model has been installed
-uvx speaches-cli model ls --task text-to-speech | jq '.data | map(select(.id == "Systran/faster-distil-whisper-small.en"))'
+uvx speaches model ls --task automatic-speech-recognition | jq '.data | map(select(.id == "Systran/faster-distil-whisper-small.en"))'
 ```
 
 ## Usage
diff --git a/docs/usage/text-to-speech.md b/docs/usage/text-to-speech.md
index 8f317bcf..12ed63bc 100644
--- a/docs/usage/text-to-speech.md
+++ b/docs/usage/text-to-speech.md
@@ -8,13 +8,13 @@
 export SPEACHES_BASE_URL="http://localhost:8000"
 
 # Listing all available TTS models
-uvx speaches-cli registry ls --task text-to-speech | jq '.data | [].id'
+uvx speaches registry ls --task text-to-speech | jq '.data[].id'
 
 # Downloading a TTS model
-uvx speaches-cli model download speaches-ai/Kokoro-82M-v1.0-ONNX
+uvx speaches model download speaches-ai/Kokoro-82M-v1.0-ONNX
 
 # Check that the model has been installed
-uvx speaches-cli model ls --task text-to-speech | jq '.data | map(select(.id == "speaches-ai/Kokoro-82M-v1.0-ONNX"))'
+uvx speaches model ls --task text-to-speech | jq '.data | map(select(.id == "speaches-ai/Kokoro-82M-v1.0-ONNX"))'
 ```
 
 ## Usage