Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asr-worker/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "asr-worker"
version = "1.18.0"
version = "1.18.1"
description = "Local ASR worker for english-trainer (Parakeet EN/JA routing)"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
2 changes: 1 addition & 1 deletion asr-worker/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion mcp-server/dist/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { randomUUID } from 'node:crypto';
import { createFramedMessageParser, writeMessage } from './mcp_stdio.js';

const SERVER_NAME = 'minimum-headroom';
const SERVER_VERSION = '1.18.0';
const SERVER_VERSION = '1.18.1';
const PROTOCOL_VERSION = '2024-11-05';
const FACE_WS_URL = process.env.FACE_WS_URL ?? 'ws://127.0.0.1:8765/ws';
const FACE_AUTH_TOKEN = (() => {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "minimum-headroom",
"version": "1.18.0",
"version": "1.18.1",
"private": true,
"type": "module",
"scripts": {
Expand Down
2 changes: 1 addition & 1 deletion tts-worker/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "minimum-headroom-tts-worker"
version = "1.18.0"
version = "1.18.1"
description = "Minimum Headroom TTS worker (Kokoro ONNX default, optional Qwen3-TTS)"
readme = "README.md"
requires-python = ">=3.12"
Expand Down
9 changes: 8 additions & 1 deletion tts-worker/src/tts_worker/kokoro_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from .chunking import TextChunk, split_text_chunks
from .engine import EngineMetadata
from .kokoro_text import strip_japanese_silent_punctuation


@dataclass(frozen=True)
Expand Down Expand Up @@ -105,6 +106,8 @@ def synthesize_chunks(self, chunks: Iterable[TextChunk], *, voice_override: str
source_text = chunk.text
if chunk.is_phonemes:
source_text = self._to_ja_phonemes(chunk.text)
if not source_text:
continue

audio, chunk_rate = self._kokoro_create(
source_text,
Expand All @@ -130,10 +133,14 @@ def synthesize_chunks(self, chunks: Iterable[TextChunk], *, voice_override: str
return combined.astype(np.float32, copy=False), sample_rate

def _to_ja_phonemes(self, text: str) -> str:
cleaned = strip_japanese_silent_punctuation(text).strip()
if not cleaned:
return ''

capture = io.StringIO()
# Some pyopenjtalk-backed helpers print progress text to stdout; keep protocol stdout JSON-only.
with contextlib.redirect_stdout(capture):
raw = self._ja_g2p(text)
raw = self._ja_g2p(cleaned)

echoed = capture.getvalue().strip()
if echoed:
Expand Down
17 changes: 17 additions & 0 deletions tts-worker/src/tts_worker/kokoro_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

import re

# Misaki's pyopenjtalk-backed Japanese G2P maps the JA full stop 「。」
# (U+3002 IDEOGRAPHIC FULL STOP) and its fullwidth twin (U+FF0E FULLWIDTH
# FULL STOP) to an audible phoneme rather than silence, so Kokoro renders
# chunk endings as a short "ye"-like sound.
# Chunk boundaries already separate sentences, so dropping these
# characters before phonemization removes the artifact without losing
# meaningful prosody. Other JA punctuation (ideographic comma, fullwidth
# exclamation/question marks, middle dot) is left in place because it
# either drives prosodic pausing or has not been observed to produce an
# artifact.
#
# The character class is written with explicit code-point escapes on
# purpose: a literal fullwidth full stop is easily folded to an ASCII "."
# by editors or width-normalising tooling, which would silently make the
# pattern strip ordinary periods from ASCII text as well.
_SILENT_PUNCT_RE = re.compile(r'[\u3002\uFF0E]+')


def strip_japanese_silent_punctuation(text: str) -> str:
    """Return *text* with Japanese full stops removed.

    Every run of U+3002 (ideographic full stop) and U+FF0E (fullwidth
    full stop) is deleted, wherever it occurs in the string. All other
    characters, including the ASCII period ".", are returned unchanged.
    No whitespace trimming is performed; callers that need it should
    strip the result themselves.

    Args:
        text: The text about to be phonemized. May be empty.

    Returns:
        The input without any U+3002 / U+FF0E characters. The result is
        the empty string when the input is empty or consists solely of
        those characters.
    """
    return _SILENT_PUNCT_RE.sub('', text)
54 changes: 54 additions & 0 deletions tts-worker/tests/test_kokoro_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from __future__ import annotations

import sys
import unittest
from pathlib import Path


ROOT_DIR = Path(__file__).resolve().parents[2]
SRC_DIR = ROOT_DIR / 'tts-worker' / 'src'
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))

from tts_worker.kokoro_text import strip_japanese_silent_punctuation


class StripJapaneseSilentPunctuationTests(unittest.TestCase):
    """Unit tests for ``strip_japanese_silent_punctuation``."""

    def test_drops_trailing_full_stop(self) -> None:
        """A single trailing ideographic full stop is removed."""
        self.assertEqual(strip_japanese_silent_punctuation('あ。'), 'あ')

    def test_drops_runs_of_full_stops(self) -> None:
        """A run of consecutive full stops is removed as a whole."""
        self.assertEqual(strip_japanese_silent_punctuation('あ。。。。。'), 'あ')

    def test_drops_internal_and_trailing_full_stops(self) -> None:
        """Full stops are removed anywhere in the text, not only at the end."""
        self.assertEqual(
            strip_japanese_silent_punctuation('あ。い。う。'), 'あいう'
        )

    def test_drops_fullwidth_period(self) -> None:
        """U+FF0E FULLWIDTH FULL STOP is removed like the ideographic one."""
        # Written as an escape so the input cannot be confused with (or
        # normalised into) an ASCII period, which must be preserved; see
        # test_passes_through_ascii_text.
        self.assertEqual(strip_japanese_silent_punctuation('あ\uff0e'), 'あ')

    def test_preserves_other_japanese_punctuation(self) -> None:
        """Punctuation other than the two full stops is left untouched."""
        # Comma, exclamation, question mark, middle dot, and ellipsis are
        # kept because they either drive prosody or have not been observed
        # to produce an artifact.
        self.assertEqual(
            strip_japanese_silent_punctuation('あ、い!う?え・お…'),
            'あ、い!う?え・お…',
        )

    def test_returns_empty_for_punctuation_only_input(self) -> None:
        """Input made only of full stops collapses to the empty string."""
        self.assertEqual(strip_japanese_silent_punctuation('。。。'), '')

    def test_passes_through_empty_string(self) -> None:
        """The empty string is returned unchanged."""
        self.assertEqual(strip_japanese_silent_punctuation(''), '')

    def test_passes_through_ascii_text(self) -> None:
        """ASCII text, including the ASCII period, is returned unchanged."""
        self.assertEqual(
            strip_japanese_silent_punctuation('Hello, world.'),
            'Hello, world.',
        )


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion tts-worker/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading