diff --git a/Dockerfile.redhat b/Dockerfile.redhat index bc574eaaf2..3686d98493 100644 --- a/Dockerfile.redhat +++ b/Dockerfile.redhat @@ -100,6 +100,37 @@ WORKDIR /ovms/third_party/opencv RUN if [ "$VERBOSE_LOGS" == "ON" ] ; then export VERBOSE=1 ; fi && ./install_opencv.sh ####### End of OpenCV +# Build espeak-ng from sources +FROM base_build as espeak_build + +ARG ESPEAK_NG_VERSION=1.51.1 +WORKDIR /tmp/espeak_build + +RUN dnf install -y libtool automake autoconf pkgconfig && \ + dnf clean all + +RUN cd /tmp/espeak_build && \ + git clone --branch ${ESPEAK_NG_VERSION} https://github.com/espeak-ng/espeak-ng.git espeak-ng-src 2>&1 && \ + ls -lah /tmp/espeak_build/ + +RUN cd /tmp/espeak_build/espeak-ng-src && \ + touch AUTHORS NEWS && \ + libtoolize --force --copy && \ + aclocal && \ + autoheader && \ + autoconf && \ + automake --add-missing --copy && \ + ./configure --prefix=/usr/local \ + --disable-shared \ + --enable-static \ + --disable-mbrola \ + --disable-klatt \ + --without-audio && \ + make -j$(nproc) && \ + make install + +RUN rm -rf /tmp/espeak_build + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # FROM base_build as build ARG BASE_IMAGE @@ -404,6 +435,9 @@ LABEL base-image=${RELEASE_BASE_IMAGE} ENV PYTHONPATH=/ovms/lib/python:/ovms/python_deps WORKDIR / +COPY --from=espeak_build /usr/local/share/espeak-ng-data /usr/local/share/espeak-ng-data +ENV ESPEAK_DATA_PATH=/usr/local/share/espeak-ng-data + SHELL ["/bin/bash", "-o", "pipefail", "-c"] COPY ./install_redhat_gpu_drivers.sh /install_gpu_drivers.sh # hadolint ignore=DL3003,DL3041,SC2164,SC1091 diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu index 15e47daf20..33f6cbc4e0 100644 --- a/Dockerfile.ubuntu +++ b/Dockerfile.ubuntu @@ -87,6 +87,37 @@ WORKDIR /ovms/third_party/opencv RUN ./install_opencv.sh ####### End of OpenCV +# Build espeak-ng from sources + +ARG ESPEAK_NG_VERSION=1.51.1 +WORKDIR /tmp/espeak_build + +RUN apt-get update && apt-get install -y --no-install-recommends \ 
+ libtool automake autoconf pkg-config && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN cd /tmp/espeak_build && \ + git clone --branch ${ESPEAK_NG_VERSION} https://github.com/espeak-ng/espeak-ng.git espeak-ng-src 2>&1 && \ + ls -lah /tmp/espeak_build/ + +RUN cd /tmp/espeak_build/espeak-ng-src && \ + touch AUTHORS NEWS && \ + libtoolize --force --copy && \ + aclocal && \ + autoheader && \ + autoconf && \ + automake --add-missing --copy && \ + ./configure --prefix=/usr/local \ + --disable-shared \ + --enable-static \ + --disable-mbrola \ + --disable-klatt \ + --without-audio && \ + make -j$(nproc) && \ + make install + +RUN rm -rf /tmp/espeak_build + ################### BASE BUILD ########################## FROM base_build as build ARG BASE_IMAGE @@ -99,6 +130,7 @@ RUN if [ "$BASE_OS" == "ubuntu24" ] ; then apt-get update && \ apt-get install -y software-properties-common --no-install-recommends; add-apt-repository 'ppa:deadsnakes/ppa' -y && \ apt-get clean && rm -rf /var/lib/apt/lists/* ; fi ENV PIP_BREAK_SYSTEM_PACKAGES=1 + RUN apt-get update && apt-get install --no-install-recommends -y \ libgflags-dev \ bc \ @@ -262,7 +294,6 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/: # FROM BASE BUILD COPY --from=base_build /opt/opencv /opt/opencv/ COPY third_party /ovms/third_party/ - # Mediapipe COPY BUILD.bazel /ovms/ COPY *\.bzl /ovms/ @@ -392,6 +423,9 @@ RUN if [ -f /ovms_release/lib/libovms_shared.so ] ; then mv /ovms_release/lib/li # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # FROM $BASE_IMAGE as release +ARG http_proxy +ARG https_proxy +ARG no_proxy ARG INSTALL_RPMS_FROM_URL= ARG INSTALL_DRIVER_VERSION="24.26.30049" ARG GPU=0 @@ -408,6 +442,9 @@ SHELL ["/bin/bash", "-c"] WORKDIR / COPY release_files/drivers /drivers +COPY --from=base_build /usr/local/share/espeak-ng-data /usr/local/share/espeak-ng-data +ENV ESPEAK_DATA_PATH=/usr/local/share/espeak-ng-data + SHELL ["/bin/bash", "-o", 
"pipefail", "-c"] ARG INSTALL_DRIVER_VERSION="24.39.31294" COPY ./install_ubuntu_gpu_drivers.sh /tmp/install_gpu_drivers.sh diff --git a/demos/audio/export_kokoro.py b/demos/audio/export_kokoro.py new file mode 100644 index 0000000000..8ca3ed89f6 --- /dev/null +++ b/demos/audio/export_kokoro.py @@ -0,0 +1,141 @@ +# +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +import torch +import json +import time +from pathlib import Path +from kokoro.model import KModel +from kokoro import KPipeline +import openvino as ov +import shutil + +MAX_SEQ_LENGTH = 500 + + +class KokoroTTSPipeline: + def __init__(self): + model_id = "hexgrad/Kokoro-82M" + self.pipeline = KPipeline(lang_code="a", repo_id=model_id) + + def __call__(self, text: str, voice: str = "af_heart"): + with torch.no_grad(): + generator = self.pipeline(text, voice=voice) + result = next(generator) + return result.audio + + +class OVKModel(KModel): + def __init__(self, model_dir: Path, device: str, plugin_config: dict = {}, repo_id: str = "hexgrad/Kokoro-82M"): + torch.nn.Module.__init__(self) + + core = ov.Core() + + self.repo_id = repo_id + with (model_dir / "config.json").open("r", encoding="utf-8") as f: + config = json.load(f) + self.vocab = config["vocab"] + print("Starting to compile OpenVINO model on device:", device) + + start = time.time() + self.model = core.compile_model(model_dir / "openvino_model.xml", device.upper(), config=plugin_config) + print(f"Model compiled successfully in {time.time() - start:.2f}s.") + self.context_length = config["plbert"]["max_position_embeddings"] + + @property + def device(self): + return torch.device("cpu") + + def forward_with_tokens(self, input_ids: torch.LongTensor, ref_s: torch.FloatTensor, speed: float = 1) -> tuple[torch.FloatTensor, torch.LongTensor]: + text_len = input_ids.shape[-1] + + if text_len < MAX_SEQ_LENGTH: + # 0 in this model context is acting as BOS/EOS/PAD. + # Since 0 causes artifacts, we might consider space (16) or period (4). 
+ padding_value = 16 + input_ids = torch.nn.functional.pad(input_ids, (0, MAX_SEQ_LENGTH - text_len), value=padding_value) + + start = time.time() + print("Running inference on OpenVINO model...") + outputs = self.model([input_ids, ref_s, torch.tensor(speed)]) + print(f"Inference completed in {time.time() - start:.2f}s.") + + audio = torch.from_numpy(outputs[0]) + pred_dur = torch.from_numpy(outputs[1]) + + if text_len < MAX_SEQ_LENGTH: + pred_dur = pred_dur[:text_len] + # Approximate audio trimming based on duration ratio + total_dur = outputs[1].sum() + valid_dur = pred_dur.sum() + if total_dur > 0: + audio_keep = int(audio.shape[-1] * (valid_dur / total_dur)) + audio = audio[:audio_keep] + + return audio, pred_dur + + @staticmethod + def download_and_convert(model_dir: Path, repo_id: str, ttsPipeline: KokoroTTSPipeline): + import openvino as ov + from huggingface_hub import hf_hub_download + import gc + + if not (model_dir / "openvino_model.xml").exists(): + print(f"Converting Kokoro model to OpenVINO format at {model_dir}...") + model = ttsPipeline.pipeline.model + model.forward = model.forward_with_tokens + input_ids = torch.randint(1, 100, (48,)).numpy() + input_ids = torch.LongTensor([[0, *input_ids, 0]]) + style = torch.randn(1, 256) + speed = torch.randint(1, 10, (1,), dtype=torch.float32) + + ov_model = ov.convert_model(model, example_input=(input_ids, style, speed), input=[ + ov.PartialShape("[1, 2..]"), ov.PartialShape([1, -1])]) + ov.save_model(ov_model, model_dir / "openvino_model.xml") + hf_hub_download(repo_id=repo_id, filename="config.json", local_dir=model_dir) + else: + print(f"OpenVINO model already exists at {model_dir}, skipping conversion.") + + gc.collect() + + @staticmethod + def convert_to_static(input_model_dir: Path, output_model_dir: Path): + import openvino as ov + + print(f"Converting OpenVINO model to static shapes at {input_model_dir}...") + core = ov.Core() + model = core.read_model(input_model_dir / "openvino_model.xml") + 
static_shape = {"input_ids": [1, MAX_SEQ_LENGTH], "ref_s": [1, 256], "speed": [1], } + model.reshape(static_shape) + print("Reshaped model inputs:", model.inputs) + ov.save_model(model, output_model_dir / "openvino_model.xml") + print("Conversion to static shapes completed.") + # Copy config file + shutil.copy(input_model_dir / "config.json", output_model_dir / "config.json") + + +if __name__ == "__main__": + + model_id = "hexgrad/Kokoro-82M-v1.1-zh" + + # Download model from Hugging Face and convert to OpenVINO format. + pipeline = KokoroTTSPipeline() + + # Convert and save the Kokoro model to OpenVINO format + OVKModel.download_and_convert(Path("./kokoro_openvino_model_zh"), repo_id=model_id, ttsPipeline=pipeline) + + # To run inference on NPU, model must have static input shapes + OVKModel.convert_to_static(Path("./kokoro_openvino_model_zh"), Path("./kokoro_static_openvino_model_zh")) + # # Execution on NPU require config file + # config = { + # "NPU": { + # "NPU_USE_NPUW": "YES", + # "NPUW_DEVICES": "NPU,CPU", + # "NPUW_KOKORO": "YES", + # } + # } + + # # NPUW_CACHE_DIR can be used to avoid compilation on every run + # config["NPU"]["NPUW_CACHE_DIR"] = "./npu_cache_kokoro" \ No newline at end of file diff --git a/demos/audio/tts_test_strings.py b/demos/audio/tts_test_strings.py new file mode 100644 index 0000000000..79b1194a3e --- /dev/null +++ b/demos/audio/tts_test_strings.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +"""Send a battery of tricky TTS test strings to an OpenAI-compatible +speech endpoint, one by one, and save each result as a numbered WAV file. + +Usage: + python tts_test_strings.py --endpoint http://localhost:8000/v3 \ + --model kokoro \ + [--voice None] \ + [--output-dir tts_output] +""" + +import argparse +import os +import sys +import time + +from openai import OpenAI + +TEST_STRINGS = [ + 'Dr. A. B. Carter Jr. met Sen. O\'Neill at 5 p.m., Wed., in Washington, D.C.', + 'Mr. Smith, Ph.D., arrived on Fri. at 6:30 a.m.; Mrs. 
Jones left at noon.', + 'We meet on 01/02/2025 at 05:30 IST; is that India or Israel time?', + 'The deadline is 2025\u201102\u201101 23:59 UTC\u221205:00 (EST).', + 'He finished 1st; she was 22nd\u2014barely.', + 'Prices: $1,234.56 vs \u20ac1.234,56; also \u00a512 345 (thin space).', + 'Add \u00be cup, then \u00bd tsp; total \u2248 1\u00bc cups.', + 'Chapter XLIV starts on page ix; version v2.0.0 follows v1.12.9.', + 'Dose: 5 mg vs 5 \u03bcg\u2014don\'t confuse micrograms with milligrams.', + 'Avogadro\'s number is 6.022e23; \u03c0 \u2248 3.14159; \u221a2 \u2248 1.4142.', + 'Temperature dropped to \u221210 \u00b0C (14 \u00b0F) with 90% RH.', + 'Visit https://example.com/a/b?x=1&y=2#frag or email ops+alerts@example.org.', + 'Open C:\\Program Files\\Project\\config.yaml or /usr/local/bin/run.sh.', + '.NET, Node.js, C#, C++17, and Rust\'s crate\u2011names\u2011with\u2011hyphens.', + '"WYSIWYG," "GIF" (hard or soft g?), "SQL" (sequel or S\u2011Q\u2011L?).', + 'I will present the present to the lead singer who stepped on the lead.', + 'They desert the desert; the dove dove; he wound the wound.', + 'Please record the record before the minute is up in a minute.', + 'She sells seashells by the seashore; truly Irish wristwatch.', + 'Unique New York, toy boat, red leather yellow leather.', + 'A na\u00efve co\u00f6perative fa\u00e7ade in S\u00e3o Paulo; \u0141\u00f3d\u017a and Krak\u00f3w in Poland.', + 'Pi\u00f1ata, jalape\u00f1o, cr\u00e8me br\u00fbl\u00e9e, bouillabaisse, d\u00e9j\u00e0 vu.', + '\U0001f44d\U0001f3fb is a thumbs\u2011up with light skin tone; \U0001f9d1\u200d\U0001f4bb writes code; \U0001f468\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466 is a family; \U0001f1f5\U0001f1f1 is a flag.', + 'Faces: \U0001f642\U0001f609\U0001f610\U0001f611\U0001f636; hearts: \u2764\ufe0f\U0001f9e1\U0001f49b\U0001f49a\U0001f499; mixed: \U0001f937\u200d\u2642\ufe0f\U0001f926\u200d\u2640\ufe0f.', + 'Latin "A" vs Cyrillic "\u0410"; Greek "\u03c1" vs Latin "p"; micro "\u00b5" vs 
Greek "\u03bc".', + '\u05e9\u05dc\u05d5\u05dd and \u0645\u0631\u062d\u0628\u064b\u0627 appear with left\u2011to\u2011right text in one line.', + 'Prosody markers: \u02c8primary, \u02ccsecondary, and length \u02d0 are tricky for tokenizers.', + 'Arrows for intonation: \u2197 rising, \u2198 falling, \u2193 drop.', + 'He said, "She replied, \'no\u2014never\u2026\'," then left\u2014silently.', + 'Parentheticals (like this\u2014really!) and em\u2011dashes\u2014here\u2014confuse prosody.', + 'Let f(x)=x^2; then d/dx x^2=2x; \u2202/\u2202x is the operator.', + 'Inline code x += 1; and TeX E=mc^2 should be read clearly.', + 'N,N\u2011Diethyl\u2011meta\u2011toluamide (DEET) differs from p\u2011xylene and m\u2011cresol.', + 'The RFC 7231/HTTP\u2011semantics "GET" vs "HEAD" distinction matters.', + 'Read "macOS" vs "Mac OS", "iOS", "SQL", "URL", and "S3" correctly.', +] + + +def main(): + parser = argparse.ArgumentParser( + description="Send TTS test strings to an OpenAI-compatible speech endpoint." + ) + parser.add_argument( + "--endpoint", required=True, + help="Base URL of the API (e.g. http://localhost:8000/v3)" + ) + parser.add_argument( + "--model", required=True, + help="Model name to use for speech generation" + ) + parser.add_argument( + "--voice", default=None, + help="Voice name (default: voice1)" + ) + parser.add_argument( + "--output-dir", default="tts_output", + help="Directory to save output WAV files (default: tts_output)" + ) + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + + client = OpenAI(base_url=args.endpoint, api_key="unused") + + total = len(TEST_STRINGS) + print(f"Sending {total} test strings to {args.endpoint} (model={args.model}, voice={args.voice})") + print(f"Output directory: {args.output_dir}\n") + + succeeded = 0 + failed = 0 + total_size_kb = 0.0 + t_start = time.time() + + for idx, text in enumerate(TEST_STRINGS, start=1): + preview = text[:80] + ("..." 
if len(text) > 80 else "") + print(f"[{idx:2d}/{total}] {preview}") + + out_path = os.path.join(args.output_dir, f"{idx:02d}.wav") + t0 = time.time() + try: + response = client.audio.speech.create( + model=args.model, + voice=args.voice, + input=text, + ) + response.write_to_file(out_path) + elapsed = time.time() - t0 + size_kb = os.path.getsize(out_path) / 1024 + total_size_kb += size_kb + succeeded += 1 + print(f" -> {out_path} ({size_kb:.1f} KB, {elapsed:.2f}s)") + except Exception as exc: + elapsed = time.time() - t0 + failed += 1 + print(f" !! FAILED after {elapsed:.2f}s: {exc}", file=sys.stderr) + + total_elapsed = time.time() - t_start + print(f"\n{'='*60}") + print(f"Summary: {succeeded} succeeded, {failed} failed out of {total}") + print(f"Total time: {total_elapsed:.2f}s (avg {total_elapsed/total:.2f}s per string)") + print(f"Total audio size: {total_size_kb:.1f} KB") + print(f"{'='*60}") + + +if __name__ == "__main__": + main() diff --git a/src/BUILD b/src/BUILD index 71321ca7ee..0318099727 100644 --- a/src/BUILD +++ b/src/BUILD @@ -563,6 +563,7 @@ ovms_cc_library( "//src/image_gen:image_gen_calculator", "//src/audio/speech_to_text:s2t_calculator", "//src/audio/text_to_speech:t2s_calculator", + "//src/audio/kokoro:kokoro_calculator", "//src/audio:audio_utils", "//src/image_gen:imagegen_init", "//src/llm:openai_completions_api_handler", diff --git a/src/audio/audio_utils.cpp b/src/audio/audio_utils.cpp index 77b38e70df..59668be23f 100644 --- a/src/audio/audio_utils.cpp +++ b/src/audio/audio_utils.cpp @@ -22,6 +22,7 @@ #include "src/logging.hpp" #include #include +#include #include #include #pragma warning(push) @@ -188,3 +189,33 @@ void prepareAudioOutput(void** ppData, size_t& pDataSize, uint16_t bitsPerSample auto outputPreparationTime = (timer.elapsed(OUTPUT_PREPARATION)) / 1000; SPDLOG_LOGGER_DEBUG(t2s_calculator_logger, "Output preparation time: {} ms", outputPreparationTime); } + +void prepareAudioOutputKokoro(void** ppData, size_t& pDataSize, 
size_t speechSize, const float* waveformPtr) { + enum : unsigned int { + OUTPUT_PREPARATION, + TIMER_END + }; + Timer timer; + timer.start(OUTPUT_PREPARATION); + + drwav_data_format format; + format.container = drwav_container_riff; + format.format = DR_WAVE_FORMAT_IEEE_FLOAT; + format.channels = 1; + format.sampleRate = 24000; // Kokoro native sample rate + format.bitsPerSample = 32; + drwav wav; + + auto status = drwav_init_memory_write(&wav, ppData, &pDataSize, &format, nullptr); + if (status == DRWAV_FALSE) { + throw std::runtime_error("Failed to initialize WAV writer"); + } + drwav_uint64 framesWritten = drwav_write_pcm_frames(&wav, speechSize, waveformPtr); + if (framesWritten != speechSize) { + throw std::runtime_error("Failed to write all frames"); + } + drwav_uninit(&wav); + timer.stop(OUTPUT_PREPARATION); + auto outputPreparationTime = (timer.elapsed(OUTPUT_PREPARATION)) / 1000; + SPDLOG_LOGGER_DEBUG(t2s_calculator_logger, "Output preparation time: {} ms", outputPreparationTime); +} diff --git a/src/audio/audio_utils.hpp b/src/audio/audio_utils.hpp index cbeea8b457..0928d03f3d 100644 --- a/src/audio/audio_utils.hpp +++ b/src/audio/audio_utils.hpp @@ -25,3 +25,4 @@ bool isWavBuffer(const std::string buf); std::vector readWav(const std::string_view& wavData); std::vector readMp3(const std::string_view& mp3Data); void prepareAudioOutput(void** ppData, size_t& pDataSize, uint16_t bitsPerSample, size_t speechSize, const float* waveformPtr); +void prepareAudioOutputKokoro(void** ppData, size_t& pDataSize, size_t speechSize, const float* waveformPtr); diff --git a/src/audio/kokoro/BUILD b/src/audio/kokoro/BUILD new file mode 100644 index 0000000000..d7d3b64b1a --- /dev/null +++ b/src/audio/kokoro/BUILD @@ -0,0 +1,60 @@ +# +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library") +load("//:common_settings.bzl", "ovms_cc_library") + +ovms_cc_library( + name = "kokoro_servable", + hdrs = ["kokoro_servable.hpp"], + deps= ["//third_party:openvino", + "//src:libovms_ovinferrequestsqueue", + ], + visibility = ["//visibility:public"], + alwayslink = 1, +) + +ovms_cc_library( + name = "kokoro_calculator", + srcs = ["kokoro_calculator.cc"], + deps = [ + "@mediapipe//mediapipe/framework:calculator_framework", + "//src:httppayload", + "//src:libovmslogging", + "kokoro_calculator_cc_proto", + "//src/port:dr_audio", + "//src/port:rapidjson_stringbuffer", + "//src/port:rapidjson_writer", + ":kokoro_servable", + "//third_party:genai", + "//src/audio:audio_utils", + "//src:executingstreamidguard", + "//src:model_metric_reporter", + "//third_party/espeak_ng:espeak_ng", + ], + visibility = ["//visibility:public"], + alwayslink = 1, +) + +mediapipe_proto_library( + name = "kokoro_calculator_proto", + srcs = ["kokoro_calculator.proto"], + visibility = ["//visibility:private"], + deps = [ + "@mediapipe//mediapipe/framework:calculator_options_proto", + "@mediapipe//mediapipe/framework:calculator_proto", + ], +) diff --git a/src/audio/kokoro/kokoro_calculator.cc b/src/audio/kokoro/kokoro_calculator.cc new file mode 100644 index 0000000000..728e0f88b4 --- /dev/null +++ b/src/audio/kokoro/kokoro_calculator.cc @@ -0,0 +1,433 @@ +//***************************************************************************** +// Copyright 2025 Intel 
Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include + +#pragma warning(push) +#pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 6246 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#pragma GCC diagnostic pop +#pragma warning(pop) + +#include "src/audio/audio_utils.hpp" +#include "src/http_payload.hpp" +#include "src/logging.hpp" +#include "src/port/dr_audio.hpp" + +#include "../../model_metric_reporter.hpp" +#include "../../executingstreamidguard.hpp" + +#pragma warning(push) +#pragma warning(disable : 6001 4324 6385 6386) +#include "absl/strings/str_cat.h" +#pragma warning(pop) + +#include + +#include "kokoro_servable.hpp" + +#ifdef _WIN32 +#include +#include +#endif + +using namespace ovms; + +namespace { + +#ifndef espeakPHONEMES_IPA +#define espeakPHONEMES_IPA 0x02 +#endif +#ifndef espeakPHONEMES_NO_STRESS +#define espeakPHONEMES_NO_STRESS 0x08 +#endif + +std::string retone(const std::string& p) { + std::string result = p; + + auto replaceAll = [](std::string& s, const std::string& from, const std::string& to) { + size_t pos = 0; + while ((pos = s.find(from, pos)) != std::string::npos) { + s.replace(pos, from.size(), to); + pos += to.size(); + } + }; + + 
// Tone mark replacements + replaceAll(result, "˧˩˧", "↓"); // third tone + replaceAll(result, "˧˥", "↗"); // second tone + replaceAll(result, "˥˩", "↘"); // fourth tone + replaceAll(result, "˥", "→"); // first tone + + // Unicode character replacements (UTF-8 encoded) + replaceAll(result, "\xCA\x97\xCC\x89", "ɨ"); // chr(635)+chr(809) + replaceAll(result, "\xCA\x91\xCC\x89", "ɨ"); // chr(633)+chr(809) + + // Verify chr(809) removed + if (result.find("\xCC\x89") != std::string::npos) { + SPDLOG_WARN("Combining diacritic (chr 809) still present: {}", result); + } + + return result; +} + +std::string getEspeakVoice(const std::string& isoLanguageCode) { + // ISO 639-1 codes with optional region codes + if (isoLanguageCode == "en-us") { + return "en-us"; // American English (default for 'en') + } else if (isoLanguageCode == "en-gb") { + return "en"; // British English + } else if (isoLanguageCode == "en") { + return "en-us"; // Default to American English when only 'en' specified + } else if (isoLanguageCode == "es") { + return "es"; + } else if (isoLanguageCode == "fr") { + return "fr"; + } else if (isoLanguageCode == "hi") { + return "hi"; + } else if (isoLanguageCode == "it") { + return "it"; + } else if (isoLanguageCode == "ja") { + return "ja"; + } else if (isoLanguageCode == "pt-br") { + return "pt"; // Brazilian Portuguese + } else if (isoLanguageCode == "zh" || isoLanguageCode == "zh-cn") { + return "cmn-latn-pinyin"; // Mandarin Chinese + } + return ""; // Unsupported +} + +bool isSupportedLanguage(const std::string& isoLanguageCode) { + // Only accept ISO 639-1 codes and regional variants + return !getEspeakVoice(isoLanguageCode).empty(); +} + +void espeakPhonemizeAll(const std::string& textUtf8, std::string& outIpa, const std::string& language = "en", bool noStress = true) { + outIpa.clear(); + auto& espeak = ovms::EspeakInstance::instance(); + if (!espeak.isReady()) { + SPDLOG_ERROR("eSpeak not initialized"); + return; + } + + std::lock_guard 
guard(espeak.mutex()); + + // Get the eSpeak voice name from the ISO language code + // Kokoro supports 9 languages: American English, British English, Spanish, French, Hindi, Italian, Japanese, Brazilian Portuguese, Mandarin Chinese + std::string voiceName = getEspeakVoice(language); + if (voiceName.empty()) { + // This should not happen if validation was done, but fallback just in case + SPDLOG_ERROR("Invalid language code '{}' passed to espeakPhonemizeAll", language); + voiceName = "en-us"; + } + if (espeak_SetVoiceByName(voiceName.c_str()) != EE_OK) { + SPDLOG_ERROR("Failed to set eSpeak voice '{}'", voiceName); + if (voiceName != "en-us" && espeak_SetVoiceByName("en-us") == EE_OK) { + voiceName = "en-us"; + } else { + return; + } + } + + const int mode = espeakPHONEMES_IPA | (noStress ? espeakPHONEMES_NO_STRESS : 0); + const void* pos = static_cast(textUtf8.c_str()); + const char* endPtr = static_cast(pos) + textUtf8.size(); + std::string rawIpa; + + while (pos && static_cast(pos) < endPtr) { + const char* ipaChunk = espeak_TextToPhonemes(&pos, espeakCHARS_UTF8, mode); + if (ipaChunk && *ipaChunk) { + if (!rawIpa.empty()) { + rawIpa.push_back(' '); + } + rawIpa.append(ipaChunk); + } + } + + // Strip combining diacriticals (U+0300..U+036F) and collapse spaces + std::string cleaned; + cleaned.reserve(rawIpa.size()); + for (size_t i = 0; i < rawIpa.size(); ++i) { + unsigned char c = static_cast(rawIpa[i]); + if (i + 1 < rawIpa.size()) { + unsigned char next = static_cast(rawIpa[i + 1]); + if ((c == 0xCC && next >= 0x80) || (c == 0xCD && next <= 0xAF)) { + i++; + continue; + } + } + cleaned.push_back(c); + } + + outIpa.reserve(cleaned.size()); + bool lastSpace = false; + for (char c : cleaned) { + if (std::isspace(static_cast(c))) { + if (!lastSpace) { + outIpa.push_back(' '); + lastSpace = true; + } + } else { + outIpa.push_back(c); + lastSpace = false; + } + } + + if (!outIpa.empty() && std::isspace(static_cast(outIpa.back()))) { + outIpa.pop_back(); + } + + 
SPDLOG_DEBUG("IPA phonemes: '{}' (length: {})", outIpa, outIpa.size()); +} + + +size_t utf8CharLen(unsigned char lead) { + if (lead < 0x80) + return 1; + if ((lead >> 5) == 0x6) + return 2; + if ((lead >> 4) == 0xE) + return 3; + if ((lead >> 3) == 0x1E) + return 4; + return 1; +} + +void tokenize(const std::string& textUtf8, + std::vector& tokenIds, + const ovms::VocabIndex& ix, + const std::string& language = "en") { + tokenIds.clear(); + // Reserve estimated capacity to avoid reallocations + tokenIds.reserve(textUtf8.size() / 2); + + size_t pos = 0; + const size_t n = textUtf8.size(); + size_t unknownCount = 0; + + while (pos < n) { + size_t maxTry = std::min(ix.max_token_bytes, n - pos); + int foundId = -1; + size_t foundLen = 0; + + for (size_t len = maxTry; len > 0; --len) { + auto it = ix.by_token.find(std::string(textUtf8.data() + pos, len)); + if (it != ix.by_token.end()) { + foundId = it->second; + foundLen = len; + break; + } + } + + if (foundId >= 0) { + tokenIds.push_back(foundId); + pos += foundLen; + } else { + const unsigned char lead = static_cast(textUtf8[pos]); + const size_t adv = utf8CharLen(lead); + std::string unknownBytes(textUtf8.data() + pos, std::min(adv, n - pos)); + unknownCount++; + SPDLOG_DEBUG("Tokenizer [lang={}]: unknown phoneme at pos {}: '{}' (skipping)", + language, pos, unknownBytes); + pos += std::min(adv, n - pos); + } + } + if (unknownCount > 0) { + SPDLOG_WARN("Tokenize [lang={}]: {} unknown phonemes found. Produced {} token ids. 
" + "Consider updating vocabulary for better {} speech quality.", + language, unknownCount, tokenIds.size(), language); + } else { + SPDLOG_DEBUG("Tokenize [lang={}]: produced {} ids without unknown phonemes", language, tokenIds.size()); + } +} +} // namespace + +namespace mediapipe { + +const std::string KOKORO_SESSION_SIDE_PACKET_TAG = "KOKORO_NODE_RESOURCES"; + +class KokoroCalculator : public CalculatorBase { + static const std::string INPUT_TAG_NAME; + static const std::string OUTPUT_TAG_NAME; + std::string defaultLanguage; // Language configured in graph pbtxt + +public: + static absl::Status GetContract(CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + cc->Inputs().Tag(INPUT_TAG_NAME).Set(); + cc->InputSidePackets().Tag(KOKORO_SESSION_SIDE_PACKET_TAG).Set(); + cc->Outputs().Tag(OUTPUT_TAG_NAME).Set(); + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) final { + SPDLOG_LOGGER_DEBUG(kokoro_calculator_logger, "KokoroCalculator [Node: {}] Close", cc->NodeName()); + return absl::OkStatus(); + } + + absl::Status Open(CalculatorContext* cc) final { + SPDLOG_LOGGER_DEBUG(kokoro_calculator_logger, "KokoroCalculator [Node: {}] Open", cc->NodeName()); + + // Read language from graph configuration + const auto& options = cc->Options(); + this->defaultLanguage = options.has_language() ? options.language() : "en"; + + // Normalize language code to lowercase + std::transform(this->defaultLanguage.begin(), this->defaultLanguage.end(), this->defaultLanguage.begin(), ::tolower); + + // Validate language is supported + if (!isSupportedLanguage(this->defaultLanguage)) { + return absl::InvalidArgumentError(absl::StrCat( + "Invalid language in graph config: '", this->defaultLanguage, "'. ", + "Supported ISO 639-1 language codes: en, es, fr, hi, it, ja, pt-br, zh. 
", + "Regional variants: en-us, en-gb, pt-br, zh-cn" + )); + } + + SPDLOG_LOGGER_DEBUG(kokoro_calculator_logger, + "KokoroCalculator [Node: {}] configured for language: {}", + cc->NodeName(), this->defaultLanguage); + + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) final { + SPDLOG_LOGGER_DEBUG(kokoro_calculator_logger, "KokoroCalculator [Node: {}] Process start", cc->NodeName()); + try { + KokoroServableMap servablesMap = cc->InputSidePackets() + .Tag(KOKORO_SESSION_SIDE_PACKET_TAG) + .Get(); + auto servableIt = servablesMap.find(cc->NodeName()); + RET_CHECK(servableIt != servablesMap.end()) + << "Could not find initialized Kokoro node named: " << cc->NodeName(); + auto servable = servableIt->second; + + const auto& payload = cc->Inputs().Tag(INPUT_TAG_NAME).Get(); + auto it = payload.parsedJson->FindMember("input"); + RET_CHECK(it != payload.parsedJson->MemberEnd()) << "Missing 'input' in request"; + RET_CHECK(it->value.IsString()) << "'input' must be a string"; + const std::string text = it->value.GetString(); + + // Read optional "voice" parameter (OpenAI TTS API) + std::string voiceName; + auto voiceIt = payload.parsedJson->FindMember("voice"); + if (voiceIt != payload.parsedJson->MemberEnd() && voiceIt->value.IsString()) { + voiceName = voiceIt->value.GetString(); + } + + // Language is configured in the graph pbtxt, not from request + // Use the defaultLanguage set during Open() + const std::string language = this->defaultLanguage; + SPDLOG_DEBUG("Using configured language: {}", language); + + // Text -> IPA phonemization + std::string phonemes; + + // Use eSpeak for all languages + espeakPhonemizeAll(text, phonemes, language, /*noStress=*/false); + if(language == "zh" || language == "zh-cn"){ + phonemes = retone(phonemes); + } + + SPDLOG_DEBUG("Input text: '{}' (language: {}), IPA phonemes ({} chars): '{}'", text, language, phonemes.size(), phonemes); + + // Preserve trailing punctuation from original text (eSpeak strips it) + 
// if (!text.empty()) { + // char last = text.back(); + // if (last == '.' || last == '!' || last == '?' || last == ';' || last == ':' || last == ',') { + // phonemes.push_back(last); + // } + // } + SPDLOG_DEBUG("After E2M mapping ({} chars): '{}'", phonemes.size(), phonemes); + // IPA -> Kokoro token IDs + const auto& vocabIx = servable->getVocabIndex(); + std::vector tokenIds; + tokenize(phonemes, tokenIds, vocabIx, language); + + // Wrap with PAD token (id=0) at both ends — matches official + // forward_with_tokens: input_ids = [[0, *tokens, 0]] + tokenIds.insert(tokenIds.begin(), 0); + tokenIds.push_back(0); + + // Voice embedding — select slice from voice pack based on content token count + size_t numContentTokens = tokenIds.size() >= 2 ? tokenIds.size() - 2 : 0; // exclude BOS pad + EOS + const float* voiceSlice = servable->getVoiceSlice(voiceName, numContentTokens); + RET_CHECK(voiceSlice != nullptr) << "No voice pack loaded (place .bin files in /voices/)"; + + auto inputIdsTensor = ov::Tensor{ov::element::i64, ov::Shape{1, tokenIds.size()}}; + auto refS = ov::Tensor{ov::element::f32, ov::Shape{1, KokoroServable::STYLE_DIM}}; + auto speed = ov::Tensor{ov::element::f32, ov::Shape{1}}; + + *reinterpret_cast(speed.data()) = 1.0f; + std::copy(tokenIds.data(), tokenIds.data() + tokenIds.size(), + reinterpret_cast(inputIdsTensor.data())); + std::copy(voiceSlice, voiceSlice + KokoroServable::STYLE_DIM, + reinterpret_cast(refS.data())); + + // Inference + ModelMetricReporter unused(nullptr, nullptr, "unused", 1); + auto executingStreamIdGuard = + std::make_unique(servable->getInferRequestsQueue(), unused); + ov::InferRequest& inferRequest = executingStreamIdGuard->getInferRequest(); + + inferRequest.set_tensor("input_ids", inputIdsTensor); + inferRequest.set_tensor("103", refS); + inferRequest.set_tensor("speed", speed); + inferRequest.start_async(); + inferRequest.wait(); + + // Collect audio output + auto out = 
inferRequest.get_tensor(inferRequest.get_compiled_model().outputs()[0]); + RET_CHECK(out.get_shape().size() == 1); + RET_CHECK(out.get_element_type() == ov::element::f32); + const size_t samples = out.get_shape()[0]; + const float* data = out.data(); + + SPDLOG_DEBUG("Model output: {} audio samples ({:.2f}s at 24kHz)", + samples, static_cast(samples) / 24000.0f); + + void* wavDataPtr = nullptr; + size_t wavSize = 0; + prepareAudioOutputKokoro(&wavDataPtr, wavSize, samples, data); + + auto output = std::make_unique(reinterpret_cast(wavDataPtr), wavSize); + drwav_free(wavDataPtr, NULL); + + cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(output.release(), cc->InputTimestamp()); + } catch (const std::exception& e) { + SPDLOG_ERROR("KokoroCalculator [Node: {}] Process failed: {}", cc->NodeName(), e.what()); + return absl::InvalidArgumentError(e.what()); + } catch (...) { + SPDLOG_ERROR("KokoroCalculator [Node: {}] Process failed: unknown error", cc->NodeName()); + return absl::InvalidArgumentError("Kokoro processing failed"); + } + SPDLOG_LOGGER_DEBUG(kokoro_calculator_logger, "KokoroCalculator [Node: {}] Process end", cc->NodeName()); + return absl::OkStatus(); + } +}; + +const std::string KokoroCalculator::INPUT_TAG_NAME{"HTTP_REQUEST_PAYLOAD"}; +const std::string KokoroCalculator::OUTPUT_TAG_NAME{"HTTP_RESPONSE_PAYLOAD"}; + +REGISTER_CALCULATOR(KokoroCalculator); + +} // namespace mediapipe diff --git a/src/audio/kokoro/kokoro_calculator.proto b/src/audio/kokoro/kokoro_calculator.proto new file mode 100644 index 0000000000..8ec0f43341 --- /dev/null +++ b/src/audio/kokoro/kokoro_calculator.proto @@ -0,0 +1,34 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +syntax = "proto2"; +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + + +message KokoroCalculatorOptions { + extend mediapipe.CalculatorOptions { + // https://github.com/google/mediapipe/issues/634 have to be unique in app + // no rule to obtain this + optional KokoroCalculatorOptions ext = 116423799; + } + + required string models_path = 1; + optional string target_device = 2; + optional string plugin_config = 3; + optional string language = 4; // ISO 639-1 language code (en, es, fr, hi, it, ja, pt-br, zh) +} diff --git a/src/audio/kokoro/kokoro_servable.hpp b/src/audio/kokoro/kokoro_servable.hpp new file mode 100644 index 0000000000..9a81f8f527 --- /dev/null +++ b/src/audio/kokoro/kokoro_servable.hpp @@ -0,0 +1,298 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma warning(push) +#pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#pragma GCC diagnostic pop +#pragma warning(pop) + +#include "openvino/runtime/core.hpp" +#include "../../ovinferrequestsqueue.hpp" + +#include +#include + +#include "src/audio/kokoro/kokoro_calculator.pb.h" +#include "src/logging.hpp" + +namespace ovms { + +struct VocabIndex { + std::unordered_map by_token; + size_t max_token_bytes = 1; +}; + +struct VoicePack { + std::vector data; // flat [numEntries * STYLE_DIM] + size_t numEntries = 0; +}; + +class EspeakInstance { +public: + static EspeakInstance& instance() { + static EspeakInstance inst; + return inst; + } + + bool isReady() const { return ready_; } + std::mutex& mutex() { return mutex_; } + +private: + EspeakInstance() { + ready_ = tryInit(); + if (!ready_) { + SPDLOG_ERROR("eSpeak-NG initialization failed (data path or voice not found)"); + } else { + SPDLOG_INFO("eSpeak-NG initialized successfully"); + } + } + + ~EspeakInstance() { + if (ready_) { + espeak_Terminate(); + } + } + + EspeakInstance(const EspeakInstance&) = delete; + EspeakInstance& operator=(const EspeakInstance&) = delete; + + bool tryInit() { + auto try_path = [](const char* path) -> bool { + int sr = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, + 0, path, + espeakINITIALIZE_DONT_EXIT); + if (sr <= 0) + return false; + // Try to initialize with Kokoro's supported language voices + // Kokoro supports: en-us (American English), en (British English), es (Spanish), fr (French), hi (Hindi), it (Italian), ja (Japanese), pt (Brazilian Portuguese), cmn (Mandarin Chinese) + if (espeak_SetVoiceByName("en-us") != EE_OK && + 
espeak_SetVoiceByName("en") != EE_OK && + espeak_SetVoiceByName("es") != EE_OK && + espeak_SetVoiceByName("fr") != EE_OK && + espeak_SetVoiceByName("hi") != EE_OK && + espeak_SetVoiceByName("it") != EE_OK && + espeak_SetVoiceByName("ja") != EE_OK && + espeak_SetVoiceByName("pt") != EE_OK && + espeak_SetVoiceByName("cmn") != EE_OK) { + return false; + } + return true; + }; + + if (try_path(nullptr)) + return true; + + static const char* ngPaths[] = { + "/usr/share/espeak-ng-data", + "/opt/homebrew/share/espeak-ng-data", + "/usr/local/share/espeak-ng-data", + "espeak-ng-data", + nullptr}; + for (int i = 0; ngPaths[i]; ++i) + if (try_path(ngPaths[i])) + return true; + + static const char* esPaths[] = { + "/usr/share/espeak-data", + "/usr/local/share/espeak-data", + "espeak-data", + nullptr}; + for (int i = 0; esPaths[i]; ++i) + if (try_path(esPaths[i])) + return true; + + return false; + } + + bool ready_ = false; + std::mutex mutex_; +}; + +struct KokoroServable { + static constexpr size_t STYLE_DIM = 256; + + std::filesystem::path parsedModelsPath; + std::shared_ptr model; + ov::CompiledModel compiledModel; + std::unique_ptr inferRequestsQueue; + VocabIndex vocabIndex; + std::unordered_map voicePacks; + std::string defaultVoiceName; + + KokoroServable(const std::string& modelDir, const std::string& targetDevice, const std::string& graphPath) { + EspeakInstance::instance(); + + auto fsModelsPath = std::filesystem::path(modelDir); + if (fsModelsPath.is_relative()) { + parsedModelsPath = (std::filesystem::path(graphPath) / fsModelsPath); + } else { + parsedModelsPath = fsModelsPath; + } + + vocabIndex = loadVocabFromConfig(parsedModelsPath); + loadVoicePacks(parsedModelsPath); + + ov::AnyMap properties = { + // Use ACCURACY execution mode to avoid fast-math approximation errors + // that accumulate in the deep decoder network and cause energy fade. 
+ ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY), + }; + //properties["INFERENCE_PRECISION_HINT"] = "f32"; + ov::Core core; + auto m_model = core.read_model(parsedModelsPath / std::filesystem::path("openvino_model.xml"), {}, properties); + compiledModel = core.compile_model(m_model, targetDevice, properties); + uint32_t numberOfParallelInferRequests = 1; + try { + numberOfParallelInferRequests = compiledModel.get_property(ov::optimal_number_of_infer_requests); + } catch (const ov::Exception& ex) { + SPDLOG_WARN("Failed to query OPTIMAL_NUMBER_OF_INFER_REQUESTS with error {}. Using 1 nireq.", ex.what()); + numberOfParallelInferRequests = 1u; + } + inferRequestsQueue = std::make_unique(compiledModel, numberOfParallelInferRequests); + } + + OVInferRequestsQueue& getInferRequestsQueue() { + return *inferRequestsQueue; + } + + const VocabIndex& getVocabIndex() const { + return vocabIndex; + } + + // Returns pointer to 256 floats for the given voice and token count. + // voiceName: requested voice (e.g. "af_alloy"). Falls back to default voice if not found. + // numContentTokens: number of token IDs excluding BOS/EOS padding. 
+ const float* getVoiceSlice(const std::string& voiceName, size_t numContentTokens) const { + auto it = voicePacks.find(voiceName); + if (it == voicePacks.end()) { + it = voicePacks.find(defaultVoiceName); + if (it == voicePacks.end()) { + return nullptr; + } + } + const auto& pack = it->second; + size_t idx = std::min(numContentTokens, pack.numEntries - 1); + return pack.data.data() + (idx * STYLE_DIM); + } + + bool hasVoice(const std::string& voiceName) const { + return voicePacks.count(voiceName) > 0; + } + + const std::string& getDefaultVoiceName() const { + return defaultVoiceName; + } + +private: + static VocabIndex loadVocabFromConfig(const std::filesystem::path& modelDir) { + VocabIndex ix; + auto configPath = modelDir / "config.json"; + std::ifstream ifs(configPath); + if (!ifs.is_open()) { + SPDLOG_ERROR("Failed to open Kokoro config: {}", configPath.string()); + return ix; + } + + std::stringstream buffer; + buffer << ifs.rdbuf(); + std::string jsonStr = buffer.str(); + + rapidjson::Document doc; + doc.Parse(jsonStr.c_str()); + if (doc.HasParseError()) { + SPDLOG_ERROR("Failed to parse Kokoro config JSON: {}", configPath.string()); + return ix; + } + + if (!doc.HasMember("vocab") || !doc["vocab"].IsObject()) { + SPDLOG_ERROR("Kokoro config missing 'vocab' object: {}", configPath.string()); + return ix; + } + + const auto& vocab = doc["vocab"]; + ix.by_token.reserve(vocab.MemberCount()); + for (auto it = vocab.MemberBegin(); it != vocab.MemberEnd(); ++it) { + if (!it->name.IsString() || !it->value.IsInt()) + continue; + std::string token = it->name.GetString(); + int id = it->value.GetInt(); + ix.by_token.emplace(token, id); + ix.max_token_bytes = std::max(ix.max_token_bytes, token.size()); + } + + SPDLOG_INFO("Loaded Kokoro vocabulary: {} tokens, max_token_bytes={}", + ix.by_token.size(), ix.max_token_bytes); + return ix; + } + + void loadVoicePacks(const std::filesystem::path& modelDir) { + auto voicesDir = modelDir / "voices"; + if 
(!std::filesystem::exists(voicesDir) || !std::filesystem::is_directory(voicesDir)) { + SPDLOG_WARN("No voices directory found at: {}", voicesDir.string()); + return; + } + + for (const auto& entry : std::filesystem::directory_iterator(voicesDir)) { + if (!entry.is_regular_file() || entry.path().extension() != ".bin") + continue; + + std::string name = entry.path().stem().string(); + auto fileSize = std::filesystem::file_size(entry.path()); + if (fileSize == 0 || fileSize % (STYLE_DIM * sizeof(float)) != 0) { + SPDLOG_ERROR("Voice file {} has invalid size {} (must be multiple of {})", + entry.path().string(), fileSize, STYLE_DIM * sizeof(float)); + continue; + } + + VoicePack pack; + pack.numEntries = fileSize / (STYLE_DIM * sizeof(float)); + pack.data.resize(pack.numEntries * STYLE_DIM); + + std::ifstream ifs(entry.path(), std::ios::binary); + if (!ifs.read(reinterpret_cast(pack.data.data()), fileSize)) { + SPDLOG_ERROR("Failed to read voice file: {}", entry.path().string()); + continue; + } + + SPDLOG_INFO("Loaded voice pack '{}': {} entries x {} dims from {}", + name, pack.numEntries, STYLE_DIM, entry.path().string()); + + if (defaultVoiceName.empty()) { + defaultVoiceName = name; + } + voicePacks.emplace(name, std::move(pack)); + } + + SPDLOG_INFO("Loaded {} voice pack(s), default: '{}'", voicePacks.size(), defaultVoiceName); + } +}; + +using KokoroServableMap = std::unordered_map>; +} // namespace ovms diff --git a/src/logging.cpp b/src/logging.cpp index e89fce9a07..9d058d82dc 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -35,6 +35,7 @@ std::shared_ptr llm_executor_logger = std::make_shared llm_calculator_logger = std::make_shared("llm_calculator"); std::shared_ptr s2t_calculator_logger = std::make_shared("s2t_calculator"); std::shared_ptr t2s_calculator_logger = std::make_shared("t2s_calculator"); +std::shared_ptr kokoro_calculator_logger = std::make_shared("kokoro_calculator"); std::shared_ptr embeddings_calculator_logger = 
std::make_shared("embeddings_calculator"); std::shared_ptr rerank_calculator_logger = std::make_shared("rerank_calculator"); #endif @@ -78,6 +79,7 @@ static void register_loggers(const std::string& log_level, std::vectorset_pattern(default_pattern); s2t_calculator_logger->set_pattern(default_pattern); t2s_calculator_logger->set_pattern(default_pattern); + kokoro_calculator_logger->set_pattern(default_pattern); rerank_calculator_logger->set_pattern(default_pattern); embeddings_calculator_logger->set_pattern(default_pattern); #endif @@ -98,6 +100,7 @@ static void register_loggers(const std::string& log_level, std::vectorsinks().push_back(sink); s2t_calculator_logger->sinks().push_back(sink); t2s_calculator_logger->sinks().push_back(sink); + kokoro_calculator_logger->sinks().push_back(sink); rerank_calculator_logger->sinks().push_back(sink); embeddings_calculator_logger->sinks().push_back(sink); #endif @@ -119,6 +122,7 @@ static void register_loggers(const std::string& log_level, std::vector llm_executor_logger; extern std::shared_ptr llm_calculator_logger; extern std::shared_ptr s2t_calculator_logger; extern std::shared_ptr t2s_calculator_logger; +extern std::shared_ptr kokoro_calculator_logger; extern std::shared_ptr embeddings_calculator_logger; extern std::shared_ptr rerank_calculator_logger; #endif diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 9047765e75..e1436b5891 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -63,6 +63,7 @@ const std::string MediapipeGraphDefinition::LLM_NODE_CALCULATOR_NAME{"LLMCalcula const std::string MediapipeGraphDefinition::IMAGE_GEN_CALCULATOR_NAME{"ImageGenCalculator"}; const std::string MediapipeGraphDefinition::STT_NODE_CALCULATOR_NAME{"S2tCalculator"}; const std::string MediapipeGraphDefinition::TTS_NODE_CALCULATOR_NAME{"T2sCalculator"}; +const std::string 
MediapipeGraphDefinition::KOKORO_NODE_CALCULATOR_NAME{"KokoroCalculator"}; const std::string MediapipeGraphDefinition::EMBEDDINGS_NODE_CALCULATOR_NAME{"EmbeddingsCalculatorOV"}; const std::string MediapipeGraphDefinition::RERANK_NODE_CALCULATOR_NAME{"RerankCalculatorOV"}; @@ -625,6 +626,28 @@ Status MediapipeGraphDefinition::initializeNodes() { return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; } } + if (endsWith(config.node(i).calculator(), KOKORO_NODE_CALCULATOR_NAME)) { + auto& kokoroServableMap = this->sidePacketMaps.kokoroServableMap; + ResourcesCleaningGuard kokoroServablesCleaningGuard(kokoroServableMap); + if (!config.node(i).node_options().size()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Kokoro node missing options in graph: {}. ", this->name); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (config.node(i).name().empty()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Kokoro node name is missing in graph: {}. ", this->name); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = config.node(i).name(); + if (kokoroServableMap.find(nodeName) != kokoroServableMap.end()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Kokoro node name: {} already used in graph: {}. 
", nodeName, this->name); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + mediapipe::KokoroCalculatorOptions nodeOptions; + config.node(i).node_options(0).UnpackTo(&nodeOptions); + std::shared_ptr servable = std::make_shared(nodeOptions.models_path(), nodeOptions.target_device(), mgconfig.getBasePath()); + kokoroServableMap.insert(std::pair>(nodeName, std::move(servable))); + kokoroServablesCleaningGuard.disableCleaning(); + } } return StatusCode::OK; } diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 14c9e0679f..1067ca7d42 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -48,6 +48,7 @@ #include "../rerank/rerank_servable.hpp" #include "../audio/speech_to_text/s2t_servable.hpp" #include "../audio/text_to_speech/t2s_servable.hpp" +#include "../audio/kokoro/kokoro_servable.hpp" namespace ovms { class MediapipeGraphDefinitionUnloadGuard; @@ -66,6 +67,7 @@ using GenAiServableMap = std::unordered_map>; using SttServableMap = std::unordered_map>; using TtsServableMap = std::unordered_map>; +using KokoroServableMap = std::unordered_map>; using EmbeddingsServableMap = std::unordered_map>; using ImageGenerationPipelinesMap = std::unordered_map>; @@ -77,6 +79,7 @@ struct GraphSidePackets { RerankServableMap rerankServableMap; SttServableMap sttServableMap; TtsServableMap ttsServableMap; + KokoroServableMap kokoroServableMap; void clear() { pythonNodeResourcesMap.clear(); genAiServableMap.clear(); @@ -85,6 +88,7 @@ struct GraphSidePackets { rerankServableMap.clear(); sttServableMap.clear(); ttsServableMap.clear(); + kokoroServableMap.clear(); } bool empty() { return (pythonNodeResourcesMap.empty() && @@ -93,7 +97,8 @@ struct GraphSidePackets { embeddingsServableMap.empty() && rerankServableMap.empty() && sttServableMap.empty() && - ttsServableMap.empty()); + ttsServableMap.empty() && + kokoroServableMap.empty()); } 
}; @@ -136,6 +141,7 @@ class MediapipeGraphDefinition { static const std::string RERANK_NODE_CALCULATOR_NAME; static const std::string STT_NODE_CALCULATOR_NAME; static const std::string TTS_NODE_CALCULATOR_NAME; + static const std::string KOKORO_NODE_CALCULATOR_NAME; Status waitForLoaded(std::unique_ptr& unloadGuard, const uint32_t waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS); // Pipelines are not versioned and any available definition has constant version equal 1. diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 93b53fdf8e..b2016ac3aa 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -49,6 +49,7 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( const RerankServableMap& rerankServableMap, const SttServableMap& sttServableMap, const TtsServableMap& ttsServableMap, + const KokoroServableMap& kokoroServableMap, PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter) : name(name), @@ -58,7 +59,7 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputTypes(std::move(outputTypes)), inputNames(std::move(inputNames)), outputNames(std::move(outputNames)), - sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}), + sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap, kokoroServableMap}), pythonBackend(pythonBackend), currentStreamTimestamp(STARTING_TIMESTAMP), mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} @@ -92,6 +93,7 @@ const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = " const std::string MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; const std::string MediapipeGraphExecutor::STT_SESSION_SIDE_PACKET_TAG = 
"s2t_servable"; const std::string MediapipeGraphExecutor::TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; +const std::string MediapipeGraphExecutor::KOKORO_SESSION_SIDE_PACKET_TAG = "kokoro_servable"; const ::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0); } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index c165469395..af2e8d08e6 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -95,6 +95,7 @@ class MediapipeGraphExecutor { static const std::string RERANK_SESSION_SIDE_PACKET_TAG; static const std::string STT_SESSION_SIDE_PACKET_TAG; static const std::string TTS_SESSION_SIDE_PACKET_TAG; + static const std::string KOKORO_SESSION_SIDE_PACKET_TAG; static const ::mediapipe::Timestamp STARTING_TIMESTAMP; MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, @@ -107,6 +108,7 @@ class MediapipeGraphExecutor { const RerankServableMap& rerankServableMap, const SttServableMap& sttServableMap, const TtsServableMap& ttsServableMap, + const KokoroServableMap& kokoroServableMap, PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter); MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, @@ -157,6 +159,7 @@ class MediapipeGraphExecutor { inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP); inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP); inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[KOKORO_SESSION_SIDE_PACKET_TAG] = 
mediapipe::MakePacket(this->sidePacketMaps.kokoroServableMap).At(STARTING_TIMESTAMP); MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR); diff --git a/third_party/espeak_ng/BUILD b/third_party/espeak_ng/BUILD new file mode 100644 index 0000000000..31f51b73da --- /dev/null +++ b/third_party/espeak_ng/BUILD @@ -0,0 +1,20 @@ + +# third_party/espeak_ng/BUILD + +config_setting( + name = "is_macos", + values = {"cpu": "darwin"}, +) + +cc_library( + name = "espeak_ng", + linkopts = [ + "-L/usr/local/lib", + "-lespeak-ng", + ], + includes = [ + "/usr/local/include", + "/usr/local/include/espeak-ng", + ], + visibility = ["//visibility:public"], +)