Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,12 @@ The `tts` method takes the following arguments:
- `voice_engine`: The voice engine to use for the TTS request; a string (default `Play3.0-mini-http`).
- `PlayDialog`: Our large, expressive English model, which also supports multi-turn two-speaker dialogues.
- `PlayDialogMultilingual`: Our large, expressive multilingual model, which also supports multi-turn two-speaker dialogues.
- `PlayDialogArabic`: Our large, expressive Arabic model, which also supports multi-turn two-speaker dialogues.
- `Play3.0-mini`: Our small, fast multilingual model.
- `PlayHT2.0-turbo`: Our legacy English-only model.
- `protocol`: The protocol to use to communicate with the Play API (`http` by default, except for `PlayHT2.0-turbo`, which is `grpc` by default).
- `http`: Streaming and non-streaming audio over HTTP (supports `Play3.0-mini`, `PlayDialog`, and `PlayDialogMultilingual`).
- `ws`: Streaming audio over WebSockets (supports `Play3.0-mini`, `PlayDialog`, and `PlayDialogMultilingual`).
- `http`: Streaming and non-streaming audio over HTTP (supports `Play3.0-mini` and `PlayDialog*`).
- `ws`: Streaming audio over WebSockets (supports `Play3.0-mini` and `PlayDialog*`).
- `grpc`: Streaming audio over gRPC (supports `PlayHT2.0-turbo` for all customers, and `Play3.0-mini` ONLY for Play On-Prem customers).
- `streaming`: Whether or not to stream the audio in chunks (default True); non-streaming is only enabled for HTTP endpoints.

Expand Down Expand Up @@ -157,7 +158,7 @@ The `TTSOptions` class is used to specify the options for the TTS request. It ha
- `UKRAINIAN`
- `URDU`
- `XHOSA`
- The following options are additional inference-time hyperparameters which only apply to the `PlayDialog` and `PlayDialogMultilingual` models; if unset, the model will use default values chosen by Play.
- The following options are additional inference-time hyperparameters which only apply to the `PlayDialog*` models; if unset, the model will use default values chosen by Play.
- `voice_2` (multi-turn dialogue only): The second voice to use for a multi-turn TTS request; a string.
- A URL pointing to a Play voice manifest file.
- `turn_prefix` (multi-turn dialogue only): The prefix for the first speaker's turns in a multi-turn TTS request; a string.
Expand Down
4 changes: 2 additions & 2 deletions pyht/async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ async def _tts_http(
streaming: bool = True,
context: Optional[AsyncContext] = None
) -> AsyncIterable[bytes]:
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual"]
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual", "PlayDialogArabic"]
if voice_engine not in supported_voice_engines:
raise ValueError(f"Only {supported_voice_engines} are supported in the HTTP API; got {voice_engine}")

Expand Down Expand Up @@ -436,7 +436,7 @@ async def _tts_ws(
metrics: Metrics,
context: Optional[AsyncContext] = None
) -> AsyncIterable[bytes]:
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual"]
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual", "PlayDialogArabic"]
if voice_engine not in supported_voice_engines:
raise ValueError(f"Only {supported_voice_engines} are supported in the WebSocket API; got {voice_engine}")

Expand Down
10 changes: 5 additions & 5 deletions pyht/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class HTTPFormat(Enum):
FORMAT_PCM = "pcm"


# PlayDialog and PlayDialogMultilingual only
# PlayDialog* only
class CandidateRankingMethod(Enum):
# non-streaming only
DescriptionASRWithMeanProbRank = "description_asr_with_mean_prob"
Expand Down Expand Up @@ -199,7 +199,7 @@ class TTSOptions:
# only applies to Play3.0-mini and PlayDialogMultilingual
language: Optional[Language] = None

# only apply to PlayDialog and PlayDialogMultilingual
# only apply to PlayDialog*
# leave the _2 params None if generating single-speaker audio
voice_2: Optional[str] = None
turn_prefix: Optional[str] = None
Expand Down Expand Up @@ -293,7 +293,7 @@ def http_prepare_dict(text: List[str], options: TTSOptions, voice_engine: str) -
"language": options.language.value if options.language is not None else None,
"version": version,

# PlayDialog and PlayDialogMultilingual
# PlayDialog*
# leave the _2 params None if generating single-speaker audio
"voice_2": options.voice_2,
"turn_prefix": options.turn_prefix,
Expand Down Expand Up @@ -640,7 +640,7 @@ def _tts_http(
metrics: Metrics,
streaming: bool = True
) -> Iterable[bytes]:
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual"]
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual", "PlayDialogArabic"]
if voice_engine not in supported_voice_engines:
raise ValueError(f"Only {supported_voice_engines} are supported in the HTTP API; got {voice_engine}")

Expand Down Expand Up @@ -705,7 +705,7 @@ def _tts_ws(
voice_engine: Optional[str],
metrics: Metrics
) -> Iterable[bytes]:
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual"]
supported_voice_engines = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual", "PlayDialogArabic"]
if voice_engine not in supported_voice_engines:
raise ValueError(f"Only {supported_voice_engines} are supported in the WebSocket API; got {voice_engine}")

Expand Down
2 changes: 1 addition & 1 deletion pyht/inference_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import aiohttp

REQUIRED_MODELS = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual"]
REQUIRED_MODELS = ["Play3.0-mini", "PlayDialog", "PlayDialogMultilingual", "PlayDialogArabic"]
REQUIRED_URLS = ["http_streaming_url", "websocket_url"]


Expand Down
18 changes: 15 additions & 3 deletions pyht/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ def get_voice_engine_and_protocol(voice_engine: Optional[str], protocol: Optiona
voice_engine, protocol = _convert_deprecated_voice_engine(voice_engine, protocol)

elif voice_engine in ["PlayDialog", "PlayDialog-http", "PlayDialog-ws", "PlayDialogMultilingual",
"PlayDialogMultilingual-http", "PlayDialogMultilingual-ws"]:
if voice_engine in ["PlayDialog", "PlayDialogMultilingual"]:
"PlayDialogMultilingual-http", "PlayDialogMultilingual-ws", "PlayDialogArabic",
"PlayDialogArabic-http", "PlayDialogArabic-ws"]:
if voice_engine in ["PlayDialog", "PlayDialogMultilingual", "PlayDialogArabic"]:
if not protocol:
logging.warning("No protocol specified; using http")
protocol = "http"
Expand All @@ -81,7 +82,7 @@ def get_voice_engine_and_protocol(voice_engine: Optional[str], protocol: Optiona

else:
raise ValueError(f"Invalid voice engine: {voice_engine} (must be Play3.0-mini, PlayDialog, \
PlayDialogMultilingual, or PlayHT2.0-turbo).")
PlayDialogMultilingual, PlayDialogArabic, or PlayHT2.0-turbo).")

return voice_engine, protocol

Expand Down Expand Up @@ -146,6 +147,17 @@ def main():
assert get_voice_engine_and_protocol("PlayDialogMultilingual-ws", None) == ("PlayDialogMultilingual", "ws")
assert get_voice_engine_and_protocol("PlayDialogMultilingual-ws", "") == ("PlayDialogMultilingual", "ws")

assert get_voice_engine_and_protocol("PlayDialogArabic", "http") == ("PlayDialogArabic", "http")
assert get_voice_engine_and_protocol("PlayDialogArabic", "ws") == ("PlayDialogArabic", "ws")
assert get_voice_engine_and_protocol("PlayDialogArabic", None) == ("PlayDialogArabic", "http")
assert get_voice_engine_and_protocol("PlayDialogArabic", "") == ("PlayDialogArabic", "http")
assert get_voice_engine_and_protocol("PlayDialogArabic-http", "http") == ("PlayDialogArabic", "http")
assert get_voice_engine_and_protocol("PlayDialogArabic-http", None) == ("PlayDialogArabic", "http")
assert get_voice_engine_and_protocol("PlayDialogArabic-http", "") == ("PlayDialogArabic", "http")
assert get_voice_engine_and_protocol("PlayDialogArabic-ws", "ws") == ("PlayDialogArabic", "ws")
assert get_voice_engine_and_protocol("PlayDialogArabic-ws", None) == ("PlayDialogArabic", "ws")
assert get_voice_engine_and_protocol("PlayDialogArabic-ws", "") == ("PlayDialogArabic", "ws")

assert get_voice_engine_and_protocol(None, "grpc") == ("PlayHT2.0-turbo", "grpc")
assert get_voice_engine_and_protocol("", "grpc") == ("PlayHT2.0-turbo", "grpc")
assert get_voice_engine_and_protocol("PlayHT2.0-turbo", "grpc") == ("PlayHT2.0-turbo", "grpc")
Expand Down