diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e990e6..b2e643f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.0.5] - 2025-05-15 + +- cli: fix some config options not being set when defined in a config file: `topic_detection_config` and `speaker_diarization_config` + ## [3.0.4] - 2025-04-16 - Support for new parameters `prefer_current_speaker` and `speaker_sensitivity` in Speaker Diarization diff --git a/VERSION b/VERSION index b0f2dcb..eca690e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.4 +3.0.5 diff --git a/asr_metrics/cli.py b/asr_metrics/cli.py index 79a3e40..a4ea25f 100644 --- a/asr_metrics/cli.py +++ b/asr_metrics/cli.py @@ -1,4 +1,5 @@ """Entrypoint for SM metrics""" + import argparse import asr_metrics.diarization.sm_diarization_metrics.cookbook as diarization_metrics diff --git a/asr_metrics/wer/__main__.py b/asr_metrics/wer/__main__.py index 70ce5b0..3d220b7 100644 --- a/asr_metrics/wer/__main__.py +++ b/asr_metrics/wer/__main__.py @@ -2,6 +2,7 @@ Simple script to run WER analysis using Whisper normalisers Prints results to terminal """ + import difflib import json from pathlib import Path diff --git a/asr_metrics/wer/normalizers/basic.py b/asr_metrics/wer/normalizers/basic.py index 697daf3..c8fabb1 100644 --- a/asr_metrics/wer/normalizers/basic.py +++ b/asr_metrics/wer/normalizers/basic.py @@ -50,15 +50,19 @@ def remove_symbols_and_diacritics(self, s: str, keep=""): and drop any diacritics (category 'Mn' and some manual mappings) """ return "".join( - c - if c in keep - else self.additional_diacritics[c] - if c in self.additional_diacritics - else "" - if unicodedata.category(c) == "Mn" - else " " - if unicodedata.category(c)[0] in "MSP" - else c + ( + c + if c in keep + else ( + self.additional_diacritics[c] + if c in self.additional_diacritics + else ( + "" + if unicodedata.category(c) == "Mn" + else " " if unicodedata.category(c)[0] in "MSP" else c + ) + ) + ) for c in unicodedata.normalize("NFKD", s) ) diff --git a/examples/notification_flow/batch_transcription_client.py b/examples/notification_flow/batch_transcription_client.py index d9dc809..21d5a8b 100644 --- a/examples/notification_flow/batch_transcription_client.py +++ b/examples/notification_flow/batch_transcription_client.py @@ -1,6 +1,7 @@ """ Client module which calls the Speechmatics API """ + import sqlite3 from speechmatics.batch_client import BatchClient diff --git a/examples/notification_flow/cronjob.py b/examples/notification_flow/cronjob.py index dddff96..aabef27 100644 --- a/examples/notification_flow/cronjob.py +++ b/examples/notification_flow/cronjob.py @@ -3,6 +3,7 @@ Async fallback checking in case of webhook failure is a common pattern in production systems """ + import sqlite3 from speechmatics.batch_client import BatchClient diff --git a/examples/notification_flow/notification_server.py b/examples/notification_flow/notification_server.py index 2d57109..463f874 100644 --- a/examples/notification_flow/notification_server.py +++ b/examples/notification_flow/notification_server.py @@ -3,6 +3,7 @@ This is a dev example - DO NOT USE IN PRODUCTION """ + import json import sqlite3 diff --git a/speechmatics/cli.py b/speechmatics/cli.py index 42aa7db..8178b37 100755 --- a/speechmatics/cli.py +++ b/speechmatics/cli.py @@ -36,8 +36,8 @@ ConnectionSettings, RTSpeakerDiarizationConfig, RTTranslationConfig, - ServerMessageType, SentimentAnalysisConfig, + ServerMessageType, SummarizationConfig, TopicDetectionConfig, TranscriptionConfig, @@ -271,8 +271,6 @@ def get_transcription_config( "max_delay_mode", "diarization", "channel_diarization_labels", - "speaker_diarization_sensitivity", - "speaker_diarization_max_speakers", ]: if args.get(option) is not None: config[option] = args[option] @@ -280,9 +278,6 @@ def get_transcription_config( "streaming_mode", "enable_partials", "enable_entities", - "enable_translation_partials", - "enable_transcription_partials", - "speaker_diarization_prefer_current_speaker", ]: config[option] = True if args.get(option) else config.get(option) @@ -355,17 +350,15 @@ def get_transcription_config( diarization_config = config.get("speaker_diarization_config", {}) if diarization_config or args.get("diarization") == "speaker": max_speakers = args.get( - "speaker_diarization_max_speakers", - diarization_config.get("speaker_diarization_max_speakers", None), - ) + "speaker_diarization_max_speakers" + ) or diarization_config.get("max_speakers", None) prefer_current_speaker = args.get( - "speaker_diarization_prefer_current_speaker", - diarization_config.get("speaker_diarization_prefer_current_speaker", None), - ) + "speaker_diarization_prefer_current_speaker" + ) or diarization_config.get("prefer_current_speaker", None) speaker_sensitivity = args.get( - "speaker_diarization_sensitivity", - diarization_config.get("speaker_diarization_sensitivity", None), - ) + "speaker_diarization_sensitivity" + ) or diarization_config.get("speaker_sensitivity", None) + if args["mode"] == "rt": config["speaker_diarization_config"] = RTSpeakerDiarizationConfig( max_speakers=max_speakers, @@ -434,7 +427,7 @@ def get_transcription_config( args_topic_detection = args.get("detect_topics") if args_topic_detection or file_topic_detection_config is not None: topic_detection_config = TopicDetectionConfig() - topics = args.get("topics", file_topic_detection_config.get("topics")) + topics = args.get("topics") or file_topic_detection_config.get("topics") if topics: topic_detection_config.topics = topics config["topic_detection_config"] = topic_detection_config diff --git a/speechmatics/client.py b/speechmatics/client.py index 842883b..f7fae26 100644 --- a/speechmatics/client.py +++ b/speechmatics/client.py @@ -140,13 +140,13 @@ def _set_recognition_config(self): "transcription_config": self.transcription_config.as_config(), } if self.transcription_config.translation_config is not None: - msg[ - "translation_config" - ] = self.transcription_config.translation_config.asdict() + msg["translation_config"] = ( + self.transcription_config.translation_config.asdict() + ) if self.transcription_config.audio_events_config is not None: - msg[ - "audio_events_config" - ] = self.transcription_config.audio_events_config.asdict() + msg["audio_events_config"] = ( + self.transcription_config.audio_events_config.asdict() + ) self._call_middleware(ClientMessageType.SetRecognitionConfig, msg, False) return msg @@ -168,13 +168,13 @@ def _start_recognition(self, audio_settings): "transcription_config": self.transcription_config.as_config(), } if self.transcription_config.translation_config is not None: - msg[ - "translation_config" - ] = self.transcription_config.translation_config.asdict() + msg["translation_config"] = ( + self.transcription_config.translation_config.asdict() + ) if self.transcription_config.audio_events_config is not None: - msg[ - "audio_events_config" - ] = self.transcription_config.audio_events_config.asdict() + msg["audio_events_config"] = ( + self.transcription_config.audio_events_config.asdict() + ) self.session_running = True self._call_middleware(ClientMessageType.StartRecognition, msg, False) LOGGER.debug(msg) diff --git a/tests/data/transcription_config.json b/tests/data/transcription_config.json index 8eb95df..bcaef84 100644 --- a/tests/data/transcription_config.json +++ b/tests/data/transcription_config.json @@ -2,6 +2,12 @@ "language": "xy", "domain": "fake", "enable_entities": true, + "diarization": "speaker", + "speaker_diarization_config": { + "prefer_current_speaker": true, + "max_speakers": 5, + "speaker_sensitivity": 0.3 + }, "translation_config": { "target_languages": ["es"], "enable_partials": false diff --git a/tests/test_cli.py b/tests/test_cli.py index 141d858..7a19d59 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,8 +6,7 @@ import pytest import toml -from speechmatics import cli -from speechmatics import cli_parser +from speechmatics import cli, cli_parser from speechmatics.constants import ( BATCH_SELF_SERVICE_URL, RT_SELF_SERVICE_URL, @@ -771,6 +770,12 @@ def test_rt_main_with_config_file(mock_server): assert msg["transcription_config"]["domain"] == "fake" assert msg["transcription_config"]["enable_entities"] is True assert msg["transcription_config"].get("operating_point") is None + assert msg["transcription_config"]["diarization"] == "speaker" + assert msg["transcription_config"]["speaker_diarization_config"] == { + "prefer_current_speaker": True, + "max_speakers": 5, + "speaker_sensitivity": 0.3, + } assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is False assert msg["translation_config"]["target_languages"] == ["es"] @@ -795,6 +800,8 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): "--output-locale=en-US", "--domain=different", "--operating-point=enhanced", + "--speaker-diarization-max-speakers=3", + "--speaker-diarization-sensitivity=0.7", audio_path, ] @@ -816,6 +823,12 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): assert msg["transcription_config"]["enable_entities"] is True assert msg["transcription_config"]["output_locale"] == "en-US" assert msg["transcription_config"]["operating_point"] == "enhanced" + assert msg["transcription_config"]["diarization"] == "speaker" + assert msg["transcription_config"]["speaker_diarization_config"] == { + "prefer_current_speaker": True, + "max_speakers": 3, + "speaker_sensitivity": 0.7, + } assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is True assert msg["translation_config"]["target_languages"] == ["fr"]