From aeb9e6350849ed45bb8166c9e9bb249bc74b9d30 Mon Sep 17 00:00:00 2001 From: Claire Schaefer Date: Thu, 15 May 2025 11:34:48 +0100 Subject: [PATCH 1/4] cli: fix some config options not taken into account when set in a config file `speaker_diarization_config` and `topic_detection_config` could be set individually from dedicated cli options but not from config file passed via --config option. --- CHANGELOG.md | 4 ++++ VERSION | 2 +- speechmatics/cli.py | 25 +++++++++---------------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e990e6..b2e643f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.0.5] - 2025-05-15 + +- cli: fix some config options not being set when defined in a config file: `topic_detection_config` and `speaker_diarization_config` + ## [3.0.4] - 2025-04-16 - Support for new parameters `prefer_current_speaker` and `speaker_sensitivity` in Speaker Diarization diff --git a/VERSION b/VERSION index b0f2dcb..eca690e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.4 +3.0.5 diff --git a/speechmatics/cli.py b/speechmatics/cli.py index 42aa7db..8178b37 100755 --- a/speechmatics/cli.py +++ b/speechmatics/cli.py @@ -36,8 +36,8 @@ ConnectionSettings, RTSpeakerDiarizationConfig, RTTranslationConfig, - ServerMessageType, SentimentAnalysisConfig, + ServerMessageType, SummarizationConfig, TopicDetectionConfig, TranscriptionConfig, @@ -271,8 +271,6 @@ def get_transcription_config( "max_delay_mode", "diarization", "channel_diarization_labels", - "speaker_diarization_sensitivity", - "speaker_diarization_max_speakers", ]: if args.get(option) is not None: config[option] = args[option] @@ -280,9 +278,6 @@ def get_transcription_config( "streaming_mode", "enable_partials", "enable_entities", - "enable_translation_partials", - "enable_transcription_partials", - "speaker_diarization_prefer_current_speaker", ]: config[option] = True if args.get(option) else config.get(option) @@ -355,17 +350,15 @@ def get_transcription_config( diarization_config = config.get("speaker_diarization_config", {}) if diarization_config or args.get("diarization") == "speaker": max_speakers = args.get( - "speaker_diarization_max_speakers", - diarization_config.get("speaker_diarization_max_speakers", None), - ) + "speaker_diarization_max_speakers" + ) or diarization_config.get("max_speakers", None) prefer_current_speaker = args.get( - "speaker_diarization_prefer_current_speaker", - diarization_config.get("speaker_diarization_prefer_current_speaker", None), - ) + "speaker_diarization_prefer_current_speaker" + ) or diarization_config.get("prefer_current_speaker", None) speaker_sensitivity = args.get( - "speaker_diarization_sensitivity", - diarization_config.get("speaker_diarization_sensitivity", None), - ) + "speaker_diarization_sensitivity" + ) or diarization_config.get("speaker_sensitivity", None) + if args["mode"] == "rt": config["speaker_diarization_config"] = RTSpeakerDiarizationConfig( max_speakers=max_speakers, @@ -434,7 +427,7 @@ def get_transcription_config( args_topic_detection = args.get("detect_topics") if args_topic_detection or file_topic_detection_config is not None: topic_detection_config = TopicDetectionConfig() - topics = args.get("topics", file_topic_detection_config.get("topics")) + topics = args.get("topics") or file_topic_detection_config.get("topics") if topics: topic_detection_config.topics = topics config["topic_detection_config"] = topic_detection_config From 61ced98db7786d06b752ef842b5738c6a4374442 Mon Sep 17 00:00:00 2001 From: Claire Schaefer Date: Thu, 15 May 2025 12:11:29 +0100 Subject: [PATCH 2/4] Fix linting following black version update --- asr_metrics/cli.py | 1 + asr_metrics/wer/__main__.py | 1 + asr_metrics/wer/normalizers/basic.py | 22 ++++++++++------- .../batch_transcription_client.py | 1 + examples/notification_flow/cronjob.py | 1 + .../notification_flow/notification_server.py | 1 + speechmatics/client.py | 24 +++++++++---------- 7 files changed, 30 insertions(+), 21 deletions(-) diff --git a/asr_metrics/cli.py b/asr_metrics/cli.py index 79a3e40..a4ea25f 100644 --- a/asr_metrics/cli.py +++ b/asr_metrics/cli.py @@ -1,4 +1,5 @@ """Entrypoint for SM metrics""" + import argparse import asr_metrics.diarization.sm_diarization_metrics.cookbook as diarization_metrics diff --git a/asr_metrics/wer/__main__.py b/asr_metrics/wer/__main__.py index 70ce5b0..3d220b7 100644 --- a/asr_metrics/wer/__main__.py +++ b/asr_metrics/wer/__main__.py @@ -2,6 +2,7 @@ Simple script to run WER analysis using Whisper normalisers Prints results to terminal """ + import difflib import json from pathlib import Path diff --git a/asr_metrics/wer/normalizers/basic.py b/asr_metrics/wer/normalizers/basic.py index 697daf3..c8fabb1 100644 --- a/asr_metrics/wer/normalizers/basic.py +++ b/asr_metrics/wer/normalizers/basic.py @@ -50,15 +50,19 @@ def remove_symbols_and_diacritics(self, s: str, keep=""): and drop any diacritics (category 'Mn' and some manual mappings) """ return "".join( - c - if c in keep - else self.additional_diacritics[c] - if c in self.additional_diacritics - else "" - if unicodedata.category(c) == "Mn" - else " " - if unicodedata.category(c)[0] in "MSP" - else c + ( + c + if c in keep + else ( + self.additional_diacritics[c] + if c in self.additional_diacritics + else ( + "" + if unicodedata.category(c) == "Mn" + else " " if unicodedata.category(c)[0] in "MSP" else c + ) + ) + ) for c in unicodedata.normalize("NFKD", s) ) diff --git a/examples/notification_flow/batch_transcription_client.py b/examples/notification_flow/batch_transcription_client.py index d9dc809..21d5a8b 100644 --- a/examples/notification_flow/batch_transcription_client.py +++ b/examples/notification_flow/batch_transcription_client.py @@ -1,6 +1,7 @@ """ Client module which calls the Speechmatics API """ + import sqlite3 from speechmatics.batch_client import BatchClient diff --git a/examples/notification_flow/cronjob.py b/examples/notification_flow/cronjob.py index dddff96..aabef27 100644 --- a/examples/notification_flow/cronjob.py +++ b/examples/notification_flow/cronjob.py @@ -3,6 +3,7 @@ Async fallback checking in case of webhook failure is a common pattern in production systems """ + import sqlite3 from speechmatics.batch_client import BatchClient diff --git a/examples/notification_flow/notification_server.py b/examples/notification_flow/notification_server.py index 2d57109..463f874 100644 --- a/examples/notification_flow/notification_server.py +++ b/examples/notification_flow/notification_server.py @@ -3,6 +3,7 @@ This is a dev example - DO NOT USE IN PRODUCTION """ + import json import sqlite3 diff --git a/speechmatics/client.py b/speechmatics/client.py index 842883b..f7fae26 100644 --- a/speechmatics/client.py +++ b/speechmatics/client.py @@ -140,13 +140,13 @@ def _set_recognition_config(self): "transcription_config": self.transcription_config.as_config(), } if self.transcription_config.translation_config is not None: - msg[ - "translation_config" - ] = self.transcription_config.translation_config.asdict() + msg["translation_config"] = ( + self.transcription_config.translation_config.asdict() + ) if self.transcription_config.audio_events_config is not None: - msg[ - "audio_events_config" - ] = self.transcription_config.audio_events_config.asdict() + msg["audio_events_config"] = ( + self.transcription_config.audio_events_config.asdict() + ) self._call_middleware(ClientMessageType.SetRecognitionConfig, msg, False) return msg @@ -168,13 +168,13 @@ def _start_recognition(self, audio_settings): "transcription_config": self.transcription_config.as_config(), } if self.transcription_config.translation_config is not None: - msg[ - "translation_config" - ] = self.transcription_config.translation_config.asdict() + msg["translation_config"] = ( + self.transcription_config.translation_config.asdict() + ) if self.transcription_config.audio_events_config is not None: - msg[ - "audio_events_config" - ] = self.transcription_config.audio_events_config.asdict() + msg["audio_events_config"] = ( + self.transcription_config.audio_events_config.asdict() + ) self.session_running = True self._call_middleware(ClientMessageType.StartRecognition, msg, False) LOGGER.debug(msg) From cf5be7ffbdaf60bbafa379a59a2451267f838953 Mon Sep 17 00:00:00 2001 From: Claire Schaefer Date: Fri, 16 May 2025 09:37:44 +0100 Subject: [PATCH 3/4] Add tests to check speaker diarization config from file is used --- tests/data/transcription_config.json | 6 ++++++ tests/test_cli.py | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/data/transcription_config.json b/tests/data/transcription_config.json index 8eb95df..bcaef84 100644 --- a/tests/data/transcription_config.json +++ b/tests/data/transcription_config.json @@ -2,6 +2,12 @@ "language": "xy", "domain": "fake", "enable_entities": true, + "diarization": "speaker", + "speaker_diarization_config": { + "prefer_current_speaker": true, + "max_speakers": 5, + "speaker_sensitivity": 0.3 + }, "translation_config": { "target_languages": ["es"], "enable_partials": false diff --git a/tests/test_cli.py b/tests/test_cli.py index 141d858..f9b1f70 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,8 +6,7 @@ import pytest import toml -from speechmatics import cli -from speechmatics import cli_parser +from speechmatics import cli, cli_parser from speechmatics.constants import ( BATCH_SELF_SERVICE_URL, RT_SELF_SERVICE_URL, @@ -771,6 +770,12 @@ def test_rt_main_with_config_file(mock_server): assert msg["transcription_config"]["domain"] == "fake" assert msg["transcription_config"]["enable_entities"] is True assert msg["transcription_config"].get("operating_point") is None + assert msg["transcription_config"]["diarization"] == "speaker" + assert msg["transcription_config"]["speaker_diarization_config"] == { + "prefer_current_speaker": True, + "max_speakers": 5, + "speaker_sensitivity": 0.3 + } assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is False assert msg["translation_config"]["target_languages"] == ["es"] @@ -795,6 +800,8 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): "--output-locale=en-US", "--domain=different", "--operating-point=enhanced", + "--speaker-diarization-max-speakers=3", + "--speaker-diarization-sensitivity=0.7", audio_path, ] @@ -816,6 +823,12 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): assert msg["transcription_config"]["enable_entities"] is True assert msg["transcription_config"]["output_locale"] == "en-US" assert msg["transcription_config"]["operating_point"] == "enhanced" + assert msg["transcription_config"]["diarization"] == "speaker" + assert msg["transcription_config"]["speaker_diarization_config"] == { + "prefer_current_speaker": True, + "max_speakers": 3, + "speaker_sensitivity": 0.7 + } assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is True assert msg["translation_config"]["target_languages"] == ["fr"] From 0853e8bee88de356eaa920744737b2dc0a4a4eb2 Mon Sep 17 00:00:00 2001 From: Claire Schaefer Date: Fri, 16 May 2025 09:40:06 +0100 Subject: [PATCH 4/4] lint for updated test --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index f9b1f70..7a19d59 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -774,7 +774,7 @@ def test_rt_main_with_config_file(mock_server): assert msg["transcription_config"]["speaker_diarization_config"] == { "prefer_current_speaker": True, "max_speakers": 5, - "speaker_sensitivity": 0.3 + "speaker_sensitivity": 0.3, } assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is False @@ -827,7 +827,7 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): assert msg["transcription_config"]["speaker_diarization_config"] == { "prefer_current_speaker": True, "max_speakers": 3, - "speaker_sensitivity": 0.7 + "speaker_sensitivity": 0.7, } assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is True