From 25c62997041c134b03ca82731ce2f6fd2cae1eb9 Mon Sep 17 00:00:00 2001 From: Andrey Leonov Date: Fri, 1 May 2026 16:11:40 -0400 Subject: [PATCH 1/2] Release v1.0.0 --- Sources/ArgmaxCLI/ArgmaxCLI.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/ArgmaxCLI/ArgmaxCLI.swift b/Sources/ArgmaxCLI/ArgmaxCLI.swift index ddd2f08e..775f1209 100644 --- a/Sources/ArgmaxCLI/ArgmaxCLI.swift +++ b/Sources/ArgmaxCLI/ArgmaxCLI.swift @@ -4,7 +4,7 @@ import ArgumentParser import Foundation -let VERSION: String = "development" +let VERSION: String = "v1.0.0" var subcommands: [ParsableCommand.Type] { #if BUILD_SERVER_CLI From a18d861c9b406897fa04f4cb6f577ef9369af6fd Mon Sep 17 00:00:00 2001 From: yemreak Date: Tue, 19 May 2026 02:57:50 +0300 Subject: [PATCH 2/2] WhisperKit: lock audio sample/energy buffers in AudioProcessor `AudioProcessor.audioSamples` and `audioEnergy` are written from the AVAudioEngine tap thread and read from arbitrary threads (VAD polling on main, transcription on a background queue). Under Swift 6 Strict Concurrency the unsynchronised access is flagged; in practice it also produces sporadic data races detectable with TSan. Introduce an `audioLock` (NSLock) and expose `audioSamples` / `audioEnergy` through locked getters and setters. `processBuffer` holds the lock only across the shared-state mutation and releases it before invoking `audioBufferCallback` to avoid potential re-entrant deadlocks. Public surface is unchanged, and semantics are unchanged for single reads and writes; note that a compound mutation made through the public accessors (e.g. `audioSamples.append(...)`) is a locked read followed by a separate locked write, so it is not atomic with respect to the tap thread. 
--- .../Core/Audio/AudioProcessor.swift | 62 +++++++++++++------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/Sources/WhisperKit/Core/Audio/AudioProcessor.swift b/Sources/WhisperKit/Core/Audio/AudioProcessor.swift index c1074160..5c4a9061 100644 --- a/Sources/WhisperKit/Core/Audio/AudioProcessor.swift +++ b/Sources/WhisperKit/Core/Audio/AudioProcessor.swift @@ -204,11 +204,27 @@ public extension AudioProcessing { open class AudioProcessor: NSObject, AudioProcessing { private var lastInputDevice: DeviceID? public var audioEngine: AVAudioEngine? - public var audioSamples: ContiguousArray = [] - public var audioEnergy: [(rel: Float, avg: Float, max: Float, min: Float)] = [] + + /// Lock protecting `_audioSamples` and `_audioEnergy` from concurrent access. + /// These properties are written from the audio tap callback thread and read + /// from arbitrary threads (e.g. main thread for VAD polling). + private let audioLock = NSLock() + private var _audioSamples: ContiguousArray = [] + private var _audioEnergy: [(rel: Float, avg: Float, max: Float, min: Float)] = [] + + public var audioSamples: ContiguousArray { + get { audioLock.withLock { _audioSamples } } + set { audioLock.withLock { _audioSamples = newValue } } + } + + public var audioEnergy: [(rel: Float, avg: Float, max: Float, min: Float)] { + get { audioLock.withLock { _audioEnergy } } + set { audioLock.withLock { _audioEnergy = newValue } } + } + public var relativeEnergyWindow: Int = 20 public var relativeEnergy: [Float] { - return self.audioEnergy.map { $0.rel } + return audioLock.withLock { _audioEnergy.map { $0.rel } } } public var audioBufferCallback: (([Float]) -> Void)? @@ -905,23 +921,29 @@ public extension AudioProcessor { /// We have a new buffer, process and store it. 
/// NOTE: Assumes audio is 16khz mono func processBuffer(_ buffer: [Float]) { - audioSamples.append(contentsOf: buffer) + // Calculate energy values outside the lock (pure computation) + let signalEnergy = Self.calculateEnergy(of: buffer) - // Find the lowest average energy of the last 20 buffers ~2 seconds - let minAvgEnergy = self.audioEnergy.suffix(20).reduce(Float.infinity) { min($0, $1.avg) } - let relativeEnergy = Self.calculateRelativeEnergy(of: buffer, relativeTo: minAvgEnergy) + // Hold the lock only for reading/writing shared state + let (newEnergy, sampleCount) = audioLock.withLock { () -> ((rel: Float, avg: Float, max: Float, min: Float), Int) in + _audioSamples.append(contentsOf: buffer) - // Update energy for buffers with valid data - let signalEnergy = Self.calculateEnergy(of: buffer) - let newEnergy = (relativeEnergy, signalEnergy.avg, signalEnergy.max, signalEnergy.min) - self.audioEnergy.append(newEnergy) + // Find the lowest average energy of the last 20 buffers ~2 seconds + let minAvgEnergy = _audioEnergy.suffix(20).reduce(Float.infinity) { min($0, $1.avg) } + let relativeEnergy = Self.calculateRelativeEnergy(of: buffer, relativeTo: minAvgEnergy) + + let energy = (relativeEnergy, signalEnergy.avg, signalEnergy.max, signalEnergy.min) + _audioEnergy.append(energy) + + return (energy, _audioSamples.count) + } - // Call the callback with the new buffer + // Call the callback outside the lock to avoid potential deadlocks audioBufferCallback?(buffer) // Print the current size of the audio buffer - if self.audioSamples.count % (minBufferLength * Int(relativeEnergyWindow)) == 0 { - Logging.debug("Current audio size: \(self.audioSamples.count) samples, most recent buffer: \(buffer.count) samples, most recent energy: \(newEnergy)") + if sampleCount % (minBufferLength * Int(relativeEnergyWindow)) == 0 { + Logging.debug("Current audio size: \(sampleCount) samples, most recent buffer: \(buffer.count) samples, most recent energy: \(newEnergy)") } } @@ 
-1022,14 +1044,18 @@ public extension AudioProcessor { } func purgeAudioSamples(keepingLast keep: Int) { - if audioSamples.count > keep { - audioSamples.removeFirst(audioSamples.count - keep) + audioLock.withLock { + if _audioSamples.count > keep { + _audioSamples.removeFirst(_audioSamples.count - keep) + } } } func startRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)? = nil) throws { - audioSamples = [] - audioEnergy = [] + audioLock.withLock { + _audioSamples = [] + _audioEnergy = [] + } try? setupAudioSessionForDevice()