From 5d6a7ab0e2dcd8191c1cb0d119a5dd1e3b5b12d2 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Wed, 1 Apr 2026 21:20:50 -0700 Subject: [PATCH 1/4] Format lint change --- Sources/Fluid/Services/ParakeetRealtimeProvider.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Fluid/Services/ParakeetRealtimeProvider.swift b/Sources/Fluid/Services/ParakeetRealtimeProvider.swift index c350caf..9ddefe0 100644 --- a/Sources/Fluid/Services/ParakeetRealtimeProvider.swift +++ b/Sources/Fluid/Services/ParakeetRealtimeProvider.swift @@ -137,10 +137,10 @@ final class ParakeetRealtimeProvider: TranscriptionProvider { private static func cacheRootDirectory() -> URL { let baseDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first - ?? FileManager.default.homeDirectoryForCurrentUser.appendingPathComponent( - "Library/Application Support", - isDirectory: true - ) + ?? FileManager.default.homeDirectoryForCurrentUser.appendingPathComponent( + "Library/Application Support", + isDirectory: true + ) return baseDirectory .appendingPathComponent("FluidAudio", isDirectory: true) From f4feb32b472658b9f95f9f41599727acba58cbab Mon Sep 17 00:00:00 2001 From: altic-dev Date: Thu, 2 Apr 2026 01:06:48 -0700 Subject: [PATCH 2/4] Harden Cohere artifact loading and language selection --- Package.resolved | 2 +- Sources/Fluid/Persistence/SettingsStore.swift | 66 +++++++++- .../ExternalCoreMLModelRegistry.swift | 44 ++++++- .../ExternalCoreMLTranscriptionProvider.swift | 121 +++++++++++++++++- .../VoiceEngineSettingsViewModel.swift | 2 +- .../UI/AISettingsView+SpeechRecognition.swift | 37 +++--- 6 files changed, 237 insertions(+), 35 deletions(-) diff --git a/Package.resolved b/Package.resolved index 5e0ed9a..2085790 100644 --- a/Package.resolved +++ b/Package.resolved @@ -24,7 +24,7 @@ "location" : "https://github.com/altic-dev/FluidAudio.git", "state" : { "branch" : "B/cohere-coreml-asr", - "revision" : "1502a6a8095bb1fc9831cc239d69a9a837d665a7" + "revision" : "ba6e4359fbb0d00b63e789354acc3f005641cfe4" } }, { diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index db68e0f..cfafda6 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2359,7 +2359,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDTv2: return "Parakeet TDT v2 (English Only)" case .parakeetRealtime: return "Parakeet Flash (Beta)" case .qwen3Asr: return "Qwen3 ASR (Beta)" - case .cohereTranscribeSixBit: return "Cohere Transcribe 6-bit" + case .cohereTranscribeSixBit: return "Cohere Transcribe" case .appleSpeech: return "Apple ASR Legacy" case .appleSpeechAnalyzer: return "Apple Speech - macOS 26+" case .whisperTiny: return "Whisper Tiny" @@ -2378,7 +2378,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDTv2: return "English Only (Higher Accuracy)" case .parakeetRealtime: return "English Only (Live Streaming)" case .qwen3Asr: return "30 Languages" - case .cohereTranscribeSixBit: return "14 Languages" + case .cohereTranscribeSixBit: return "14 Languages (Select Manually)" case .appleSpeech: return "System Languages" case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH" case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: @@ -2532,7 +2532,7 @@ final class SettingsStore: ObservableObject { case .qwen3Asr: return "Qwen3 multilingual ASR via FluidAudio. Higher quality, heavier memory footprint." case .cohereTranscribeSixBit: - return "High-accuracy multilingual transcription. Supports English, French, German, Italian, Spanish, Portuguese, Greek, Dutch, Polish, Mandarin, Japanese, Korean, Vietnamese, and Arabic." + return "High-accuracy multilingual transcription. Select the language manually before dictation for best results." case .appleSpeech: return "Built-in macOS speech recognition. No download required." case .appleSpeechAnalyzer: @@ -2999,6 +2999,7 @@ private extension SettingsStore { // Unified Speech Model (replaces above two) static let selectedSpeechModel = "SelectedSpeechModel" + static let selectedCohereLanguage = "SelectedCohereLanguage" static let externalCoreMLArtifactsDirectories = "ExternalCoreMLArtifactsDirectories" // Overlay Position @@ -3130,7 +3131,7 @@ extension SettingsStore.SpeechModel { case .parakeetRealtime: return "EN" case .cohereTranscribeSixBit: - return "14 Languages" + return "AR, DE, EL, EN, ES, FR, IT, JA, KO, NL, PL, PT, VI, ZH" case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH" default: @@ -3144,6 +3145,8 @@ extension SettingsStore.SpeechModel { return """ Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, Slovenian, Spanish, Swedish, Russian, and Ukrainian """ + case .cohereTranscribeSixBit: + return "Arabic, German, Greek, English, Spanish, French, Italian, Japanese, Korean, Dutch, Polish, Portuguese, Vietnamese, and Mandarin Chinese" default: return nil } @@ -3151,6 +3154,46 @@ extension SettingsStore.SpeechModel { } extension SettingsStore { + enum CohereLanguage: String, CaseIterable, Identifiable, Codable { + case arabic = "ar" + case german = "de" + case greek = "el" + case english = "en" + case spanish = "es" + case french = "fr" + case italian = "it" + case japanese = "ja" + case korean = "ko" + case dutch = "nl" + case polish = "pl" + case portuguese = "pt" + case vietnamese = "vi" + case mandarinChinese = "zh" + + var id: String { self.rawValue } + + var displayName: String { + switch self { + case .arabic: return "Arabic" + case .german: return "German" + case .greek: return "Greek" + case .english: return "English" + case .spanish: return "Spanish" + case .french: return "French" + case .italian: return "Italian" + case .japanese: return "Japanese" + case .korean: return "Korean" + case .dutch: return "Dutch" + case .polish: return "Polish" + case .portuguese: return "Portuguese" + case .vietnamese: return "Vietnamese" + case .mandarinChinese: return "Mandarin Chinese" + } + } + + var tokenString: String { "<|\(self.rawValue)|>" } + } + // MARK: - Unified Speech Model Selection /// The selected speech recognition model. @@ -3187,6 +3230,21 @@ extension SettingsStore { } } + var selectedCohereLanguage: CohereLanguage { + get { + if let rawValue = self.defaults.string(forKey: Keys.selectedCohereLanguage), + let language = CohereLanguage(rawValue: rawValue) + { + return language + } + return .english + } + set { + objectWillChange.send() + self.defaults.set(newValue.rawValue, forKey: Keys.selectedCohereLanguage) + } + } + func externalCoreMLArtifactsDirectory(for model: SpeechModel) -> URL? { guard let spec = model.externalCoreMLSpec else { return nil } let paths = self.defaults.dictionary(forKey: Keys.externalCoreMLArtifactsDirectories) as? [String: String] ?? [:] diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift index bacd581..7126210 100644 --- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -28,6 +28,10 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError { case manifestUnreadable(URL, Error) case unexpectedModelID(expected: String, actual: String) case unexpectedSampleRate(expected: Int, actual: Int) + case invalidMaxAudioSeconds(Double) + case invalidMaxAudioSamples(Int) + case inconsistentAudioWindow(samples: Int, seconds: Double, sampleRate: Int) + case invalidOverlapSamples(Int, maxAudioSamples: Int) var errorDescription: String? { switch self { @@ -41,6 +45,14 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError { return "Unexpected model_id '\(actual)'. Expected '\(expected)'." case let .unexpectedSampleRate(expected, actual): return "Unexpected sample rate \(actual). Expected \(expected)." + case let .invalidMaxAudioSeconds(seconds): + return "Invalid max_audio_seconds \(seconds)." + case let .invalidMaxAudioSamples(samples): + return "Invalid max_audio_samples \(samples)." + case let .inconsistentAudioWindow(samples, seconds, sampleRate): + return "Manifest audio window is inconsistent: \(samples) samples vs \(seconds)s at \(sampleRate) Hz." + case let .invalidOverlapSamples(overlapSamples, maxAudioSamples): + return "Invalid overlap_samples \(overlapSamples) for max_audio_samples \(maxAudioSamples)." } } } @@ -61,6 +73,8 @@ struct ExternalCoreMLASRModelSpec { let repositoryOwner: String? let repositoryName: String? let repositoryRevision: String + let artifactBundleVersion: String + private let maximumAudioWindowSeconds: Double = 60 var requiredEntries: [String] { [ @@ -129,6 +143,33 @@ struct ExternalCoreMLASRModelSpec { actual: manifest.sampleRate ) } + + guard manifest.maxAudioSeconds > 0, manifest.maxAudioSeconds <= self.maximumAudioWindowSeconds else { + throw ExternalCoreMLArtifactsValidationError.invalidMaxAudioSeconds(manifest.maxAudioSeconds) + } + + let maximumAudioSamples = Int((Double(self.expectedSampleRate) * self.maximumAudioWindowSeconds).rounded()) + guard manifest.maxAudioSamples > 0, manifest.maxAudioSamples <= maximumAudioSamples else { + throw ExternalCoreMLArtifactsValidationError.invalidMaxAudioSamples(manifest.maxAudioSamples) + } + + let expectedSamples = Int((manifest.maxAudioSeconds * Double(manifest.sampleRate)).rounded()) + guard abs(expectedSamples - manifest.maxAudioSamples) <= 1 else { + throw ExternalCoreMLArtifactsValidationError.inconsistentAudioWindow( + samples: manifest.maxAudioSamples, + seconds: manifest.maxAudioSeconds, + sampleRate: manifest.sampleRate + ) + } + + if let overlapSamples = manifest.overlapSamples { + guard overlapSamples >= 0, overlapSamples < manifest.maxAudioSamples else { + throw ExternalCoreMLArtifactsValidationError.invalidOverlapSamples( + overlapSamples, + maxAudioSamples: manifest.maxAudioSamples + ) + } + } } } @@ -151,7 +192,8 @@ enum ExternalCoreMLModelRegistry { sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit"), repositoryOwner: "BarathwajAnandan", repositoryName: "cohere-transcribe-03-2026-CoreML-6bit", - repositoryRevision: "main" + repositoryRevision: "main", + artifactBundleVersion: "2026-04-02-cohere-refresh-1" ) default: return nil diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift index 34a12df..b66391d 100644 --- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -15,6 +15,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { private var cohereManager: CohereTranscribeAsrManager? private let modelOverride: SettingsStore.SpeechModel? private var loadedManifest: ExternalCoreMLManifestIdentity? + private var coherePromptTemplate: [Int] = [] + private var cohereLanguageTokenIDs: [SettingsStore.CohereLanguage: Int] = [:] init(modelOverride: SettingsStore.SpeechModel? = nil) { self.modelOverride = modelOverride @@ -53,6 +55,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { progressHandler?(0.85) self.loadedManifest = try spec.loadManifest(at: directory) + try self.loadCoherePromptConfigurationIfNeeded(at: directory, backend: spec.backend) switch spec.backend { case .cohereTranscribe: @@ -100,7 +103,11 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { throw Self.makeError("External CoreML model is not initialized.") } - let text = try await manager.transcribe(audioSamples: self.paddedSamplesToModelLimit(previewSamples)) + let promptIDs = self.coherePromptIDsForCurrentLanguage() + let text = try await manager.transcribe( + audioSamples: self.paddedSamplesToModelLimit(previewSamples), + promptIDs: promptIDs.isEmpty ? nil : promptIDs + ) return ASRTranscriptionResult(text: text, confidence: 1.0) } @@ -118,7 +125,11 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { "ExternalCoreML: native file transcription start [file=\(fileURL.lastPathComponent)]", source: "ExternalCoreML" ) - let text = try await manager.transcribe(audioFileAt: fileURL) + let promptIDs = self.coherePromptIDsForCurrentLanguage() + let text = try await manager.transcribe( + audioFileAt: fileURL, + promptIDs: promptIDs.isEmpty ? nil : promptIDs + ) let elapsed = Date().timeIntervalSince(startedAt) DebugLogger.shared.info( "ExternalCoreML: native file transcription finished in \(String(format: "%.2f", elapsed))s [chars=\(text.count)]", @@ -142,7 +153,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { "ExternalCoreML: transcribing \(samples.count) samples [audioSeconds=\(String(format: "%.2f", audioSeconds))]", source: "ExternalCoreML" ) - let text = try await self.transcribeByManifestWindow(samples, manager: manager) + let promptIDs = self.coherePromptIDsForCurrentLanguage() + let text = try await self.transcribeByManifestWindow(samples, manager: manager, promptIDs: promptIDs) let elapsed = Date().timeIntervalSince(startedAt) let rtf = audioSeconds > 0 ? elapsed / audioSeconds : 0 DebugLogger.shared.info( @@ -198,6 +210,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { self.isReady = false self.cohereManager = nil self.loadedManifest = nil + self.coherePromptTemplate = [] + self.cohereLanguageTokenIDs = [:] DebugLogger.shared.info( "ExternalCoreML: provider reset after cache clear", source: "ExternalCoreML" @@ -211,6 +225,25 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { progressHandler: ((Double) -> Void)? ) async throws { try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) + let isManagedDirectory = Self.isAppManagedArtifactsDirectory(directory, spec: spec) + + if spec.validateArtifacts(at: directory) { + if isManagedDirectory, self.artifactBundleStampMatches(spec: spec, directory: directory) == false { + DebugLogger.shared.warning( + "ExternalCoreML: refreshing managed artifacts for \(directory.lastPathComponent) due to outdated bundle stamp", + source: "ExternalCoreML" + ) + try FileManager.default.removeItem(at: directory) + try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) + } else { + DebugLogger.shared.info( + "ExternalCoreML: artifact validation passed for \(directory.lastPathComponent)", + source: "ExternalCoreML" + ) + progressHandler?(0.8) + return + } + } if spec.validateArtifacts(at: directory) { DebugLogger.shared.info( @@ -250,6 +283,9 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { throw Self.makeError(error.localizedDescription) } + if isManagedDirectory { + self.persistArtifactBundleStamp(spec: spec, directory: directory) + } SettingsStore.shared.setExternalCoreMLArtifactsDirectory(directory, for: model) } @@ -276,6 +312,26 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { ) } + private func artifactBundleStampMatches(spec: ExternalCoreMLASRModelSpec, directory: URL) -> Bool { + let stampURL = Self.artifactBundleStampURL(for: directory) + guard + let currentStamp = try? String(contentsOf: stampURL, encoding: .utf8) + .trimmingCharacters(in: .whitespacesAndNewlines) + else { + return false + } + return currentStamp == spec.artifactBundleVersion + } + + private func persistArtifactBundleStamp(spec: ExternalCoreMLASRModelSpec, directory: URL) { + let stampURL = Self.artifactBundleStampURL(for: directory) + try? spec.artifactBundleVersion.write(to: stampURL, atomically: true, encoding: .utf8) + } + + private static func artifactBundleStampURL(for directory: URL) -> URL { + directory.appendingPathComponent(".fluid_artifact_bundle_version", isDirectory: false) + } + private func invalidateCompiledCohereCacheIfNeeded(at directory: URL) throws { guard let manifest = self.loadedManifest else { return } @@ -318,6 +374,46 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { ].joined(separator: "|") } + private func loadCoherePromptConfigurationIfNeeded(at directory: URL, backend: ExternalCoreMLASRBackend) throws { + guard backend == .cohereTranscribe else { return } + + let manifestURL = directory.appendingPathComponent("coreml_manifest.json", isDirectory: false) + let data = try Data(contentsOf: manifestURL) + guard + let rawManifest = try JSONSerialization.jsonObject(with: data) as? [String: Any], + let rawPromptIDs = rawManifest["prompt_ids"] as? [Any], + let idToToken = rawManifest["id_to_token"] as? [String] + else { + return + } + let promptIDs = rawPromptIDs.compactMap { ($0 as? NSNumber)?.intValue } + guard promptIDs.count == rawPromptIDs.count else { return } + + let tokenToID = Dictionary(uniqueKeysWithValues: idToToken.enumerated().map { ($0.element, $0.offset) }) + self.coherePromptTemplate = promptIDs + self.cohereLanguageTokenIDs = Dictionary( + uniqueKeysWithValues: SettingsStore.CohereLanguage.allCases.compactMap { language in + tokenToID[language.tokenString].map { (language, $0) } + } + ) + } + + private func coherePromptIDsForCurrentLanguage() -> [Int] { + let promptTemplate = self.coherePromptTemplate + guard promptTemplate.isEmpty == false else { return [] } + + let languageTokenIDs = self.cohereLanguageTokenIDs + guard languageTokenIDs.isEmpty == false else { return promptTemplate } + + let targetLanguage = SettingsStore.shared.selectedCohereLanguage + guard let targetTokenID = languageTokenIDs[targetLanguage] else { return promptTemplate } + + let supportedTokenIDs = Set(languageTokenIDs.values) + return promptTemplate.map { tokenID in + supportedTokenIDs.contains(tokenID) ? targetTokenID : tokenID + } + } + private func previewSamples(for samples: [Float]) -> [Float] { let sampleRate = self.loadedManifest?.sampleRate ?? (self.modelOverride ?? SettingsStore.shared.selectedSpeechModel).externalCoreMLSpec?.expectedSampleRate @@ -329,15 +425,23 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { private func transcribeByManifestWindow( _ samples: [Float], - manager: CohereTranscribeAsrManager + manager: CohereTranscribeAsrManager, + promptIDs: [Int] ) async throws -> String { + let runtimePromptIDs = promptIDs.isEmpty ? nil : promptIDs let maxAudioSamples = self.loadedManifest?.maxAudioSamples ?? 0 guard maxAudioSamples > 0 else { - return try await manager.transcribe(audioSamples: samples) + return try await manager.transcribe( + audioSamples: samples, + promptIDs: runtimePromptIDs + ) } if samples.count <= maxAudioSamples { - return try await manager.transcribe(audioSamples: self.paddedSamplesToModelLimit(samples)) + return try await manager.transcribe( + audioSamples: self.paddedSamplesToModelLimit(samples), + promptIDs: runtimePromptIDs + ) } let overlapSamples = min(self.loadedManifest?.overlapSamples ?? 0, maxAudioSamples / 2) @@ -348,7 +452,10 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { while startIndex < samples.count { let endIndex = min(startIndex + maxAudioSamples, samples.count) let chunk = Array(samples[startIndex.. Date: Thu, 2 Apr 2026 01:23:12 -0700 Subject: [PATCH 3/4] Update workspace FluidAudio package pin --- .../project.xcworkspace/xcshareddata/swiftpm/Package.resolved | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 89dec41..ae75139 100644 --- a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -34,7 +34,7 @@ "location" : "https://github.com/altic-dev/FluidAudio.git", "state" : { "branch" : "B/cohere-coreml-asr", - "revision" : "1502a6a8095bb1fc9831cc239d69a9a837d665a7" + "revision" : "ba6e4359fbb0d00b63e789354acc3f005641cfe4" } }, { From c2c57787061975a364f2f4b7f228c43fe235a16b Mon Sep 17 00:00:00 2001 From: altic-dev Date: Thu, 2 Apr 2026 01:30:53 -0700 Subject: [PATCH 4/4] Bump app version to 1.5.11-beta.3 --- Info.plist | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Info.plist b/Info.plist index fe22e63..9148484 100644 --- a/Info.plist +++ b/Info.plist @@ -13,9 +13,9 @@ CFBundleIdentifier $(PRODUCT_BUNDLE_IDENTIFIER) CFBundleVersion - 9 + 10 CFBundleShortVersionString - 1.5.11-beta.2 + 1.5.11-beta.3 LSMinimumSystemVersion $(MACOSX_DEPLOYMENT_TARGET) LSApplicationCategoryType