From 9be2c05b01b18940ae1b4a20b3d0b22e36526598 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 00:51:30 -0700 Subject: [PATCH 01/22] Add Cohere CoreML provider integration --- Sources/Fluid/Persistence/SettingsStore.swift | 72 +++++- Sources/Fluid/Services/ASRService.swift | 31 ++- .../ExternalCoreMLModelRegistry.swift | 154 +++++++++++++ .../ExternalCoreMLTranscriptionProvider.swift | 212 ++++++++++++++++++ .../MeetingTranscriptionService.swift | 37 +++ .../Services/TranscriptionProvider.swift | 17 ++ .../VoiceEngineSettingsViewModel.swift | 90 +++++++- .../UI/AISettingsView+SpeechRecognition.swift | 69 +++++- Sources/Fluid/UI/AISettingsView.swift | 1 + 9 files changed, 662 insertions(+), 21 deletions(-) create mode 100644 Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift create mode 100644 Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index bc0f1a6e..491596e0 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2332,6 +2332,7 @@ final class SettingsStore: ObservableObject { case parakeetTDT = "parakeet-tdt" case parakeetTDTv2 = "parakeet-tdt-v2" case qwen3Asr = "qwen3-asr" + case cohereTranscribeSixBit = "cohere-transcribe-6bit" // MARK: - Apple Native @@ -2356,6 +2357,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return "Parakeet TDT v3 (Multilingual)" case .parakeetTDTv2: return "Parakeet TDT v2 (English Only)" case .qwen3Asr: return "Qwen3 ASR (Beta)" + case .cohereTranscribeSixBit: return "Cohere Transcribe 6-bit" case .appleSpeech: return "Apple ASR Legacy" case .appleSpeechAnalyzer: return "Apple Speech - macOS 26+" case .whisperTiny: return "Whisper Tiny" @@ -2373,6 +2375,7 @@ final class SettingsStore: ObservableObject { return "25 European Languages" case .parakeetTDTv2: return "English Only (Higher Accuracy)" case .qwen3Asr: return "30 Languages" + case .cohereTranscribeSixBit: return "14 Languages" case .appleSpeech: return "System Languages" case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH" case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: @@ -2385,6 +2388,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return "~500 MB" case .parakeetTDTv2: return "~500 MB" case .qwen3Asr: return "~2.0 GB" + case .cohereTranscribeSixBit: return "~1.4 GB (manual)" case .appleSpeech: return "Built-in (Zero Download)" case .appleSpeechAnalyzer: return "Built-in" case .whisperTiny: return "~75 MB" @@ -2398,14 +2402,14 @@ final class SettingsStore: ObservableObject { var requiresAppleSilicon: Bool { switch self { - case .parakeetTDT, .parakeetTDTv2, .qwen3Asr: return true + case .parakeetTDT, .parakeetTDTv2, .qwen3Asr, .cohereTranscribeSixBit: return true default: return false } } var isWhisperModel: Bool { switch self { - case .parakeetTDT, .parakeetTDTv2, .qwen3Asr, .appleSpeech, .appleSpeechAnalyzer: return false + case .parakeetTDT, .parakeetTDTv2, .qwen3Asr, .cohereTranscribeSixBit, .appleSpeech, .appleSpeechAnalyzer: return false default: return true } } @@ -2449,7 +2453,7 @@ final class SettingsStore: ObservableObject { /// Requires macOS 15 or later. 
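+    /// Cohere shares this gate: ExternalCoreMLTranscriptionProvider below is declared @available(macOS 15.0, *).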
var requiresMacOS15: Bool { switch self { - case .qwen3Asr: return true + case .qwen3Asr, .cohereTranscribeSixBit: return true default: return false } } @@ -2496,6 +2500,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return "Blazing Fast - Multilingual" case .parakeetTDTv2: return "Blazing Fast - English" case .qwen3Asr: return "Qwen3 - Multilingual" + case .cohereTranscribeSixBit: return "Cohere - High Accuracy" case .appleSpeech: return "Apple ASR Legacy" case .appleSpeechAnalyzer: return "Apple Speech - macOS 26+" case .whisperTiny: return "Fast & Light" @@ -2516,6 +2521,8 @@ final class SettingsStore: ObservableObject { return "Optimized for English accuracy and fastest transcription." case .qwen3Asr: return "Qwen3 multilingual ASR via FluidAudio. Higher quality, heavier memory footprint." + case .cohereTranscribeSixBit: + return "External CoreML pipeline with strong accuracy. Load it from a local artifacts folder." case .appleSpeech: return "Built-in macOS speech recognition. No download required." case .appleSpeechAnalyzer: @@ -2542,6 +2549,8 @@ final class SettingsStore: ObservableObject { return 4.0 case .qwen3Asr: return 8.0 + case .cohereTranscribeSixBit: + return 8.0 case .appleSpeech, .appleSpeechAnalyzer: return 2.0 // Built-in, minimal overhead case .whisperTiny: @@ -2564,6 +2573,8 @@ final class SettingsStore: ObservableObject { switch self { case .qwen3Asr: return "⚠️ Requires 8GB+ RAM. Best on newer Apple Silicon Macs." + case .cohereTranscribeSixBit: + return "⚠️ Requires local CoreML artifacts and 8GB+ RAM. Best on newer Apple Silicon Macs." case .whisperLarge: return "⚠️ Requires 10GB+ RAM. May crash on systems with limited memory." case .whisperLargeTurbo: @@ -2581,6 +2592,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return 5 case .parakeetTDTv2: return 5 case .qwen3Asr: return 3 + case .cohereTranscribeSixBit: return 3 case .appleSpeech: return 4 case .appleSpeechAnalyzer: return 4 case .whisperTiny: return 4 @@ -2598,6 +2610,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return 5 case .parakeetTDTv2: return 5 case .qwen3Asr: return 4 + case .cohereTranscribeSixBit: return 5 case .appleSpeech: return 4 case .appleSpeechAnalyzer: return 4 case .whisperTiny: return 2 @@ -2615,6 +2628,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return 1.0 case .parakeetTDTv2: return 1.0 case .qwen3Asr: return 0.45 + case .cohereTranscribeSixBit: return 0.50 case .appleSpeech: return 0.60 case .appleSpeechAnalyzer: return 0.85 case .whisperTiny: return 0.90 @@ -2632,6 +2646,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return 0.95 case .parakeetTDTv2: return 0.98 case .qwen3Asr: return 0.90 + case .cohereTranscribeSixBit: return 0.96 case .appleSpeech: return 0.60 case .appleSpeechAnalyzer: return 0.80 case .whisperTiny: return 0.40 @@ -2649,6 +2664,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return "FluidVoice Pick" case .parakeetTDTv2: return "FluidVoice Pick" case .qwen3Asr: return "Beta" + case .cohereTranscribeSixBit: return "Manual Import" case .appleSpeechAnalyzer: return "New" default: return nil } @@ -2657,7 +2673,7 @@ final class SettingsStore: ObservableObject { /// Optimization level for Apple Silicon (for display) var appleSiliconOptimized: Bool { switch self { - case .parakeetTDT, .parakeetTDTv2, .qwen3Asr, .appleSpeechAnalyzer: + case .parakeetTDT, .parakeetTDTv2, .qwen3Asr, .cohereTranscribeSixBit, .appleSpeechAnalyzer: return 
true default: return false @@ -2668,7 +2684,7 @@ final class SettingsStore: ObservableObject { /// Large Whisper models are too slow for streaming, so they only do final transcription on stop. var supportsStreaming: Bool { switch self { - case .qwen3Asr, .whisperMedium, .whisperLargeTurbo, .whisperLarge: + case .qwen3Asr, .cohereTranscribeSixBit, .whisperMedium, .whisperLargeTurbo, .whisperLarge: return false // Too slow for real-time chunk processing default: return true // All other models support streaming @@ -2681,6 +2697,7 @@ final class SettingsStore: ObservableObject { case apple = "Apple" case openai = "OpenAI" case qwen = "Qwen" + case cohere = "Cohere" } /// Which provider this model belongs to @@ -2692,6 +2709,8 @@ final class SettingsStore: ObservableObject { return .apple case .qwen3Asr: return .qwen + case .cohereTranscribeSixBit: + return .cohere case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: return .openai } @@ -2722,6 +2741,14 @@ final class SettingsStore: ObservableObject { #else return false #endif + case .cohereTranscribeSixBit: + guard + let spec = self.externalCoreMLSpec, + let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: self) + else { + return false + } + return spec.validateArtifacts(at: directory) default: // Whisper models guard let whisperFile = self.whisperModelFile else { return false } @@ -2751,6 +2778,8 @@ final class SettingsStore: ObservableObject { return "NVIDIA" case .qwen3Asr: return "Qwen" + case .cohereTranscribeSixBit: + return "Cohere" case .appleSpeech, .appleSpeechAnalyzer: return "Apple" case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: @@ -2773,6 +2802,8 @@ final class SettingsStore: ObservableObject { return "#76B900" case .qwen3Asr: return "#E67E22" + case .cohereTranscribeSixBit: + return "#FA6B3C" case .appleSpeech, .appleSpeechAnalyzer: return "#A2AAAD" // Apple Gray case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge: @@ -2927,6 +2958,7 @@ private extension SettingsStore { // Unified Speech Model (replaces above two) static let selectedSpeechModel = "SelectedSpeechModel" + static let externalCoreMLArtifactsDirectories = "ExternalCoreMLArtifactsDirectories" // Overlay Position static let overlayPosition = "OverlayPosition" @@ -3054,6 +3086,8 @@ extension SettingsStore.SpeechModel { switch self { case .parakeetTDT: return "BG, HR, CS, DA, NL, EN, ET, FI, FR, DE, EL, HU, IT, LV, LT, MT, PL, PT, RO, SK, SL, ES, SV, RU, UK" + case .cohereTranscribeSixBit: + return "14 Languages" case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH" default: @@ -3110,6 +3144,34 @@ extension SettingsStore { } } + func externalCoreMLArtifactsDirectory(for model: SpeechModel) -> URL? { + guard let spec = model.externalCoreMLSpec else { return nil } + let paths = self.defaults.dictionary(forKey: Keys.externalCoreMLArtifactsDirectories) as? [String: String] ?? 
[:] + if let storedPath = paths[model.rawValue], storedPath.isEmpty == false { + return URL(fileURLWithPath: storedPath, isDirectory: true) + } + + let cachesDirectory = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first + let fallback = cachesDirectory?.appendingPathComponent(spec.artifactFolderHint, isDirectory: true) + guard let fallback else { return nil } + if FileManager.default.fileExists(atPath: fallback.path) { + return fallback + } + return nil + } + + func setExternalCoreMLArtifactsDirectory(_ directory: URL?, for model: SpeechModel) { + guard model.requiresExternalArtifacts else { return } + objectWillChange.send() + var paths = self.defaults.dictionary(forKey: Keys.externalCoreMLArtifactsDirectories) as? [String: String] ?? [:] + if let directory { + paths[model.rawValue] = directory.standardizedFileURL.path + } else { + paths.removeValue(forKey: model.rawValue) + } + self.defaults.set(paths, forKey: Keys.externalCoreMLArtifactsDirectories) + } + /// Migrates old TranscriptionProviderOption + WhisperModelSize settings to new SpeechModel private func migrateToSpeechModel() -> SpeechModel { let oldProvider = self.defaults.string(forKey: Keys.selectedTranscriptionProvider) ?? "auto" diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index c695651e..e00e906e 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -126,17 +126,19 @@ final class ASRService: ObservableObject { /// Returns a user-friendly status message for model loading state var modelStatusMessage: String { + let usesExternalArtifacts = SettingsStore.shared.selectedSpeechModel.requiresExternalArtifacts if self.isAsrReady { return "Model ready" } - if self.isDownloadingModel { return "Downloading model..." } + if self.isDownloadingModel { return usesExternalArtifacts ? "Importing model..." : "Downloading model..." } if self.isLoadingModel { return "Loading model into memory..." } - if self.modelsExistOnDisk { return "Model cached, needs loading" } - return "Model not downloaded" + if self.modelsExistOnDisk { return usesExternalArtifacts ? "Model imported, needs loading" : "Model cached, needs loading" } + return usesExternalArtifacts ? "Model not imported" : "Model not downloaded" } // MARK: - Transcription Provider (Settable) /// Cached providers to avoid re-instantiation private var fluidAudioProvider: FluidAudioProvider? + private var externalCoreMLProvider: ExternalCoreMLTranscriptionProvider? private var whisperProvider: WhisperProvider? private var appleSpeechProvider: AppleSpeechProvider? /// Stored as Any? 
because @available cannot be applied to stored properties @@ -164,6 +166,8 @@ final class ASRService: ObservableObject { return self.getAppleSpeechProvider() case .parakeetTDT, .parakeetTDTv2: return self.getFluidAudioProvider() + case .cohereTranscribeSixBit: + return self.getExternalCoreMLProvider() case .qwen3Asr: DebugLogger.shared.warning( "ASRService: Qwen provider removed; falling back to FluidAudio Parakeet path", @@ -190,6 +194,16 @@ final class ASRService: ObservableObject { return provider } + private func getExternalCoreMLProvider() -> ExternalCoreMLTranscriptionProvider { + if let existing = externalCoreMLProvider { + return existing + } + let provider = ExternalCoreMLTranscriptionProvider() + self.externalCoreMLProvider = provider + DebugLogger.shared.info("ASRService: Created external CoreML provider", source: "ASRService") + return provider + } + private func getWhisperProvider() -> WhisperProvider { if let existing = whisperProvider { return existing @@ -248,6 +262,8 @@ final class ASRService: ObservableObject { // Create a new provider configured for the specific model let provider = FluidAudioProvider(modelOverride: model, configureWordBoosting: false) return provider + case .cohereTranscribeSixBit: + return ExternalCoreMLTranscriptionProvider(modelOverride: model) case .qwen3Asr: // Qwen support removed; route legacy requests to Parakeet v3. return FluidAudioProvider(modelOverride: .parakeetTDT, configureWordBoosting: false) @@ -323,6 +339,7 @@ final class ASRService: ObservableObject { // Reset cached providers to force re-initialization with new settings self.fluidAudioProvider = nil + self.externalCoreMLProvider = nil self.whisperProvider = nil self.appleSpeechProvider = nil self._appleSpeechAnalyzerProvider = nil @@ -440,7 +457,7 @@ final class ASRService: ObservableObject { @MainActor private func handleParakeetVocabularyDidChange() { let model = SettingsStore.shared.selectedSpeechModel - guard model == .parakeetTDT || model == .parakeetTDTv2 else { return } + guard model.supportsCustomVocabulary else { return } guard self.isRunning == false else { self.hasPendingParakeetVocabularyReload = true DebugLogger.shared.info( @@ -459,7 +476,7 @@ final class ASRService: ObservableObject { self.hasPendingParakeetVocabularyReload = false let model = SettingsStore.shared.selectedSpeechModel - guard model == .parakeetTDT || model == .parakeetTDTv2 else { return } + guard model.supportsCustomVocabulary else { return } DebugLogger.shared.info( "ASRService: Applying queued vocabulary reload after recording stopped.", @@ -470,7 +487,7 @@ final class ASRService: ObservableObject { private func refreshWordBoostStatus() { let model = SettingsStore.shared.selectedSpeechModel - guard model == .parakeetTDT || model == .parakeetTDTv2, + guard model.supportsCustomVocabulary, let provider = self.fluidAudioProvider, provider.isReady else { @@ -492,7 +509,7 @@ final class ASRService: ObservableObject { private func recordWordBoostHitIfAny(transcribedText: String) { let model = SettingsStore.shared.selectedSpeechModel - guard model == .parakeetTDT || model == .parakeetTDTv2, + guard model.supportsCustomVocabulary, let provider = self.fluidAudioProvider, provider.isWordBoostingActive else { return } diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift new file mode 100644 index 00000000..3d39b527 --- /dev/null +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -0,0 +1,154 @@ +import CoreML +import 
Foundation + +enum ExternalCoreMLASRBackend { + case cohereTranscribe +} + +struct ExternalCoreMLManifestIdentity: Decodable { + let modelID: String + let sampleRate: Int + + private enum CodingKeys: String, CodingKey { + case modelID = "model_id" + case sampleRate = "sample_rate" + } +} + +enum ExternalCoreMLArtifactsValidationError: LocalizedError { + case missingEntries([String]) + case manifestMissing(URL) + case manifestUnreadable(URL, Error) + case unexpectedModelID(expected: String, actual: String) + case unexpectedSampleRate(expected: Int, actual: Int) + + var errorDescription: String? { + switch self { + case .missingEntries(let entries): + return "Missing required files: \(entries.joined(separator: ", "))" + case .manifestMissing(let url): + return "Manifest file not found at \(url.path)" + case .manifestUnreadable(let url, let error): + return "Failed to read manifest at \(url.path): \(error.localizedDescription)" + case .unexpectedModelID(let expected, let actual): + return "Unexpected model_id '\(actual)'. Expected '\(expected)'." + case .unexpectedSampleRate(let expected, let actual): + return "Unexpected sample rate \(actual). Expected \(expected)." + } + } +} + +struct ExternalCoreMLASRModelSpec { + let backend: ExternalCoreMLASRBackend + let artifactFolderHint: String + let manifestFileName: String + let frontendFileName: String + let encoderFileName: String + let decoderFileName: String + let cachedDecoderFileName: String + let expectedModelID: String + let expectedSampleRate: Int + let computeUnits: MLComputeUnits + let sourceURL: URL? + + var requiredEntries: [String] { + [ + self.manifestFileName, + self.frontendFileName, + self.encoderFileName, + self.decoderFileName, + self.cachedDecoderFileName, + ] + } + + func url(for entry: String, in directory: URL) -> URL { + directory.appendingPathComponent(entry, isDirectory: entry.hasSuffix(".mlpackage")) + } + + func validateArtifacts(at directory: URL) -> Bool { + (try? self.validateArtifactsOrThrow(at: directory)) != nil + } + + func missingEntries(at directory: URL) -> [String] { + self.requiredEntries.filter { entry in + let url = self.url(for: entry, in: directory) + return FileManager.default.fileExists(atPath: url.path) == false + } + } + + func validateArtifactsOrThrow(at directory: URL) throws { + let missingEntries = self.missingEntries(at: directory) + guard missingEntries.isEmpty else { + throw ExternalCoreMLArtifactsValidationError.missingEntries(missingEntries) + } + + let manifestURL = self.url(for: self.manifestFileName, in: directory) + guard FileManager.default.fileExists(atPath: manifestURL.path) else { + throw ExternalCoreMLArtifactsValidationError.manifestMissing(manifestURL) + } + + let manifest: ExternalCoreMLManifestIdentity + do { + let data = try Data(contentsOf: manifestURL) + manifest = try JSONDecoder().decode(ExternalCoreMLManifestIdentity.self, from: data) + } catch { + throw ExternalCoreMLArtifactsValidationError.manifestUnreadable(manifestURL, error) + } + + guard manifest.modelID == self.expectedModelID else { + throw ExternalCoreMLArtifactsValidationError.unexpectedModelID( + expected: self.expectedModelID, + actual: manifest.modelID + ) + } + + guard manifest.sampleRate == self.expectedSampleRate else { + throw ExternalCoreMLArtifactsValidationError.unexpectedSampleRate( + expected: self.expectedSampleRate, + actual: manifest.sampleRate + ) + } + } +} + +enum ExternalCoreMLModelRegistry { + static func spec(for model: SettingsStore.SpeechModel) -> ExternalCoreMLASRModelSpec? 
{ + switch model { + case .cohereTranscribeSixBit: + return ExternalCoreMLASRModelSpec( + backend: .cohereTranscribe, + artifactFolderHint: "cohere-transcribe-03-2026-CoreML-6bit", + manifestFileName: "coreml_manifest.json", + frontendFileName: "cohere_frontend.mlpackage", + encoderFileName: "cohere_encoder.mlpackage", + decoderFileName: "cohere_decoder_fullseq_masked.mlpackage", + cachedDecoderFileName: "cohere_decoder_cached.mlpackage", + expectedModelID: "CohereLabs/cohere-transcribe-03-2026", + expectedSampleRate: 16000, + computeUnits: .cpuAndGPU, + sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit") + ) + default: + return nil + } + } +} + +extension SettingsStore.SpeechModel { + var externalCoreMLSpec: ExternalCoreMLASRModelSpec? { + ExternalCoreMLModelRegistry.spec(for: self) + } + + var requiresExternalArtifacts: Bool { + self.externalCoreMLSpec != nil + } + + var supportsCustomVocabulary: Bool { + switch self { + case .parakeetTDT, .parakeetTDTv2: + return true + default: + return false + } + } +} diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift new file mode 100644 index 00000000..3fed2ceb --- /dev/null +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -0,0 +1,212 @@ +import Foundation + +#if arch(arm64) +import FluidAudio + +@available(macOS 15.0, *) +final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { + let name = "External CoreML" + + var isAvailable: Bool { true } + private(set) var isReady: Bool = false + var prefersNativeFileTranscription: Bool { true } + + private var cohereManager: CohereTranscribeAsrManager? + private let modelOverride: SettingsStore.SpeechModel? + + init(modelOverride: SettingsStore.SpeechModel? = nil) { + self.modelOverride = modelOverride + } + + func prepare(progressHandler: ((Double) -> Void)? = nil) async throws { + guard self.isReady == false else { return } + + let model = self.modelOverride ?? 
SettingsStore.shared.selectedSpeechModel + DebugLogger.shared.info( + "ExternalCoreML: prepare requested for model=\(model.rawValue)", + source: "ExternalCoreML" + ) + guard let spec = model.externalCoreMLSpec else { + DebugLogger.shared.error( + "ExternalCoreML: missing spec for model=\(model.rawValue)", + source: "ExternalCoreML" + ) + throw Self.makeError("No external CoreML spec registered for \(model.displayName).") + } + guard let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) else { + DebugLogger.shared.error( + "ExternalCoreML: no artifacts directory configured for model=\(model.rawValue)", + source: "ExternalCoreML" + ) + throw Self.makeError("Select the \(model.displayName) artifacts folder before loading the model.") + } + + DebugLogger.shared.info( + "ExternalCoreML: validating artifacts at \(directory.path)", + source: "ExternalCoreML" + ) + + do { + try spec.validateArtifactsOrThrow(at: directory) + DebugLogger.shared.info( + "ExternalCoreML: artifact validation passed for \(directory.lastPathComponent)", + source: "ExternalCoreML" + ) + } catch { + DebugLogger.shared.error( + "ExternalCoreML: artifact validation failed: \(error.localizedDescription)", + source: "ExternalCoreML" + ) + throw Self.makeError(error.localizedDescription) + } + + progressHandler?(0.1) + + switch spec.backend { + case .cohereTranscribe: + let manager = CohereTranscribeAsrManager() + progressHandler?(0.35) + DebugLogger.shared.info( + "ExternalCoreML: loading Cohere models [computeUnits=\(String(describing: spec.computeUnits))]", + source: "ExternalCoreML" + ) + try await manager.loadModels(from: directory, computeUnits: spec.computeUnits) + self.cohereManager = manager + } + + self.isReady = true + DebugLogger.shared.info( + "ExternalCoreML: provider ready for model=\(model.rawValue)", + source: "ExternalCoreML" + ) + progressHandler?(1.0) + } + + func transcribe(_ samples: [Float]) async throws -> ASRTranscriptionResult { + try await self.transcribeFinal(samples) + } + + func transcribeFile(at fileURL: URL) async throws -> ASRTranscriptionResult { + guard let manager = self.cohereManager else { + DebugLogger.shared.error( + "ExternalCoreML: file transcription requested before manager initialization", + source: "ExternalCoreML" + ) + throw Self.makeError("External CoreML model is not initialized.") + } + + let startedAt = Date() + DebugLogger.shared.info( + "ExternalCoreML: native file transcription start [file=\(fileURL.lastPathComponent)]", + source: "ExternalCoreML" + ) + let text = try await manager.transcribe(audioFileAt: fileURL) + let elapsed = Date().timeIntervalSince(startedAt) + DebugLogger.shared.info( + "ExternalCoreML: native file transcription finished in \(String(format: "%.2f", elapsed))s [chars=\(text.count)]", + source: "ExternalCoreML" + ) + return ASRTranscriptionResult(text: text, confidence: 1.0) + } + + func transcribeFinal(_ samples: [Float]) async throws -> ASRTranscriptionResult { + guard let manager = self.cohereManager else { + DebugLogger.shared.error( + "ExternalCoreML: transcribe requested before manager initialization", + source: "ExternalCoreML" + ) + throw Self.makeError("External CoreML model is not initialized.") + } + let startedAt = Date() + let sampleRate = Double((self.modelOverride ?? SettingsStore.shared.selectedSpeechModel).externalCoreMLSpec?.expectedSampleRate ?? 16_000) + let audioSeconds = sampleRate > 0 ? 
Double(samples.count) / sampleRate : 0 + DebugLogger.shared.debug( + "ExternalCoreML: transcribing \(samples.count) samples [audioSeconds=\(String(format: "%.2f", audioSeconds))]", + source: "ExternalCoreML" + ) + let text = try await manager.transcribe(audioSamples: samples) + let elapsed = Date().timeIntervalSince(startedAt) + let rtf = audioSeconds > 0 ? elapsed / audioSeconds : 0 + DebugLogger.shared.info( + "ExternalCoreML: transcription finished in \(String(format: "%.2f", elapsed))s [audioSeconds=\(String(format: "%.2f", audioSeconds)), rtf=\(String(format: "%.2fx", rtf)), chars=\(text.count)]", + source: "ExternalCoreML" + ) + return ASRTranscriptionResult(text: text, confidence: 1.0) + } + + func modelsExistOnDisk() -> Bool { + let model = self.modelOverride ?? SettingsStore.shared.selectedSpeechModel + guard + let spec = model.externalCoreMLSpec, + let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) + else { + return false + } + return spec.validateArtifacts(at: directory) + } + + func clearCache() async throws { + let model = self.modelOverride ?? SettingsStore.shared.selectedSpeechModel + guard + model.externalCoreMLSpec != nil, + let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) + else { + self.isReady = false + self.cohereManager = nil + return + } + + let compiledDirectory = CohereTranscribeAsrModels.compiledArtifactsDirectory(for: directory) + + if FileManager.default.fileExists(atPath: compiledDirectory.path) { + DebugLogger.shared.info( + "ExternalCoreML: clearing compiled cache at \(compiledDirectory.path)", + source: "ExternalCoreML" + ) + try FileManager.default.removeItem(at: compiledDirectory) + } + + self.isReady = false + self.cohereManager = nil + DebugLogger.shared.info( + "ExternalCoreML: provider reset after cache clear", + source: "ExternalCoreML" + ) + } + + private static func makeError(_ description: String) -> NSError { + NSError( + domain: "ExternalCoreMLTranscriptionProvider", + code: -1, + userInfo: [NSLocalizedDescriptionKey: description] + ) + } +} + +#else + +final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { + let name = "External CoreML" + let isAvailable = false + let isReady = false + + init(modelOverride: SettingsStore.SpeechModel? = nil) {} + + func prepare(progressHandler: ((Double) -> Void)? = nil) async throws { + throw NSError( + domain: "ExternalCoreMLTranscriptionProvider", + code: -1, + userInfo: [NSLocalizedDescriptionKey: "External CoreML models are only supported on Apple Silicon Macs."] + ) + } + + func transcribe(_ samples: [Float]) async throws -> ASRTranscriptionResult { + throw NSError( + domain: "ExternalCoreMLTranscriptionProvider", + code: -1, + userInfo: [NSLocalizedDescriptionKey: "External CoreML models are only supported on Apple Silicon Macs."] + ) + } +} + +#endif diff --git a/Sources/Fluid/Services/MeetingTranscriptionService.swift b/Sources/Fluid/Services/MeetingTranscriptionService.swift index d9e479dd..31900de8 100644 --- a/Sources/Fluid/Services/MeetingTranscriptionService.swift +++ b/Sources/Fluid/Services/MeetingTranscriptionService.swift @@ -181,6 +181,43 @@ final class MeetingTranscriptionService: ObservableObject { DebugLogger.shared.warning("Could not determine audio duration: \(error.localizedDescription)", source: "MeetingTranscriptionService") } + if provider.prefersNativeFileTranscription { + self.currentStatus = duration > 0 ? "Transcribing audio (\(Int(duration))s)..." : "Transcribing audio..." 
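+            // Native path: the provider consumes the file directly and performs its own
+            // long-audio chunking/reassembly, so the 20-minute fallback chunker below is skipped.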
+ self.progress = 0.3 + + DebugLogger.shared.info( + "MeetingTranscriptionService: using native file transcription path for provider=\(provider.name)", + source: "MeetingTranscriptionService" + ) + + let nativeResult = try await provider.transcribeFile(at: fileURL) + let processingTime = Date().timeIntervalSince(startTime) + let result = TranscriptionResult( + text: nativeResult.text, + confidence: nativeResult.confidence, + duration: duration, + processingTime: processingTime, + fileName: fileURL.lastPathComponent + ) + + self.currentStatus = "Complete!" + self.progress = 1.0 + + AnalyticsService.shared.capture( + .meetingTranscriptionCompleted, + properties: [ + "success": true, + "file_type": fileURL.pathExtension.lowercased(), + "audio_duration_bucket": AnalyticsBuckets.bucketSeconds(duration), + "processing_time_bucket": AnalyticsBuckets.bucketSeconds(processingTime), + ] + ) + + self.result = result + FileTranscriptionHistoryStore.shared.addEntry(result) + return result + } + // Transcribe using chunked processing for long files // This reads audio in ~20 minute segments to avoid memory overflow on 3+ hour files let chunkDurationSeconds: Double = 20 * 60 // 20 minutes per chunk (well under 24min model limit) diff --git a/Sources/Fluid/Services/TranscriptionProvider.swift b/Sources/Fluid/Services/TranscriptionProvider.swift index 46393a6a..acaa2505 100644 --- a/Sources/Fluid/Services/TranscriptionProvider.swift +++ b/Sources/Fluid/Services/TranscriptionProvider.swift @@ -45,6 +45,14 @@ protocol TranscriptionProvider { /// Providers can use higher-quality passes (e.g., vocabulary rescoring) here. func transcribeFinal(_ samples: [Float]) async throws -> ASRTranscriptionResult + /// Whether this provider prefers to handle long-form file transcription itself. + /// This is useful when the backend already has model-native long-audio chunking/reassembly. + var prefersNativeFileTranscription: Bool { get } + + /// Transcribe a complete audio/video file. + /// Providers that do not implement this can rely on MeetingTranscriptionService fallback chunking. 
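+    /// A provider with a native file path might simply wrap its final pass, e.g. (sketch;
+    /// `loadSamples` is a hypothetical helper that decodes the file to 16 kHz mono Float samples):
+    ///
+    ///     func transcribeFile(at fileURL: URL) async throws -> ASRTranscriptionResult {
+    ///         let samples = try await loadSamples(from: fileURL)
+    ///         return try await transcribeFinal(samples)
+    ///     }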
+ func transcribeFile(at fileURL: URL) async throws -> ASRTranscriptionResult + /// Check if models exist on disk (without loading them) func modelsExistOnDisk() -> Bool @@ -56,6 +64,7 @@ protocol TranscriptionProvider { extension TranscriptionProvider { func modelsExistOnDisk() -> Bool { return false } func clearCache() async throws {} + var prefersNativeFileTranscription: Bool { false } func transcribeStreaming(_ samples: [Float]) async throws -> ASRTranscriptionResult { try await self.transcribe(samples) } @@ -63,6 +72,14 @@ extension TranscriptionProvider { func transcribeFinal(_ samples: [Float]) async throws -> ASRTranscriptionResult { try await self.transcribe(samples) } + + func transcribeFile(at fileURL: URL) async throws -> ASRTranscriptionResult { + throw NSError( + domain: "TranscriptionProvider", + code: -1, + userInfo: [NSLocalizedDescriptionKey: "\(self.name) does not implement native file transcription."] + ) + } } // MARK: - Architecture Detection diff --git a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift index 884f5724..2187853c 100644 --- a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift +++ b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift @@ -1,3 +1,4 @@ +import AppKit import Combine import SwiftUI @@ -65,6 +66,8 @@ final class VoiceEngineSettingsViewModel: ObservableObject { models = models.filter { $0.provider == .nvidia } case .apple: models = models.filter { $0.provider == .apple } + case .cohere: + models = models.filter { $0.provider == .cohere } case .openai: models = models.filter { $0.provider == .openai } } @@ -95,6 +98,9 @@ final class VoiceEngineSettingsViewModel: ObservableObject { func activateSpeechModel(_ model: SettingsStore.SpeechModel) { guard !self.asr.isRunning else { return } + if model.requiresExternalArtifacts && self.ensureExternalArtifactsConfigured(for: model) == false { + return + } withAnimation(.spring(response: 0.3, dampingFraction: 0.7)) { self.settings.selectedSpeechModel = model self.previewSpeechModel = model @@ -106,12 +112,19 @@ final class VoiceEngineSettingsViewModel: ObservableObject { try await self.asr.ensureAsrReady() } catch { DebugLogger.shared.error("Failed to prepare model after activation: \(error)", source: "AISettingsView") + self.asr.errorTitle = "Model Activation Failed" + self.asr.errorMessage = error.localizedDescription + self.asr.showError = true } } } func downloadSpeechModel(_ model: SettingsStore.SpeechModel) { guard !self.asr.isRunning else { return } + if model.requiresExternalArtifacts { + _ = self.ensureExternalArtifactsConfigured(for: model) + return + } guard self.downloadingModel == nil else { return } // Prevent concurrent downloads self.downloadingModel = model self.downloadProgress = 0.0 @@ -137,7 +150,7 @@ final class VoiceEngineSettingsViewModel: ObservableObject { } catch { DebugLogger.shared.error("Failed to download model \(model.displayName): \(error)", source: "VoiceEngineVM") await MainActor.run { - self.asr.errorTitle = "Model Download Failed" + self.asr.errorTitle = model.requiresExternalArtifacts ? "Model Import Failed" : "Model Download Failed" self.asr.errorMessage = error.localizedDescription self.asr.showError = true } @@ -197,6 +210,8 @@ final class VoiceEngineSettingsViewModel: ObservableObject { return "Parakeet TDT v2 is an English-only model optimized for accuracy and consistency on Apple Silicon." 
case .qwen3Asr: return "Qwen3 ASR is a multilingual FluidAudio model with strong quality, but higher memory usage. Requires macOS 15+." + case .cohereTranscribeSixBit: + return "Cohere Transcribe uses an external CoreML pipeline loaded from a local folder. Best on Apple Silicon with 8GB+ RAM." default: return "Whisper models support 99 languages and work on any Mac." } @@ -207,12 +222,20 @@ final class VoiceEngineSettingsViewModel: ObservableObject { try await self.asr.ensureAsrReady() } catch { DebugLogger.shared.error("Failed to download models: \(error)", source: "AISettingsView") + self.asr.errorTitle = self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Model Import Failed" : "Model Download Failed" + self.asr.errorMessage = error.localizedDescription + self.asr.showError = true } } func deleteModels() async { do { try await self.asr.clearModelCache() + let model = self.settings.selectedSpeechModel + if model.requiresExternalArtifacts { + self.settings.setExternalCoreMLArtifactsDirectory(nil, for: model) + self.asr.resetTranscriptionProvider() + } } catch { DebugLogger.shared.error("Failed to delete models: \(error)", source: "AISettingsView") } @@ -221,4 +244,69 @@ final class VoiceEngineSettingsViewModel: ObservableObject { func setSelectedSpeechProvider(_ provider: SettingsStore.SpeechModel.Provider) { self.selectedSpeechProvider = provider } + + func externalArtifactsDirectoryDisplay(for model: SettingsStore.SpeechModel) -> String? { + self.settings.externalCoreMLArtifactsDirectory(for: model)?.path + } + + func chooseExternalArtifactsDirectory(for model: SettingsStore.SpeechModel) { + _ = self.ensureExternalArtifactsConfigured(for: model, forceChooser: true) + } + + func clearExternalArtifactsDirectory(for model: SettingsStore.SpeechModel) { + self.settings.setExternalCoreMLArtifactsDirectory(nil, for: model) + if self.settings.selectedSpeechModel == model { + self.asr.resetTranscriptionProvider() + } + } + + @discardableResult + private func ensureExternalArtifactsConfigured( + for model: SettingsStore.SpeechModel, + forceChooser: Bool = false + ) -> Bool { + guard let spec = model.externalCoreMLSpec else { return true } + if forceChooser == false, + let directory = self.settings.externalCoreMLArtifactsDirectory(for: model), + spec.validateArtifacts(at: directory) + { + return true + } + + let panel = NSOpenPanel() + panel.title = "Select \(model.displayName) Artifacts Folder" + panel.message = "Choose the folder containing \(spec.artifactFolderHint) and its CoreML files." + panel.canChooseDirectories = true + panel.canChooseFiles = false + panel.allowsMultipleSelection = false + panel.canCreateDirectories = false + panel.prompt = "Use Folder" + + guard panel.runModal() == .OK, let selectedDirectory = panel.url?.standardizedFileURL else { + return false + } + + let candidateDirectory = spec.validateArtifacts(at: selectedDirectory) + ? 
selectedDirectory + : selectedDirectory.appendingPathComponent(spec.artifactFolderHint, isDirectory: true) + + do { + try spec.validateArtifactsOrThrow(at: candidateDirectory) + } catch { + self.asr.errorTitle = "Artifacts Folder Invalid" + self.asr.errorMessage = error.localizedDescription + self.asr.showError = true + return false + } + + self.settings.setExternalCoreMLArtifactsDirectory(candidateDirectory, for: model) + if self.settings.selectedSpeechModel == model { + self.asr.resetTranscriptionProvider() + } else { + Task { + await self.asr.checkIfModelsExistAsync() + } + } + return true + } } diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift index 75a1bb86..1b3af5fe 100644 --- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift +++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift @@ -164,7 +164,7 @@ extension VoiceEngineSettingsView { /// Stats panel showing speed/accuracy bars that animate when model changes var modelStatsPanel: some View { let model = self.viewModel.previewSpeechModel - let supportsParakeetCustomWords = model == .parakeetTDT || model == .parakeetTDTv2 + let supportsCustomWords = model.supportsCustomVocabulary return VStack(alignment: .leading, spacing: 12) { HStack(alignment: .center, spacing: 20) { @@ -270,7 +270,58 @@ extension VoiceEngineSettingsView { .animation(.spring(response: 0.5, dampingFraction: 0.7), value: model.id) } - if supportsParakeetCustomWords { + if model.requiresExternalArtifacts { + VStack(alignment: .leading, spacing: 8) { + HStack(alignment: .center, spacing: 10) { + Image(systemName: "folder.badge.gearshape") + .font(.caption) + .foregroundStyle(self.theme.palette.accent) + + VStack(alignment: .leading, spacing: 3) { + Text("External CoreML artifacts") + .font(.caption) + .fontWeight(.semibold) + Text(self.viewModel.externalArtifactsDirectoryDisplay(for: model) ?? "Choose the Hugging Face model folder before activating this model.") + .font(.caption2) + .foregroundStyle(.secondary) + .lineLimit(3) + } + + Spacer(minLength: 8) + + Button(self.viewModel.externalArtifactsDirectoryDisplay(for: model) == nil ? "Import Folder" : "Change Folder") { + self.viewModel.chooseExternalArtifactsDirectory(for: model) + } + .buttonStyle(.borderedProminent) + .tint(self.theme.palette.accent) + .controlSize(.small) + } + + if self.viewModel.externalArtifactsDirectoryDisplay(for: model) != nil { + HStack { + Button("Clear Folder") { + self.viewModel.clearExternalArtifactsDirectory(for: model) + } + .buttonStyle(.plain) + .font(.caption) + .foregroundStyle(.red.opacity(0.8)) + Spacer() + } + } + } + .padding(.horizontal, 10) + .padding(.vertical, 8) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(self.theme.palette.accent.opacity(0.08)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(self.theme.palette.accent.opacity(0.20), lineWidth: 1) + ) + ) + } + + if supportsCustomWords { HStack(alignment: .center, spacing: 10) { Image(systemName: "checkmark.seal.fill") .font(.caption) @@ -448,12 +499,12 @@ extension VoiceEngineSettingsView { } } else { ZStack(alignment: .trailing) { - Text("Not downloaded") + Text(model.requiresExternalArtifacts ? "Not imported" : "Not downloaded") .font(.caption2) .foregroundStyle(.secondary) .opacity(isSelected ? 0 : 1) - Button("Download") { + Button(model.requiresExternalArtifacts ? 
"Import Folder" : "Download") { self.viewModel.previewSpeechModel = model self.viewModel.downloadSpeechModel(model) } @@ -515,7 +566,7 @@ extension VoiceEngineSettingsView { Button(action: { Task { await self.viewModel.deleteModels() } }) { HStack(spacing: 4) { Image(systemName: "trash") - Text("Delete") + Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Forget" : "Delete") } .font(.caption) .foregroundStyle(.red) @@ -523,12 +574,14 @@ extension VoiceEngineSettingsView { .buttonStyle(.plain) } else if self.viewModel.asr.modelsExistOnDisk { Image(systemName: "doc.fill").foregroundStyle(self.theme.palette.accent).font(.caption) - Text("Cached").font(.caption).foregroundStyle(.secondary) + Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Imported" : "Cached") + .font(.caption) + .foregroundStyle(.secondary) Button(action: { Task { await self.viewModel.deleteModels() } }) { HStack(spacing: 4) { Image(systemName: "trash") - Text("Delete") + Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Forget" : "Delete") } .font(.caption) .foregroundStyle(.red) @@ -538,7 +591,7 @@ extension VoiceEngineSettingsView { Button(action: { Task { await self.viewModel.downloadModels() } }) { HStack(spacing: 4) { Image(systemName: "arrow.down.circle.fill") - Text("Download") + Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Import" : "Download") } .font(.caption) } diff --git a/Sources/Fluid/UI/AISettingsView.swift b/Sources/Fluid/UI/AISettingsView.swift index d60268ba..273a447a 100644 --- a/Sources/Fluid/UI/AISettingsView.swift +++ b/Sources/Fluid/UI/AISettingsView.swift @@ -58,6 +58,7 @@ enum SpeechProviderFilter: String, CaseIterable, Identifiable { case all = "All" case nvidia = "NVIDIA" case apple = "Apple" + case cohere = "Cohere" case openai = "OpenAI" var id: String { self.rawValue } From b3a927a0df4d5d45f8e13697714d7f9fd49a496c Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 00:52:43 -0700 Subject: [PATCH 02/22] Enable Cohere preview streaming --- Sources/Fluid/Persistence/SettingsStore.swift | 24 +++++++++++++++++- Sources/Fluid/Services/ASRService.swift | 25 ++++++++++++++----- .../ExternalCoreMLTranscriptionProvider.swift | 8 ++++++ 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 491596e0..d8464d34 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2684,13 +2684,35 @@ final class SettingsStore: ObservableObject { /// Large Whisper models are too slow for streaming, so they only do final transcription on stop. var supportsStreaming: Bool { switch self { - case .qwen3Asr, .cohereTranscribeSixBit, .whisperMedium, .whisperLargeTurbo, .whisperLarge: + case .qwen3Asr, .whisperMedium, .whisperLargeTurbo, .whisperLarge: return false // Too slow for real-time chunk processing default: return true // All other models support streaming } } + /// Preview update cadence for real-time transcription. + /// Models without native incremental decoding should use a slower interval. + var streamingPreviewIntervalSeconds: Double { + switch self { + case .cohereTranscribeSixBit: + return 2.0 + default: + return 0.6 + } + } + + /// Minimum audio required before attempting a preview decode. + /// Cohere performs better with a slightly larger prefix than the default 1 second. 
+ var minimumStreamingPreviewSeconds: Double { + switch self { + case .cohereTranscribeSixBit: + return 2.0 + default: + return 1.0 + } + } + /// Provider category for tab grouping enum Provider: String, CaseIterable { case nvidia = "NVIDIA" diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index e00e906e..df9906e6 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -398,7 +398,6 @@ final class ASRService: ObservableObject { // Streaming transcription state (no VAD) private var streamingTask: Task? private var lastProcessedSampleCount: Int = 0 - private let chunkDurationSeconds: Double = 0.6 // Fast interval - TranscriptionExecutor actor handles CoreML serialization private var isProcessingChunk: Bool = false private var skipNextChunk: Bool = false private var previousFullTranscription: String = "" @@ -412,6 +411,14 @@ final class ASRService: ObservableObject { var audioLevelPublisher: AnyPublisher { self.audioLevelSubject.eraseToAnyPublisher() } private var lastAudioLevelSentAt: TimeInterval = 0 + private var streamingChunkDurationSeconds: Double { + SettingsStore.shared.selectedSpeechModel.streamingPreviewIntervalSeconds + } + + private var minimumStreamingPreviewSamples: Int { + Int(SettingsStore.shared.selectedSpeechModel.minimumStreamingPreviewSeconds * 16_000) + } + /// Handles AVAudioEngine tap processing off the @MainActor to avoid touching main-actor state /// from CoreAudio's realtime callback thread. private lazy var audioCapturePipeline: AudioCapturePipeline = .init( @@ -2190,7 +2197,10 @@ final class ASRService: ObservableObject { self.streamingTask?.cancel() guard self.isAsrReady else { return } - DebugLogger.shared.debug("Starting streaming transcription task (interval: \(self.chunkDurationSeconds)s)", source: "ASRService") + DebugLogger.shared.debug( + "Starting streaming transcription task (interval: \(self.streamingChunkDurationSeconds)s, minSamples: \(self.minimumStreamingPreviewSamples))", + source: "ASRService" + ) self.streamingTask = Task { [weak self] in await self?.runStreamingLoop() @@ -2251,7 +2261,7 @@ final class ASRService: ObservableObject { } do { - try await Task.sleep(nanoseconds: UInt64(self.chunkDurationSeconds * 1_000_000_000)) + try await Task.sleep(nanoseconds: UInt64(self.streamingChunkDurationSeconds * 1_000_000_000)) } catch { DebugLogger.shared.debug("Streaming transcription task cancelled", source: "ASRService") break @@ -2281,7 +2291,7 @@ final class ASRService: ObservableObject { // Thread-safe count check let currentSampleCount = self.audioBuffer.count // Most ASR models require at least 1 second of 16kHz audio (16,000 samples) to transcribe - let minSamples = 16_000 // 1 second minimum required by transcription providers + let minSamples = self.minimumStreamingPreviewSamples guard currentSampleCount >= minSamples else { // Only log once per recording session to avoid spam if currentSampleCount > 0, self.lastProcessedSampleCount == 0 { @@ -2342,8 +2352,11 @@ final class ASRService: ObservableObject { // If transcription takes longer than the interval, skip next to prevent queue buildup // This allows slower machines to still work without overwhelming the system - if duration > self.chunkDurationSeconds { - DebugLogger.shared.debug("⚠️ Transcription slow (\(String(format: "%.2f", duration))s > \(self.chunkDurationSeconds)s), skipping next chunk", source: "ASRService") + if duration > self.streamingChunkDurationSeconds { + DebugLogger.shared.debug( + 
"⚠️ Transcription slow (\(String(format: "%.2f", duration))s > \(self.streamingChunkDurationSeconds)s), skipping next chunk", + source: "ASRService" + ) self.skipNextChunk = true } } catch { diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift index 3fed2ceb..7add2b71 100644 --- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -86,6 +86,14 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { try await self.transcribeFinal(samples) } + func transcribeStreaming(_ samples: [Float]) async throws -> ASRTranscriptionResult { + DebugLogger.shared.debug( + "ExternalCoreML: streaming preview request [samples=\(samples.count)]", + source: "ExternalCoreML" + ) + return try await self.transcribeFinal(samples) + } + func transcribeFile(at fileURL: URL) async throws -> ASRTranscriptionResult { guard let manager = self.cohereManager else { DebugLogger.shared.error( From 208f20e7048901e4f4491a6d5f18a990f7c3ebd4 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 00:55:11 -0700 Subject: [PATCH 03/22] Add Hugging Face links for external ASR models --- .../VoiceEngineSettingsViewModel.swift | 5 + .../UI/AISettingsView+SpeechRecognition.swift | 107 ++++++++++++++---- 2 files changed, 90 insertions(+), 22 deletions(-) diff --git a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift index 2187853c..f6aa6ce1 100644 --- a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift +++ b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift @@ -260,6 +260,11 @@ final class VoiceEngineSettingsViewModel: ObservableObject { } } + func openExternalModelSource(for model: SettingsStore.SpeechModel) { + guard let url = model.externalCoreMLSpec?.sourceURL else { return } + NSWorkspace.shared.open(url) + } + @discardableResult private func ensureExternalArtifactsConfigured( for model: SettingsStore.SpeechModel, diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift index 1b3af5fe..680533ab 100644 --- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift +++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift @@ -297,6 +297,26 @@ extension VoiceEngineSettingsView { .controlSize(.small) } + if model.externalCoreMLSpec?.sourceURL != nil { + HStack(spacing: 10) { + Button { + self.viewModel.openExternalModelSource(for: model) + } label: { + Label("Open Hugging Face", systemImage: "arrow.up.right.square") + .font(.caption) + } + .buttonStyle(.plain) + .foregroundStyle(self.theme.palette.accent) + + Text("Download the full model folder there, then import it here.") + .font(.caption2) + .foregroundStyle(.secondary) + .lineLimit(2) + + Spacer() + } + } + if self.viewModel.externalArtifactsDirectoryDisplay(for: model) != nil { HStack { Button("Clear Folder") { @@ -499,23 +519,50 @@ extension VoiceEngineSettingsView { } } else { ZStack(alignment: .trailing) { - Text(model.requiresExternalArtifacts ? "Not imported" : "Not downloaded") - .font(.caption2) - .foregroundStyle(.secondary) - .opacity(isSelected ? 
0 : 1) + if model.requiresExternalArtifacts { + HStack(spacing: 8) { + if let _ = model.externalCoreMLSpec?.sourceURL { + Button { + self.viewModel.openExternalModelSource(for: model) + } label: { + Image(systemName: "arrow.up.right.square") + .font(.system(size: 14)) + } + .buttonStyle(.plain) + .foregroundStyle(.secondary) + .disabled(self.viewModel.asr.isRunning || self.viewModel.downloadingModel != nil) + } + + Button("Import Folder") { + self.viewModel.previewSpeechModel = model + self.viewModel.downloadSpeechModel(model) + } + .buttonStyle(.borderedProminent) + .controlSize(.small) + .tint(.blue) + .disabled(self.viewModel.asr.isRunning || self.viewModel.downloadingModel != nil) + } + .offset(x: isSelected ? 0 : 16) + .opacity(isSelected ? 1 : 0) + } else { + Text("Not downloaded") + .font(.caption2) + .foregroundStyle(.secondary) + .opacity(isSelected ? 0 : 1) - Button(model.requiresExternalArtifacts ? "Import Folder" : "Download") { - self.viewModel.previewSpeechModel = model - self.viewModel.downloadSpeechModel(model) + Button("Download") { + self.viewModel.previewSpeechModel = model + self.viewModel.downloadSpeechModel(model) + } + .buttonStyle(.borderedProminent) + .controlSize(.small) + .tint(.blue) + .disabled(self.viewModel.asr.isRunning || self.viewModel.downloadingModel != nil) + .offset(x: isSelected ? 0 : 16) + .opacity(isSelected ? 1 : 0) } - .buttonStyle(.borderedProminent) - .controlSize(.small) - .tint(.blue) - .disabled(self.viewModel.asr.isRunning || self.viewModel.downloadingModel != nil) - .offset(x: isSelected ? 0 : 16) - .opacity(isSelected ? 1 : 0) } - .frame(width: 120, alignment: .trailing) + .frame(width: model.requiresExternalArtifacts ? 168 : 120, alignment: .trailing) } } .padding(.horizontal, 12) @@ -588,16 +635,32 @@ extension VoiceEngineSettingsView { } .buttonStyle(.plain) } else { - Button(action: { Task { await self.viewModel.downloadModels() } }) { - HStack(spacing: 4) { - Image(systemName: "arrow.down.circle.fill") - Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Import" : "Download") + HStack(spacing: 8) { + if self.settings.selectedSpeechModel.requiresExternalArtifacts, + self.settings.selectedSpeechModel.externalCoreMLSpec?.sourceURL != nil + { + Button(action: { self.viewModel.openExternalModelSource(for: self.settings.selectedSpeechModel) }) { + HStack(spacing: 4) { + Image(systemName: "arrow.up.right.square") + Text("Hugging Face") + } + .font(.caption) + } + .buttonStyle(.plain) + .foregroundStyle(self.theme.palette.accent) } - .font(.caption) + + Button(action: { Task { await self.viewModel.downloadModels() } }) { + HStack(spacing: 4) { + Image(systemName: "arrow.down.circle.fill") + Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? 
"Import" : "Download") + } + .font(.caption) + } + .buttonStyle(.borderedProminent) + .controlSize(.small) + .tint(.blue) } - .buttonStyle(.borderedProminent) - .controlSize(.small) - .tint(.blue) } } .padding(.horizontal, 12) From 92d7e4b453099f4dfc6f5ab7e8c06ba6a1c5ec2a Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 09:32:46 -0700 Subject: [PATCH 04/22] Download Cohere CoreML model from Hugging Face --- .../Fluid/Networking/ModelDownloader.swift | 23 ++-- Sources/Fluid/Persistence/SettingsStore.swift | 2 +- Sources/Fluid/Services/ASRService.swift | 6 +- .../ExternalCoreMLModelRegistry.swift | 13 ++- .../ExternalCoreMLTranscriptionProvider.swift | 106 +++++++++++++----- .../VoiceEngineSettingsViewModel.swift | 78 +------------ .../UI/AISettingsView+SpeechRecognition.swift | 40 ++----- 7 files changed, 123 insertions(+), 145 deletions(-) diff --git a/Sources/Fluid/Networking/ModelDownloader.swift b/Sources/Fluid/Networking/ModelDownloader.swift index 4bf88c9d..a1f3a69c 100644 --- a/Sources/Fluid/Networking/ModelDownloader.swift +++ b/Sources/Fluid/Networking/ModelDownloader.swift @@ -23,6 +23,7 @@ final class HuggingFaceModelDownloader { private let owner: String private let repo: String private let revision: String + private let requiredItemsList: [ModelItem] private var baseApiURL: URL private var baseResolveURL: URL @@ -32,6 +33,12 @@ final class HuggingFaceModelDownloader { self.owner = "FluidInference" self.repo = "parakeet-tdt-0.6b-v3-coreml" self.revision = "main" + self.requiredItemsList = [ + ModelItem(path: "MelEncoder.mlmodelc", isDirectory: true), + ModelItem(path: "Decoder.mlmodelc", isDirectory: true), + ModelItem(path: "JointDecision.mlmodelc", isDirectory: true), + ModelItem(path: "parakeet_v3_vocab.json", isDirectory: false), + ] guard var apiBase = URL(string: "https://huggingface.co/api/models/") else { preconditionFailure("Invalid base Hugging Face API URL") } @@ -56,10 +63,16 @@ final class HuggingFaceModelDownloader { /// - owner: Hugging Face username or organization /// - repo: Repository name containing the models /// - revision: Branch or commit hash (default: "main") - init(owner: String, repo: String, revision: String = "main") { + init(owner: String, repo: String, revision: String = "main", requiredItems: [ModelItem]? = nil) { self.owner = owner self.repo = repo self.revision = revision + self.requiredItemsList = requiredItems ?? 
[
+            ModelItem(path: "MelEncoder.mlmodelc", isDirectory: true),
+            ModelItem(path: "Decoder.mlmodelc", isDirectory: true),
+            ModelItem(path: "JointDecision.mlmodelc", isDirectory: true),
+            ModelItem(path: "parakeet_v3_vocab.json", isDirectory: false),
+        ]
         guard var apiBase = URL(string: "https://huggingface.co/api/models/") else {
             preconditionFailure("Invalid base Hugging Face API URL")
         }
@@ -152,13 +165,7 @@ final class HuggingFaceModelDownloader {
     }
 
     private func requiredItems() -> [ModelItem] {
-        return [
-            // Preferred v3 unified model file names used by FluidAudio 0.5+
-            ModelItem(path: "MelEncoder.mlmodelc", isDirectory: true),
-            ModelItem(path: "Decoder.mlmodelc", isDirectory: true),
-            ModelItem(path: "JointDecision.mlmodelc", isDirectory: true),
-            ModelItem(path: "parakeet_v3_vocab.json", isDirectory: false),
-        ]
+        self.requiredItemsList
     }
 
     private func downloadDirectory(relativePath: String, to destination: URL) async throws {
diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift
index d8464d34..f39a5cf3 100644
--- a/Sources/Fluid/Persistence/SettingsStore.swift
+++ b/Sources/Fluid/Persistence/SettingsStore.swift
@@ -2664,7 +2664,7 @@ final class SettingsStore: ObservableObject {
         case .parakeetTDT: return "FluidVoice Pick"
         case .parakeetTDTv2: return "FluidVoice Pick"
         case .qwen3Asr: return "Beta"
-        case .cohereTranscribeSixBit: return "Manual Import"
+        case .cohereTranscribeSixBit: return "New"
         case .appleSpeechAnalyzer: return "New"
         default: return nil
         }
diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift
index df9906e6..e9893470 100644
--- a/Sources/Fluid/Services/ASRService.swift
+++ b/Sources/Fluid/Services/ASRService.swift
@@ -128,10 +128,9 @@ final class ASRService: ObservableObject {
     var modelStatusMessage: String {
-        let usesExternalArtifacts = SettingsStore.shared.selectedSpeechModel.requiresExternalArtifacts
         if self.isAsrReady { return "Model ready" }
-        if self.isDownloadingModel { return usesExternalArtifacts ? "Importing model..." : "Downloading model..." }
+        if self.isDownloadingModel { return "Downloading model..." }
         if self.isLoadingModel { return "Loading model into memory..." }
-        if self.modelsExistOnDisk { return usesExternalArtifacts ? "Model imported, needs loading" : "Model cached, needs loading" }
+        if self.modelsExistOnDisk { return "Model cached, needs loading" }
-        return usesExternalArtifacts ? "Model not imported" : "Model not downloaded"
+        return "Model not downloaded"
diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift
index 3d39b527..38e38d3f 100644
--- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift
+++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift
@@ -50,6 +50,9 @@ struct ExternalCoreMLASRModelSpec {
     let expectedSampleRate: Int
     let computeUnits: MLComputeUnits
     let sourceURL: URL?
+    let repositoryOwner: String?
+    let repositoryName: String?
+    let repositoryRevision: String
 
     var requiredEntries: [String] {
         [
@@ -65,6 +68,11 @@ struct ExternalCoreMLASRModelSpec {
         directory.appendingPathComponent(entry, isDirectory: entry.hasSuffix(".mlpackage"))
     }
 
+    var defaultCacheDirectory: URL? {
+        FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first?
+ .appendingPathComponent(self.artifactFolderHint, isDirectory: true) + } + func validateArtifacts(at directory: URL) -> Bool { (try? self.validateArtifactsOrThrow(at: directory)) != nil } @@ -126,7 +134,10 @@ enum ExternalCoreMLModelRegistry { expectedModelID: "CohereLabs/cohere-transcribe-03-2026", expectedSampleRate: 16000, computeUnits: .cpuAndGPU, - sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit") + sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit"), + repositoryOwner: "BarathwajAnandan", + repositoryName: "cohere-transcribe-03-2026-CoreML-6bit", + repositoryRevision: "main" ) default: return nil diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift index 7add2b71..8e7acf2b 100644 --- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -33,39 +33,27 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { ) throw Self.makeError("No external CoreML spec registered for \(model.displayName).") } - guard let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) else { + guard let directory = Self.artifactsDirectory(for: model, spec: spec) else { DebugLogger.shared.error( - "ExternalCoreML: no artifacts directory configured for model=\(model.rawValue)", + "ExternalCoreML: unable to resolve cache directory for model=\(model.rawValue)", source: "ExternalCoreML" ) - throw Self.makeError("Select the \(model.displayName) artifacts folder before loading the model.") + throw Self.makeError("Unable to resolve a cache directory for \(model.displayName).") } - DebugLogger.shared.info( - "ExternalCoreML: validating artifacts at \(directory.path)", - source: "ExternalCoreML" + try await self.ensureArtifactsPresent( + for: model, + spec: spec, + at: directory, + progressHandler: progressHandler ) - do { - try spec.validateArtifactsOrThrow(at: directory) - DebugLogger.shared.info( - "ExternalCoreML: artifact validation passed for \(directory.lastPathComponent)", - source: "ExternalCoreML" - ) - } catch { - DebugLogger.shared.error( - "ExternalCoreML: artifact validation failed: \(error.localizedDescription)", - source: "ExternalCoreML" - ) - throw Self.makeError(error.localizedDescription) - } - - progressHandler?(0.1) + progressHandler?(0.85) switch spec.backend { case .cohereTranscribe: let manager = CohereTranscribeAsrManager() - progressHandler?(0.35) + progressHandler?(0.9) DebugLogger.shared.info( "ExternalCoreML: loading Cohere models [computeUnits=\(String(describing: spec.computeUnits))]", source: "ExternalCoreML" @@ -144,9 +132,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { func modelsExistOnDisk() -> Bool { let model = self.modelOverride ?? SettingsStore.shared.selectedSpeechModel - guard - let spec = model.externalCoreMLSpec, - let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) + guard let spec = model.externalCoreMLSpec, + let directory = Self.artifactsDirectory(for: model, spec: spec) else { return false } @@ -155,9 +142,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { func clearCache() async throws { let model = self.modelOverride ?? 
SettingsStore.shared.selectedSpeechModel - guard - model.externalCoreMLSpec != nil, - let directory = SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) + guard let spec = model.externalCoreMLSpec, + let directory = Self.artifactsDirectory(for: model, spec: spec) else { self.isReady = false self.cohereManager = nil @@ -174,6 +160,14 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { try FileManager.default.removeItem(at: compiledDirectory) } + if FileManager.default.fileExists(atPath: directory.path) { + DebugLogger.shared.info( + "ExternalCoreML: removing downloaded artifacts at \(directory.path)", + source: "ExternalCoreML" + ) + try FileManager.default.removeItem(at: directory) + } + self.isReady = false self.cohereManager = nil DebugLogger.shared.info( @@ -182,6 +176,62 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { ) } + private func ensureArtifactsPresent( + for model: SettingsStore.SpeechModel, + spec: ExternalCoreMLASRModelSpec, + at directory: URL, + progressHandler: ((Double) -> Void)? + ) async throws { + try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) + + if spec.validateArtifacts(at: directory) { + DebugLogger.shared.info( + "ExternalCoreML: artifact validation passed for \(directory.lastPathComponent)", + source: "ExternalCoreML" + ) + progressHandler?(0.8) + return + } + + guard let owner = spec.repositoryOwner, let repo = spec.repositoryName else { + throw Self.makeError("Missing repository metadata for \(model.displayName).") + } + + DebugLogger.shared.info( + "ExternalCoreML: downloading missing artifacts from \(owner)/\(repo)", + source: "ExternalCoreML" + ) + + let downloader = HuggingFaceModelDownloader( + owner: owner, + repo: repo, + revision: spec.repositoryRevision, + requiredItems: spec.requiredEntries.map { .init(path: $0, isDirectory: $0.hasSuffix(".mlpackage")) } + ) + try await downloader.ensureModelsPresent(at: directory) { progress, item in + DebugLogger.shared.debug( + "ExternalCoreML: download progress \(Int(progress * 100))% [\(item)]", + source: "ExternalCoreML" + ) + progressHandler?(progress * 0.8) + } + + do { + try spec.validateArtifactsOrThrow(at: directory) + } catch { + throw Self.makeError(error.localizedDescription) + } + + SettingsStore.shared.setExternalCoreMLArtifactsDirectory(directory, for: model) + } + + private static func artifactsDirectory( + for model: SettingsStore.SpeechModel, + spec: ExternalCoreMLASRModelSpec + ) -> URL? { + SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) ?? 
spec.defaultCacheDirectory + } + private static func makeError(_ description: String) -> NSError { NSError( domain: "ExternalCoreMLTranscriptionProvider", diff --git a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift index f6aa6ce1..6959b847 100644 --- a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift +++ b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift @@ -98,9 +98,6 @@ final class VoiceEngineSettingsViewModel: ObservableObject { func activateSpeechModel(_ model: SettingsStore.SpeechModel) { guard !self.asr.isRunning else { return } - if model.requiresExternalArtifacts && self.ensureExternalArtifactsConfigured(for: model) == false { - return - } withAnimation(.spring(response: 0.3, dampingFraction: 0.7)) { self.settings.selectedSpeechModel = model self.previewSpeechModel = model @@ -121,10 +118,6 @@ final class VoiceEngineSettingsViewModel: ObservableObject { func downloadSpeechModel(_ model: SettingsStore.SpeechModel) { guard !self.asr.isRunning else { return } - if model.requiresExternalArtifacts { - _ = self.ensureExternalArtifactsConfigured(for: model) - return - } guard self.downloadingModel == nil else { return } // Prevent concurrent downloads self.downloadingModel = model self.downloadProgress = 0.0 @@ -150,7 +143,7 @@ final class VoiceEngineSettingsViewModel: ObservableObject { } catch { DebugLogger.shared.error("Failed to download model \(model.displayName): \(error)", source: "VoiceEngineVM") await MainActor.run { - self.asr.errorTitle = model.requiresExternalArtifacts ? "Model Import Failed" : "Model Download Failed" + self.asr.errorTitle = "Model Download Failed" self.asr.errorMessage = error.localizedDescription self.asr.showError = true } @@ -211,7 +204,7 @@ final class VoiceEngineSettingsViewModel: ObservableObject { case .qwen3Asr: return "Qwen3 ASR is a multilingual FluidAudio model with strong quality, but higher memory usage. Requires macOS 15+." case .cohereTranscribeSixBit: - return "Cohere Transcribe uses an external CoreML pipeline loaded from a local folder. Best on Apple Silicon with 8GB+ RAM." + return "Cohere Transcribe downloads a CoreML pipeline from Hugging Face and caches it locally. Best on Apple Silicon with 8GB+ RAM." default: return "Whisper models support 99 languages and work on any Mac." } @@ -222,7 +215,7 @@ final class VoiceEngineSettingsViewModel: ObservableObject { try await self.asr.ensureAsrReady() } catch { DebugLogger.shared.error("Failed to download models: \(error)", source: "AISettingsView") - self.asr.errorTitle = self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Model Import Failed" : "Model Download Failed" + self.asr.errorTitle = "Model Download Failed" self.asr.errorMessage = error.localizedDescription self.asr.showError = true } @@ -245,73 +238,8 @@ final class VoiceEngineSettingsViewModel: ObservableObject { self.selectedSpeechProvider = provider } - func externalArtifactsDirectoryDisplay(for model: SettingsStore.SpeechModel) -> String? 
{ - self.settings.externalCoreMLArtifactsDirectory(for: model)?.path - } - - func chooseExternalArtifactsDirectory(for model: SettingsStore.SpeechModel) { - _ = self.ensureExternalArtifactsConfigured(for: model, forceChooser: true) - } - - func clearExternalArtifactsDirectory(for model: SettingsStore.SpeechModel) { - self.settings.setExternalCoreMLArtifactsDirectory(nil, for: model) - if self.settings.selectedSpeechModel == model { - self.asr.resetTranscriptionProvider() - } - } - func openExternalModelSource(for model: SettingsStore.SpeechModel) { guard let url = model.externalCoreMLSpec?.sourceURL else { return } NSWorkspace.shared.open(url) } - - @discardableResult - private func ensureExternalArtifactsConfigured( - for model: SettingsStore.SpeechModel, - forceChooser: Bool = false - ) -> Bool { - guard let spec = model.externalCoreMLSpec else { return true } - if forceChooser == false, - let directory = self.settings.externalCoreMLArtifactsDirectory(for: model), - spec.validateArtifacts(at: directory) - { - return true - } - - let panel = NSOpenPanel() - panel.title = "Select \(model.displayName) Artifacts Folder" - panel.message = "Choose the folder containing \(spec.artifactFolderHint) and its CoreML files." - panel.canChooseDirectories = true - panel.canChooseFiles = false - panel.allowsMultipleSelection = false - panel.canCreateDirectories = false - panel.prompt = "Use Folder" - - guard panel.runModal() == .OK, let selectedDirectory = panel.url?.standardizedFileURL else { - return false - } - - let candidateDirectory = spec.validateArtifacts(at: selectedDirectory) - ? selectedDirectory - : selectedDirectory.appendingPathComponent(spec.artifactFolderHint, isDirectory: true) - - do { - try spec.validateArtifactsOrThrow(at: candidateDirectory) - } catch { - self.asr.errorTitle = "Artifacts Folder Invalid" - self.asr.errorMessage = error.localizedDescription - self.asr.showError = true - return false - } - - self.settings.setExternalCoreMLArtifactsDirectory(candidateDirectory, for: model) - if self.settings.selectedSpeechModel == model { - self.asr.resetTranscriptionProvider() - } else { - Task { - await self.asr.checkIfModelsExistAsync() - } - } - return true - } } diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift index 680533ab..5cf8952f 100644 --- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift +++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift @@ -273,28 +273,21 @@ extension VoiceEngineSettingsView { if model.requiresExternalArtifacts { VStack(alignment: .leading, spacing: 8) { HStack(alignment: .center, spacing: 10) { - Image(systemName: "folder.badge.gearshape") + Image(systemName: "arrow.down.circle") .font(.caption) .foregroundStyle(self.theme.palette.accent) VStack(alignment: .leading, spacing: 3) { - Text("External CoreML artifacts") + Text("Hosted on Hugging Face") .font(.caption) .fontWeight(.semibold) - Text(self.viewModel.externalArtifactsDirectoryDisplay(for: model) ?? "Choose the Hugging Face model folder before activating this model.") + Text("Downloads automatically on first use and stays cached locally.") .font(.caption2) .foregroundStyle(.secondary) - .lineLimit(3) + .lineLimit(2) } Spacer(minLength: 8) - - Button(self.viewModel.externalArtifactsDirectoryDisplay(for: model) == nil ? 
"Import Folder" : "Change Folder") { - self.viewModel.chooseExternalArtifactsDirectory(for: model) - } - .buttonStyle(.borderedProminent) - .tint(self.theme.palette.accent) - .controlSize(.small) } if model.externalCoreMLSpec?.sourceURL != nil { @@ -308,7 +301,7 @@ extension VoiceEngineSettingsView { .buttonStyle(.plain) .foregroundStyle(self.theme.palette.accent) - Text("Download the full model folder there, then import it here.") + Text("The app downloads the artifacts directly from this repo.") .font(.caption2) .foregroundStyle(.secondary) .lineLimit(2) @@ -317,17 +310,6 @@ extension VoiceEngineSettingsView { } } - if self.viewModel.externalArtifactsDirectoryDisplay(for: model) != nil { - HStack { - Button("Clear Folder") { - self.viewModel.clearExternalArtifactsDirectory(for: model) - } - .buttonStyle(.plain) - .font(.caption) - .foregroundStyle(.red.opacity(0.8)) - Spacer() - } - } } .padding(.horizontal, 10) .padding(.vertical, 8) @@ -533,7 +515,7 @@ extension VoiceEngineSettingsView { .disabled(self.viewModel.asr.isRunning || self.viewModel.downloadingModel != nil) } - Button("Import Folder") { + Button("Download") { self.viewModel.previewSpeechModel = model self.viewModel.downloadSpeechModel(model) } @@ -562,7 +544,7 @@ extension VoiceEngineSettingsView { .opacity(isSelected ? 1 : 0) } } - .frame(width: model.requiresExternalArtifacts ? 168 : 120, alignment: .trailing) + .frame(width: model.requiresExternalArtifacts ? 150 : 120, alignment: .trailing) } } .padding(.horizontal, 12) @@ -613,7 +595,7 @@ extension VoiceEngineSettingsView { Button(action: { Task { await self.viewModel.deleteModels() } }) { HStack(spacing: 4) { Image(systemName: "trash") - Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Forget" : "Delete") + Text("Delete") } .font(.caption) .foregroundStyle(.red) @@ -621,14 +603,14 @@ extension VoiceEngineSettingsView { .buttonStyle(.plain) } else if self.viewModel.asr.modelsExistOnDisk { Image(systemName: "doc.fill").foregroundStyle(self.theme.palette.accent).font(.caption) - Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Imported" : "Cached") + Text("Cached") .font(.caption) .foregroundStyle(.secondary) Button(action: { Task { await self.viewModel.deleteModels() } }) { HStack(spacing: 4) { Image(systemName: "trash") - Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Forget" : "Delete") + Text("Delete") } .font(.caption) .foregroundStyle(.red) @@ -653,7 +635,7 @@ extension VoiceEngineSettingsView { Button(action: { Task { await self.viewModel.downloadModels() } }) { HStack(spacing: 4) { Image(systemName: "arrow.down.circle.fill") - Text(self.settings.selectedSpeechModel.requiresExternalArtifacts ? "Import" : "Download") + Text("Download") } .font(.caption) } From 2a7582ed1b583a21eedc20242380d8e4c51ce7e4 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 09:34:15 -0700 Subject: [PATCH 05/22] Show Cohere in multilingual onboarding --- Sources/Fluid/UI/WelcomeView.swift | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 54f9f8bd..2afb8583 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -669,7 +669,7 @@ struct OnboardingFlowView: View { case .englishOnly: return "Best if you mainly speak English. Lower complexity and tuned for English dictation." case .multipleLanguages: - return "Best if you switch languages. 
Broader support with Parakeet TDT v3."
+            return "Best if you switch languages. Broader support with Parakeet TDT v3. Cohere is also available below if you want a higher-accuracy, larger model."
         case .other:
             return "Choose a different model below if neither of the default language paths fits."
         }
@@ -679,7 +679,7 @@ struct OnboardingFlowView: View {
 
     private var onboardingModelOptions: [SettingsStore.SpeechModel] {
         let candidates: [SettingsStore.SpeechModel] = CPUArchitecture.isAppleSilicon
-            ? [.parakeetTDT, .parakeetTDTv2, .whisperBase, .whisperSmall]
+            ? [.parakeetTDT, .cohereTranscribeSixBit, .parakeetTDTv2, .whisperBase, .whisperSmall]
             : [.whisperBase, .whisperTiny, .whisperSmall, .whisperMedium]
 
         var seenModelIDs = Set<SettingsStore.SpeechModel>()
@@ -691,11 +691,27 @@ struct OnboardingFlowView: View {
     private var onboardingAlternativeModels: [SettingsStore.SpeechModel] {
         let filtered = self.onboardingModelOptions.filter { $0 != self.recommendedOnboardingModel }
-        guard self.preferredLanguageChoice == .other, self.shouldShowLanguageChoice else {
+        guard self.shouldShowLanguageChoice else {
             return filtered
         }
-        return filtered.filter { model in
+        switch self.preferredLanguageChoice {
+        case .englishOnly:
+            return []
+        case .multipleLanguages:
+            let multilingualOptions = filtered.filter { model in
+                model != .parakeetTDTv2
+            }
+            let preferredOrder: [SettingsStore.SpeechModel] = [.cohereTranscribeSixBit, .whisperBase, .whisperSmall]
+            return multilingualOptions.sorted { lhs, rhs in
+                let lhsIndex = preferredOrder.firstIndex(of: lhs) ?? preferredOrder.count
+                let rhsIndex = preferredOrder.firstIndex(of: rhs) ?? preferredOrder.count
+                if lhsIndex != rhsIndex { return lhsIndex < rhsIndex }
+                return lhs.displayName.localizedCaseInsensitiveCompare(rhs.displayName) == .orderedAscending
+            }
+        case .other:
+            return filtered.filter { model in
                 model != .parakeetTDT && model != .parakeetTDTv2
+            }
         }
     }
@@ -708,7 +724,7 @@ struct OnboardingFlowView: View {
     }
 
     private var shouldShowAlternativeModels: Bool {
-        !self.shouldShowLanguageChoice || self.preferredLanguageChoice == .other
+        !self.shouldShowLanguageChoice || self.preferredLanguageChoice != .englishOnly
     }
 
     private var isRecommendedModelSelected: Bool {

From 80bf0cd5c05b927e34ef68183a8fc573b6c09dcd Mon Sep 17 00:00:00 2001
From: altic-dev
Date: Sat, 28 Mar 2026 09:35:26 -0700
Subject: [PATCH 06/22] Promote Cohere as a FluidVoice pick

---
 Sources/Fluid/Persistence/SettingsStore.swift      |  2 +-
 .../AISettings/VoiceEngineSettingsViewModel.swift  | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift
index f39a5cf3..06777413 100644
--- a/Sources/Fluid/Persistence/SettingsStore.swift
+++ b/Sources/Fluid/Persistence/SettingsStore.swift
@@ -2663,8 +2663,8 @@ final class SettingsStore: ObservableObject {
         switch self {
         case .parakeetTDT: return "FluidVoice Pick"
         case .parakeetTDTv2: return "FluidVoice Pick"
+        case .cohereTranscribeSixBit: return "FluidVoice Pick"
         case .qwen3Asr: return "Beta"
-        case .cohereTranscribeSixBit: return "New"
         case .appleSpeechAnalyzer: return "New"
         default: return nil
         }
diff --git a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift
index 6959b847..6a63b2e6 100644
--- a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift
+++ b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift
@@ -86,7 +86,20 @@ final class VoiceEngineSettingsViewModel:
ObservableObject { switch self.modelSortOption { case .provider: - models.sort { $0.brandName.localizedCaseInsensitiveCompare($1.brandName) == .orderedAscending } + models.sort { lhs, rhs in + let lhsRecommended = lhs.badgeText == "FluidVoice Pick" + let rhsRecommended = rhs.badgeText == "FluidVoice Pick" + if lhsRecommended != rhsRecommended { + return lhsRecommended && !rhsRecommended + } + + let brandOrder = lhs.brandName.localizedCaseInsensitiveCompare(rhs.brandName) + if brandOrder != .orderedSame { + return brandOrder == .orderedAscending + } + + return lhs.displayName.localizedCaseInsensitiveCompare(rhs.displayName) == .orderedAscending + } case .accuracy: models.sort { $0.accuracyPercent > $1.accuracyPercent } case .speed: From 024880385426cdc7017f1b61d471625e473a1c0d Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 09:36:05 -0700 Subject: [PATCH 07/22] Revert "Promote Cohere as a FluidVoice pick" This reverts commit cb500984fc1ab6ccbc57f9598fcb8c62bb8e19e2. --- Sources/Fluid/Persistence/SettingsStore.swift | 2 +- .../AISettings/VoiceEngineSettingsViewModel.swift | 15 +-------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 06777413..f39a5cf3 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2663,8 +2663,8 @@ final class SettingsStore: ObservableObject { switch self { case .parakeetTDT: return "FluidVoice Pick" case .parakeetTDTv2: return "FluidVoice Pick" - case .cohereTranscribeSixBit: return "FluidVoice Pick" case .qwen3Asr: return "Beta" + case .cohereTranscribeSixBit: return "New" case .appleSpeechAnalyzer: return "New" default: return nil } diff --git a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift index 6a63b2e6..6959b847 100644 --- a/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift +++ b/Sources/Fluid/UI/AISettings/VoiceEngineSettingsViewModel.swift @@ -86,20 +86,7 @@ final class VoiceEngineSettingsViewModel: ObservableObject { switch self.modelSortOption { case .provider: - models.sort { lhs, rhs in - let lhsRecommended = lhs.badgeText == "FluidVoice Pick" - let rhsRecommended = rhs.badgeText == "FluidVoice Pick" - if lhsRecommended != rhsRecommended { - return lhsRecommended && !rhsRecommended - } - - let brandOrder = lhs.brandName.localizedCaseInsensitiveCompare(rhs.brandName) - if brandOrder != .orderedSame { - return brandOrder == .orderedAscending - } - - return lhs.displayName.localizedCaseInsensitiveCompare(rhs.displayName) == .orderedAscending - } + models.sort { $0.brandName.localizedCaseInsensitiveCompare($1.brandName) == .orderedAscending } case .accuracy: models.sort { $0.accuracyPercent > $1.accuracyPercent } case .speed: From cbaff4c668ae1f3c2272de7c15bc6bbd4833c80c Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 10:04:27 -0700 Subject: [PATCH 08/22] Fix onboarding categories for Cohere --- Sources/Fluid/UI/WelcomeView.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 2afb8583..59af6ea1 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -592,9 +592,9 @@ struct OnboardingFlowView: View { case .englishOnly: return "Uses Parakeet TDT v2" case .multipleLanguages: - return "Uses Parakeet TDT v3" + return "Uses 
Parakeet TDT v3 or Cohere" case .other: - return "Choose a different model manually" + return "Whisper and other manual choices" } } } @@ -710,7 +710,7 @@ struct OnboardingFlowView: View { } case .other: return filtered.filter { model in - model != .parakeetTDT && model != .parakeetTDTv2 + model != .parakeetTDT && model != .parakeetTDTv2 && model != .cohereTranscribeSixBit } } } @@ -1488,7 +1488,7 @@ struct OnboardingFlowView: View { switch self.settings.selectedSpeechModel { case .parakeetTDTv2: self.preferredLanguageChoice = .englishOnly - case .parakeetTDT: + case .parakeetTDT, .cohereTranscribeSixBit: self.preferredLanguageChoice = .multipleLanguages default: self.preferredLanguageChoice = .other From cd7c5c83a8ccae6e710e36712ad10f42533b6443 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 10:07:18 -0700 Subject: [PATCH 09/22] Refine multilingual onboarding recommendations --- Sources/Fluid/UI/WelcomeView.swift | 156 +++++++++++++++++++++++++---- 1 file changed, 135 insertions(+), 21 deletions(-) diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 59af6ea1..2a554ad4 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -583,7 +583,7 @@ struct OnboardingFlowView: View { case .multipleLanguages: return "Multiple languages" case .other: - return "Other" + return "More options" } } @@ -594,7 +594,7 @@ struct OnboardingFlowView: View { case .multipleLanguages: return "Uses Parakeet TDT v3 or Cohere" case .other: - return "Whisper and other manual choices" + return "Whisper and manual choices" } } } @@ -663,13 +663,20 @@ struct OnboardingFlowView: View { self.recommendedOnboardingModel.displayName } + private var recommendedOnboardingModels: [SettingsStore.SpeechModel] { + if CPUArchitecture.isAppleSilicon, self.preferredLanguageChoice == .multipleLanguages { + return [.parakeetTDT, .cohereTranscribeSixBit].filter { SettingsStore.SpeechModel.availableModels.contains($0) } + } + return [self.recommendedOnboardingModel] + } + private var recommendedModelReasonText: String { if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: return "Best if you mainly speak English. Lower complexity and tuned for English dictation." case .multipleLanguages: - return "Best if you switch languages. Broader support with Parakeet TDT v3. Cohere is also available below if you want a higher-accuracy, larger model." + return "Best if you switch languages. Parakeet TDT v3 is the lighter default. Cohere is also recommended if you want the larger, higher-accuracy option." case .other: return "Choose a different model below if neither of the default language paths fits." } @@ -698,16 +705,7 @@ struct OnboardingFlowView: View { case .englishOnly: return [] case .multipleLanguages: - let multilingualOptions = filtered.filter { model in - model != .parakeetTDTv2 - } - let preferredOrder: [SettingsStore.SpeechModel] = [.cohereTranscribeSixBit, .whisperBase, .whisperSmall] - return multilingualOptions.sorted { lhs, rhs in - let lhsIndex = preferredOrder.firstIndex(of: lhs) ?? preferredOrder.count - let rhsIndex = preferredOrder.firstIndex(of: rhs) ?? 
preferredOrder.count - if lhsIndex != rhsIndex { return lhsIndex < rhsIndex } - return lhs.displayName.localizedCaseInsensitiveCompare(rhs.displayName) == .orderedAscending - } + return [] case .other: return filtered.filter { model in model != .parakeetTDT && model != .parakeetTDTv2 && model != .cohereTranscribeSixBit @@ -724,11 +722,14 @@ struct OnboardingFlowView: View { } private var shouldShowAlternativeModels: Bool { - !self.shouldShowLanguageChoice || self.preferredLanguageChoice != .englishOnly + if self.shouldShowLanguageChoice { + return self.preferredLanguageChoice == .other + } + return true } private var isRecommendedModelSelected: Bool { - self.isOnboardingModelSelected(self.recommendedOnboardingModel) + self.recommendedOnboardingModels.contains(self.settings.selectedSpeechModel) } private var isRecommendedModelDownloaded: Bool { @@ -909,7 +910,19 @@ struct OnboardingFlowView: View { Spacer() } - if self.showsMappedRecommendedModel { + if self.preferredLanguageChoice == .multipleLanguages && self.shouldShowLanguageChoice { + LazyVGrid( + columns: [ + GridItem(.flexible(), spacing: 10, alignment: .top), + GridItem(.flexible(), spacing: 10, alignment: .top), + ], + spacing: 10 + ) { + ForEach(self.recommendedOnboardingModels) { model in + self.onboardingRecommendedModelCard(for: model) + } + } + } else if self.showsMappedRecommendedModel { HStack(spacing: 10) { Label(self.recommendedOnboardingModel.downloadSize, systemImage: "internaldrive") .font(.caption) @@ -964,7 +977,11 @@ struct OnboardingFlowView: View { } HStack(spacing: 10) { - if self.isRecommendedModelReady { + if self.preferredLanguageChoice == .multipleLanguages && self.shouldShowLanguageChoice { + Text("Choose either FluidVoice-recommended multilingual model.") + .font(.caption.weight(.semibold)) + .foregroundStyle(.secondary) + } else if self.isRecommendedModelReady { Label( "Model downloaded and loaded", systemImage: "checkmark.seal.fill" @@ -986,7 +1003,9 @@ struct OnboardingFlowView: View { Spacer() - if self.preferredLanguageChoice == .other && self.shouldShowLanguageChoice { + if self.preferredLanguageChoice == .multipleLanguages && self.shouldShowLanguageChoice { + EmptyView() + } else if self.preferredLanguageChoice == .other && self.shouldShowLanguageChoice { Text("Choose a model from the options below.") .font(.caption.weight(.semibold)) .foregroundStyle(.secondary) @@ -1009,7 +1028,7 @@ struct OnboardingFlowView: View { if self.shouldShowAlternativeModels && !self.onboardingAlternativeModels.isEmpty { Divider().padding(.vertical, 2) - Text("Other popular options") + Text("More model options") .font(.subheadline.weight(.semibold)) .foregroundStyle(self.theme.palette.primaryText) @@ -1412,6 +1431,101 @@ struct OnboardingFlowView: View { .buttonStyle(.plain) } + private func onboardingRecommendedModelCard(for model: SettingsStore.SpeechModel) -> some View { + let isSelected = self.isOnboardingModelSelected(model) + let isDownloaded = self.isOnboardingModelDownloaded(model) + let isPreparing = self.isPreparingOnboardingModel(model) + let isReady = self.isOnboardingModelReady(model) + + return Button { + self.selectOnboardingModel(model, preserveManualChoice: true) + } label: { + VStack(alignment: .leading, spacing: 10) { + HStack(alignment: .top, spacing: 8) { + Text(model.displayName) + .font(.callout.weight(.semibold)) + .foregroundStyle(self.theme.palette.primaryText) + + Spacer(minLength: 8) + + Text("FV Recommended") + .font(.caption2.weight(.semibold)) + .padding(.horizontal, 6) + .padding(.vertical, 
2) + .background(Capsule().fill(self.theme.palette.accent.opacity(0.18))) + .foregroundStyle(self.theme.palette.accent) + } + + Text(model.cardDescription) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(3) + + HStack(spacing: 10) { + Label(model.downloadSize, systemImage: "internaldrive") + .font(.caption2) + .foregroundStyle(.secondary) + Text(model.languageSupport) + .font(.caption2) + .foregroundStyle(.secondary) + } + + Spacer(minLength: 0) + + if isPreparing { + if self.asr.isDownloadingModel, let progress = self.asr.downloadProgress { + ProgressView(value: progress) + .tint(self.theme.palette.accent) + Text(progress >= 0.82 ? "Finalizing..." : "Downloading \(Int(progress * 100))%") + .font(.caption2) + .foregroundStyle(.secondary) + } else { + Text("Loading model...") + .font(.caption2) + .foregroundStyle(.secondary) + } + } else if isReady { + Label("Downloaded and loaded", systemImage: "checkmark.circle.fill") + .font(.caption2.weight(.semibold)) + .foregroundStyle(Color.fluidGreen) + } else if isDownloaded { + Label(isSelected ? "Downloaded. Click Load to finish." : "Downloaded", systemImage: "arrow.triangle.2.circlepath") + .font(.caption2.weight(.semibold)) + .foregroundStyle(.secondary) + } else { + Label("Not downloaded yet", systemImage: "arrow.down.circle") + .font(.caption2.weight(.semibold)) + .foregroundStyle(.secondary) + } + + HStack { + Spacer() + Button(self.onboardingModelActionButtonTitle(for: model)) { + self.prepareOnboardingModel(model, preserveManualChoice: true) + } + .buttonStyle(.borderedProminent) + .controlSize(.small) + .tint(self.theme.palette.accent) + .disabled(self.asr.isRunning || isPreparing || isReady) + } + } + .frame(maxWidth: .infinity, minHeight: 190, alignment: .topLeading) + .padding(12) + .background( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .fill(self.theme.palette.cardBackground.opacity(isSelected ? 0.82 : 0.55)) + .overlay( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .stroke( + isSelected ? 
self.theme.palette.accent.opacity(0.45) : self.theme.palette.cardBorder.opacity(0.32), + lineWidth: 1 + ) + ) + ) + } + .buttonStyle(.plain) + } + private func selectOnboardingModel(_ model: SettingsStore.SpeechModel, preserveManualChoice: Bool = false) { if self.settings.selectedSpeechModel != model { self.settings.selectedSpeechModel = model @@ -1429,9 +1543,9 @@ struct OnboardingFlowView: View { case .englishOnly: return "English only uses \(self.recommendedOnboardingModelDisplayName)" case .multipleLanguages: - return "Multiple languages uses \(self.recommendedOnboardingModelDisplayName)" + return "FluidVoice recommends Parakeet TDT v3 and Cohere" case .other: - return "Other languages or workflows" + return "Whisper and more options" } } From 91a00000746aad9d1ef91ba06f99666a6b2a30e4 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sat, 28 Mar 2026 10:13:42 -0700 Subject: [PATCH 10/22] Polish Cohere model presentation --- Sources/Fluid/Persistence/SettingsStore.swift | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index f39a5cf3..4512a0a3 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2388,7 +2388,7 @@ final class SettingsStore: ObservableObject { case .parakeetTDT: return "~500 MB" case .parakeetTDTv2: return "~500 MB" case .qwen3Asr: return "~2.0 GB" - case .cohereTranscribeSixBit: return "~1.4 GB (manual)" + case .cohereTranscribeSixBit: return "~1.4 GB" case .appleSpeech: return "Built-in (Zero Download)" case .appleSpeechAnalyzer: return "Built-in" case .whisperTiny: return "~75 MB" @@ -2522,7 +2522,7 @@ final class SettingsStore: ObservableObject { case .qwen3Asr: return "Qwen3 multilingual ASR via FluidAudio. Higher quality, heavier memory footprint." case .cohereTranscribeSixBit: - return "External CoreML pipeline with strong accuracy. Load it from a local artifacts folder." + return "High-accuracy multilingual transcription. Supports English, French, German, Italian, Spanish, Portuguese, Greek, Dutch, Polish, Mandarin, Japanese, Korean, Vietnamese, and Arabic." case .appleSpeech: return "Built-in macOS speech recognition. No download required." case .appleSpeechAnalyzer: @@ -2573,8 +2573,6 @@ final class SettingsStore: ObservableObject { switch self { case .qwen3Asr: return "⚠️ Requires 8GB+ RAM. Best on newer Apple Silicon Macs." - case .cohereTranscribeSixBit: - return "⚠️ Requires local CoreML artifacts and 8GB+ RAM. Best on newer Apple Silicon Macs." case .whisperLarge: return "⚠️ Requires 10GB+ RAM. May crash on systems with limited memory." 
         case .whisperLargeTurbo:
@@ -2628,7 +2626,7 @@ final class SettingsStore: ObservableObject {
         case .parakeetTDT: return 1.0
         case .parakeetTDTv2: return 1.0
         case .qwen3Asr: return 0.45
-        case .cohereTranscribeSixBit: return 0.50
+        case .cohereTranscribeSixBit: return 0.80
         case .appleSpeech: return 0.60
         case .appleSpeechAnalyzer: return 0.85
         case .whisperTiny: return 0.90
@@ -2643,10 +2641,10 @@ final class SettingsStore: ObservableObject {
     /// Exact accuracy percentage (0.0 - 1.0) for the liquid bars
     var accuracyPercent: Double {
         switch self {
-        case .parakeetTDT: return 0.95
-        case .parakeetTDTv2: return 0.98
+        case .parakeetTDT: return 0.92
+        case .parakeetTDTv2: return 0.96
         case .qwen3Asr: return 0.90
-        case .cohereTranscribeSixBit: return 0.96
+        case .cohereTranscribeSixBit: return 0.97
         case .appleSpeech: return 0.60
         case .appleSpeechAnalyzer: return 0.80
         case .whisperTiny: return 0.40

From d22534b65c2e05017886ae3251b7d180beb3af87 Mon Sep 17 00:00:00 2001
From: altic-dev
Date: Sat, 28 Mar 2026 20:16:03 -0700
Subject: [PATCH 11/22] Prepare Cohere beta release validation fixes

---
 .../xcshareddata/swiftpm/Package.resolved      |  2 +-
 Info.plist                                     |  2 +-
 .../ExternalCoreMLTranscriptionProvider.swift  | 15 ++++++++++++++-
 .../Services/MeetingTranscriptionService.swift | 11 ++++++++++-
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
index 89dec41d..1122d6f1 100644
--- a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -34,7 +34,7 @@
       "location" : "https://github.com/altic-dev/FluidAudio.git",
       "state" : {
         "branch" : "B/cohere-coreml-asr",
-        "revision" : "1502a6a8095bb1fc9831cc239d69a9a837d665a7"
+        "revision" : "e1c6ef7679bd11f4275cc6b8d582da1d42314a03"
       }
     },
     {
diff --git a/Info.plist b/Info.plist
index 45b1c97b..ef083cab 100644
--- a/Info.plist
+++ b/Info.plist
@@ -15,7 +15,7 @@
 	<key>CFBundleVersion</key>
 	<string>8</string>
 	<key>CFBundleShortVersionString</key>
-	<string>1.5.10</string>
+	<string>1.5.11-beta.1</string>
 	<key>LSMinimumSystemVersion</key>
 	<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
 	<key>LSApplicationCategoryType</key>
diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift
index 8e7acf2b..4eb70164 100644
--- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift
+++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift
@@ -160,12 +160,17 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider {
             try FileManager.default.removeItem(at: compiledDirectory)
         }
 
-        if FileManager.default.fileExists(atPath: directory.path) {
+        if FileManager.default.fileExists(atPath: directory.path), Self.isAppManagedArtifactsDirectory(directory, spec: spec) {
             DebugLogger.shared.info(
                 "ExternalCoreML: removing downloaded artifacts at \(directory.path)",
                 source: "ExternalCoreML"
             )
             try FileManager.default.removeItem(at: directory)
+        } else if FileManager.default.fileExists(atPath: directory.path) {
+            DebugLogger.shared.warning(
+                "ExternalCoreML: skipping deletion for non-managed artifacts directory at \(directory.path)",
+                source: "ExternalCoreML"
+            )
         }
 
         self.isReady = false
@@ -232,6 +237,14 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider {
         SettingsStore.shared.externalCoreMLArtifactsDirectory(for: model) ??
spec.defaultCacheDirectory } + private static func isAppManagedArtifactsDirectory( + _ directory: URL, + spec: ExternalCoreMLASRModelSpec + ) -> Bool { + guard let defaultCacheDirectory = spec.defaultCacheDirectory else { return false } + return directory.standardizedFileURL.path == defaultCacheDirectory.standardizedFileURL.path + } + private static func makeError(_ description: String) -> NSError { NSError( domain: "ExternalCoreMLTranscriptionProvider", diff --git a/Sources/Fluid/Services/MeetingTranscriptionService.swift b/Sources/Fluid/Services/MeetingTranscriptionService.swift index 31900de8..e17c5ed3 100644 --- a/Sources/Fluid/Services/MeetingTranscriptionService.swift +++ b/Sources/Fluid/Services/MeetingTranscriptionService.swift @@ -181,7 +181,9 @@ final class MeetingTranscriptionService: ObservableObject { DebugLogger.shared.warning("Could not determine audio duration: \(error.localizedDescription)", source: "MeetingTranscriptionService") } - if provider.prefersNativeFileTranscription { + let isVideoContainer = ["mp4", "mov"].contains(fileExtension) + + if provider.prefersNativeFileTranscription && !isVideoContainer { self.currentStatus = duration > 0 ? "Transcribing audio (\(Int(duration))s)..." : "Transcribing audio..." self.progress = 0.3 @@ -218,6 +220,13 @@ final class MeetingTranscriptionService: ObservableObject { return result } + if provider.prefersNativeFileTranscription && isVideoContainer { + DebugLogger.shared.info( + "MeetingTranscriptionService: using buffered transcription path for video container [provider=\(provider.name), extension=\(fileExtension)]", + source: "MeetingTranscriptionService" + ) + } + // Transcribe using chunked processing for long files // This reads audio in ~20 minute segments to avoid memory overflow on 3+ hour files let chunkDurationSeconds: Double = 20 * 60 // 20 minutes per chunk (well under 24min model limit) From c1f1190396a14cc9b62c666d9b4a2acef5af009b Mon Sep 17 00:00:00 2001 From: altic-dev Date: Sun, 29 Mar 2026 12:56:00 -0700 Subject: [PATCH 12/22] added languages to onboarding --- Sources/Fluid/Persistence/SettingsStore.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 4512a0a3..fc77fcba 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2372,7 +2372,7 @@ final class SettingsStore: ObservableObject { var languageSupport: String { switch self { case .parakeetTDT: - return "25 European Languages" + return "25 Languages" case .parakeetTDTv2: return "English Only (Higher Accuracy)" case .qwen3Asr: return "30 Languages" case .cohereTranscribeSixBit: return "14 Languages" @@ -2516,7 +2516,7 @@ final class SettingsStore: ObservableObject { var cardDescription: String { switch self { case .parakeetTDT: - return "Fast multilingual transcription with 25 languages. Best for everyday use." + return "Fast multilingual transcription. Supports Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Spanish, Swedish, and Ukrainian." case .parakeetTDTv2: return "Optimized for English accuracy and fastest transcription." 
         case .qwen3Asr:

From c7c5ef9c8b347e6ec4ccb88b31e1a834168266b3 Mon Sep 17 00:00:00 2001
From: altic-dev
Date: Sun, 29 Mar 2026 13:02:29 -0700
Subject: [PATCH 13/22] Add Cohere speech model logo

---
 .../Provider_Cohere.imageset/Contents.json    |   7 +++++++
 .../Provider_Cohere.imageset/logo.png         | Bin 0 -> 1472 bytes
 .../Provider_Cohere.imageset/logo@2x.png      | Bin 0 -> 2602 bytes
 .../UI/AISettingsView+SpeechRecognition.swift |   3 +++
 4 files changed, 10 insertions(+)
 create mode 100644 Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/Contents.json
 create mode 100644 Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/logo.png
 create mode 100644 Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/logo@2x.png

diff --git a/Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/Contents.json b/Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/Contents.json
new file mode 100644
index 00000000..1c4a4c4b
--- /dev/null
+++ b/Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/Contents.json
@@ -0,0 +1,7 @@
+{
+  "images" : [
+    { "filename" : "logo.png", "idiom" : "universal", "scale" : "1x" },
+    { "filename" : "logo@2x.png", "idiom" : "universal", "scale" : "2x" }
+  ],
+  "info" : { "author" : "xcode", "version" : 1 }
+}
diff --git a/Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/logo.png b/Sources/Fluid/Assets.xcassets/Provider_Cohere.imageset/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..3c80d3bac3a52de95557cadd1cb6da70091c3a5e
GIT binary patch
literal 1472
[base85-encoded PNG payloads for logo.png (1472 bytes) and logo@2x.png (2602 bytes) omitted from this excerpt]
literal 0
HcmV?d00001

diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift
index 5cf8952f..23d34d44 100644
--- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift
+++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift
@@ -726,6 +726,9 @@ extension VoiceEngineSettingsView {
         if brand.contains("nvidia") {
             return "Provider_NVIDIA"
         }
+        if brand.contains("cohere") {
+            return "Provider_Cohere"
+        }
         if brand.contains("openai") || brand.contains("whisper") {
             return "Provider_OpenAI"
         }

From eb0ebd06695dc2626dca4c1c7f7e0c5141372207 Mon Sep 17 00:00:00 2001
From: altic-dev
Date: Sun, 29 Mar 2026 13:04:49 -0700
Subject: [PATCH 14/22] Adjust Cohere speed and accuracy display

---
 Sources/Fluid/Persistence/SettingsStore.swift | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift
index fc77fcba..d06f21b0 100644
--- a/Sources/Fluid/Persistence/SettingsStore.swift
+++ b/Sources/Fluid/Persistence/SettingsStore.swift
@@ -2626,7 +2626,7 @@ final class SettingsStore: ObservableObject {
         case .parakeetTDT: return 1.0
         case .parakeetTDTv2: return 1.0
         case .qwen3Asr: return 0.45
-        case .cohereTranscribeSixBit: return 0.80
+        case .cohereTranscribeSixBit: return 0.85
         case .appleSpeech: return 0.60
         case .appleSpeechAnalyzer: return 0.85
         case .whisperTiny: return 0.90
@@ -2644,7 +2644,7 @@ final class SettingsStore: ObservableObject {
         case .parakeetTDT: return 0.92
         case .parakeetTDTv2: return 0.96
         case .qwen3Asr: return 0.90
-        case .cohereTranscribeSixBit: return 0.97
+        case .cohereTranscribeSixBit: return 0.98
         case .appleSpeech: return 0.60
         case .appleSpeechAnalyzer: return 0.80
         case .whisperTiny: return 0.40

From b72b6d3bda42cb5f5ec19d1b4e1cb0e97151418d Mon Sep 17 00:00:00 2001
From: altic-dev
Date: Sun, 29 Mar 2026 13:31:06 -0700
Subject: [PATCH 15/22] Pin FluidAudio fix for archive builds

---
 .../project.xcworkspace/xcshareddata/swiftpm/Package.resolved | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
index 1122d6f1..f13f7b49 100644
--- a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -34,7 +34,7 @@
       "location" : "https://github.com/altic-dev/FluidAudio.git",
       "state" : {
         "branch" : "B/cohere-coreml-asr",
-        "revision" : "e1c6ef7679bd11f4275cc6b8d582da1d42314a03"
+        "revision" : "cbbd7757f23fa271c5ad655a882dcaf3b906dbcd"
       }
     },
     {
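
Patches 11 and 15 each move the FluidAudio pin to a newer commit on the same branch. This works for archive builds because Package.resolved records an exact revision even when the dependency tracks a branch. Below is a minimal sketch of the equivalent declaration in a standalone Package.swift; this is an assumption for illustration, since the app resolves the package through the Xcode project, and the "FluidAudio" product name is inferred from the imports later in this series.

// swift-tools-version:5.9
// Sketch only: mirrors the branch dependency pinned in Package.resolved above.
import PackageDescription

let package = Package(
    name: "FluidAudioPinningSketch",
    platforms: [.macOS(.v14)],
    dependencies: [
        // Tracks the Cohere ASR branch; SwiftPM records the resolved commit
        // (e.g. cbbd7757...) in Package.resolved, keeping builds reproducible
        // until the pin is deliberately updated.
        .package(url: "https://github.com/altic-dev/FluidAudio.git", branch: "B/cohere-coreml-asr")
    ],
    targets: [
        .target(
            name: "FluidAudioPinningSketch",
            dependencies: [.product(name: "FluidAudio", package: "FluidAudio")]
        )
    ]
)

With a branch dependency, re-resolving moves the recorded revision forward; committing Package.resolved is what keeps CI and archive builds on the known-good commit.
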
From b5e861d6a689acf57b6a3f5d08f6445a7d6c9ed8 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Mon, 30 Mar 2026 02:12:04 -0700 Subject: [PATCH 16/22] Wire split Cohere runtime into app --- .../xcshareddata/swiftpm/Package.resolved | 2 +- .../Fluid/Services/ExternalCoreMLModelRegistry.swift | 9 +++++++-- .../Services/ExternalCoreMLTranscriptionProvider.swift | 10 ++++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index f13f7b49..89dec41d 100644 --- a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -34,7 +34,7 @@ "location" : "https://github.com/altic-dev/FluidAudio.git", "state" : { "branch" : "B/cohere-coreml-asr", - "revision" : "cbbd7757f23fa271c5ad655a882dcaf3b906dbcd" + "revision" : "1502a6a8095bb1fc9831cc239d69a9a837d665a7" } }, { diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift index 38e38d3f..22aa0476 100644 --- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -1,5 +1,6 @@ import CoreML import Foundation +import FluidAudio enum ExternalCoreMLASRBackend { case cohereTranscribe @@ -44,11 +45,12 @@ struct ExternalCoreMLASRModelSpec { let manifestFileName: String let frontendFileName: String let encoderFileName: String + let crossKVProjectorFileName: String? let decoderFileName: String let cachedDecoderFileName: String let expectedModelID: String let expectedSampleRate: Int - let computeUnits: MLComputeUnits + let computeConfiguration: CohereTranscribeComputeConfiguration let sourceURL: URL? let repositoryOwner: String? let repositoryName: String? 
@@ -59,9 +61,11 @@ struct ExternalCoreMLASRModelSpec { self.manifestFileName, self.frontendFileName, self.encoderFileName, + self.crossKVProjectorFileName, self.decoderFileName, self.cachedDecoderFileName, ] + .compactMap { $0 } } func url(for entry: String, in directory: URL) -> URL { @@ -129,11 +133,12 @@ enum ExternalCoreMLModelRegistry { manifestFileName: "coreml_manifest.json", frontendFileName: "cohere_frontend.mlpackage", encoderFileName: "cohere_encoder.mlpackage", + crossKVProjectorFileName: "cohere_cross_kv_projector.mlpackage", decoderFileName: "cohere_decoder_fullseq_masked.mlpackage", cachedDecoderFileName: "cohere_decoder_cached.mlpackage", expectedModelID: "CohereLabs/cohere-transcribe-03-2026", expectedSampleRate: 16000, - computeUnits: .cpuAndGPU, + computeConfiguration: .aneSmall, sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit"), repositoryOwner: "BarathwajAnandan", repositoryName: "cohere-transcribe-03-2026-CoreML-6bit", diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift index 4eb70164..79c6e65b 100644 --- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -54,11 +54,17 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { case .cohereTranscribe: let manager = CohereTranscribeAsrManager() progressHandler?(0.9) + let computeSummary = [ + String(describing: spec.computeConfiguration.frontend), + String(describing: spec.computeConfiguration.encoder), + String(describing: spec.computeConfiguration.crossKV), + String(describing: spec.computeConfiguration.decoder), + ].joined(separator: "/") DebugLogger.shared.info( - "ExternalCoreML: loading Cohere models [computeUnits=\(String(describing: spec.computeUnits))]", + "ExternalCoreML: loading Cohere models [splitCompute=\(computeSummary)]", source: "ExternalCoreML" ) - try await manager.loadModels(from: directory, computeUnits: spec.computeUnits) + try await manager.loadModels(from: directory, computeConfiguration: spec.computeConfiguration) self.cohereManager = manager } From 27f77eb8ec863f1cacfcf6fe4537de1f0b83f4bf Mon Sep 17 00:00:00 2001 From: altic-dev Date: Mon, 30 Mar 2026 02:16:00 -0700 Subject: [PATCH 17/22] Allow legacy Cohere artifact downloads --- Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift index 22aa0476..51ae09bb 100644 --- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -61,10 +61,15 @@ struct ExternalCoreMLASRModelSpec { self.manifestFileName, self.frontendFileName, self.encoderFileName, - self.crossKVProjectorFileName, self.decoderFileName, self.cachedDecoderFileName, ] + } + + var optionalEntries: [String] { + [ + self.crossKVProjectorFileName, + ] .compactMap { $0 } } From 8016c6dabb70a936d0d90bdb4b5dfd65c86e255b Mon Sep 17 00:00:00 2001 From: altic-dev Date: Mon, 30 Mar 2026 02:17:30 -0700 Subject: [PATCH 18/22] Require cross-KV Cohere artifact --- Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift 
b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift index 51ae09bb..22aa0476 100644 --- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -61,15 +61,10 @@ struct ExternalCoreMLASRModelSpec { self.manifestFileName, self.frontendFileName, self.encoderFileName, + self.crossKVProjectorFileName, self.decoderFileName, self.cachedDecoderFileName, ] - } - - var optionalEntries: [String] { - [ - self.crossKVProjectorFileName, - ] .compactMap { $0 } } From f1de97bf0cd1cf30344d822c0d7fda74579486f2 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Mon, 30 Mar 2026 02:31:12 -0700 Subject: [PATCH 19/22] Speed up Cohere live preview --- Sources/Fluid/Persistence/SettingsStore.swift | 4 +-- .../ExternalCoreMLTranscriptionProvider.swift | 25 +++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index d06f21b0..9eb2f3cd 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2694,7 +2694,7 @@ final class SettingsStore: ObservableObject { var streamingPreviewIntervalSeconds: Double { switch self { case .cohereTranscribeSixBit: - return 2.0 + return 1.0 default: return 0.6 } @@ -2705,7 +2705,7 @@ final class SettingsStore: ObservableObject { var minimumStreamingPreviewSeconds: Double { switch self { case .cohereTranscribeSixBit: - return 2.0 + return 1.5 default: return 1.0 } diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift index 79c6e65b..daea41f2 100644 --- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -10,6 +10,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { var isAvailable: Bool { true } private(set) var isReady: Bool = false var prefersNativeFileTranscription: Bool { true } + private let streamingPreviewMaxSeconds: Double = 12 private var cohereManager: CohereTranscribeAsrManager? private let modelOverride: SettingsStore.SpeechModel? @@ -81,11 +82,21 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { } func transcribeStreaming(_ samples: [Float]) async throws -> ASRTranscriptionResult { + let previewSamples = self.previewSamples(for: samples) DebugLogger.shared.debug( - "ExternalCoreML: streaming preview request [samples=\(samples.count)]", + "ExternalCoreML: streaming preview request [samples=\(samples.count), previewSamples=\(previewSamples.count)]", source: "ExternalCoreML" ) - return try await self.transcribeFinal(samples) + guard let manager = self.cohereManager else { + DebugLogger.shared.error( + "ExternalCoreML: streaming preview requested before manager initialization", + source: "ExternalCoreML" + ) + throw Self.makeError("External CoreML model is not initialized.") + } + + let text = try await manager.transcribe(audioSamples: previewSamples) + return ASRTranscriptionResult(text: text, confidence: 1.0) } func transcribeFile(at fileURL: URL) async throws -> ASRTranscriptionResult { @@ -258,6 +269,16 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { userInfo: [NSLocalizedDescriptionKey: description] ) } + + private func previewSamples(for samples: [Float]) -> [Float] { + let model = self.modelOverride ?? 
SettingsStore.shared.selectedSpeechModel + guard model == .cohereTranscribeSixBit else { return samples } + + let sampleRate = model.externalCoreMLSpec?.expectedSampleRate ?? 16_000 + let maxPreviewSamples = Int(Double(sampleRate) * self.streamingPreviewMaxSeconds) + guard samples.count > maxPreviewSamples else { return samples } + return Array(samples.suffix(maxPreviewSamples)) + } } #else From f3577c2df7286bf55634a441ac8a32ff04731034 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Tue, 31 Mar 2026 22:28:28 -0700 Subject: [PATCH 20/22] Fix strict lint issues --- Sources/Fluid/Networking/ModelDownloader.swift | 16 +++++++++------- Sources/Fluid/Persistence/SettingsStore.swift | 5 ++++- .../Services/ExternalCoreMLModelRegistry.swift | 14 +++++++------- .../UI/AISettingsView+SpeechRecognition.swift | 3 +-- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/Sources/Fluid/Networking/ModelDownloader.swift b/Sources/Fluid/Networking/ModelDownloader.swift index a1f3a69c..e7b067a9 100644 --- a/Sources/Fluid/Networking/ModelDownloader.swift +++ b/Sources/Fluid/Networking/ModelDownloader.swift @@ -63,16 +63,18 @@ final class HuggingFaceModelDownloader { /// - owner: Hugging Face username or organization /// - repo: Repository name containing the models /// - revision: Branch or commit hash (default: "main") - init(owner: String, repo: String, revision: String = "main", requiredItems: [ModelItem]? = nil) { + init(owner: String, repo: String, revision: String = "main", requiredItems: [ModelItem] = []) { self.owner = owner self.repo = repo self.revision = revision - self.requiredItemsList = requiredItems ?? [ - ModelItem(path: "MelEncoder.mlmodelc", isDirectory: true), - ModelItem(path: "Decoder.mlmodelc", isDirectory: true), - ModelItem(path: "JointDecision.mlmodelc", isDirectory: true), - ModelItem(path: "parakeet_v3_vocab.json", isDirectory: false), - ] + self.requiredItemsList = requiredItems.isEmpty + ? [ + ModelItem(path: "MelEncoder.mlmodelc", isDirectory: true), + ModelItem(path: "Decoder.mlmodelc", isDirectory: true), + ModelItem(path: "JointDecision.mlmodelc", isDirectory: true), + ModelItem(path: "parakeet_v3_vocab.json", isDirectory: false), + ] + : requiredItems guard var apiBase = URL(string: "https://huggingface.co/api/models/") else { preconditionFailure("Invalid base Hugging Face API URL") } diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 9eb2f3cd..dacbcd46 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2516,7 +2516,10 @@ final class SettingsStore: ObservableObject { var cardDescription: String { switch self { case .parakeetTDT: - return "Fast multilingual transcription. Supports Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Spanish, Swedish, and Ukrainian." + return "Fast multilingual transcription. Supports Bulgarian, Croatian, Czech, Danish, " + + "Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, " + + "Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Russian, Slovak, " + + "Slovenian, Spanish, Swedish, and Ukrainian." case .parakeetTDTv2: return "Optimized for English accuracy and fastest transcription." 
case .qwen3Asr: diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift index 22aa0476..7ee373ba 100644 --- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -1,6 +1,6 @@ import CoreML -import Foundation import FluidAudio +import Foundation enum ExternalCoreMLASRBackend { case cohereTranscribe @@ -25,15 +25,15 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError { var errorDescription: String? { switch self { - case .missingEntries(let entries): + case let .missingEntries(entries): return "Missing required files: \(entries.joined(separator: ", "))" - case .manifestMissing(let url): + case let .manifestMissing(url): return "Manifest file not found at \(url.path)" - case .manifestUnreadable(let url, let error): + case let .manifestUnreadable(url, error): return "Failed to read manifest at \(url.path): \(error.localizedDescription)" - case .unexpectedModelID(let expected, let actual): + case let .unexpectedModelID(expected, actual): return "Unexpected model_id '\(actual)'. Expected '\(expected)'." - case .unexpectedSampleRate(let expected, let actual): + case let .unexpectedSampleRate(expected, actual): return "Unexpected sample rate \(actual). Expected \(expected)." } } @@ -137,7 +137,7 @@ enum ExternalCoreMLModelRegistry { decoderFileName: "cohere_decoder_fullseq_masked.mlpackage", cachedDecoderFileName: "cohere_decoder_cached.mlpackage", expectedModelID: "CohereLabs/cohere-transcribe-03-2026", - expectedSampleRate: 16000, + expectedSampleRate: 16_000, computeConfiguration: .aneSmall, sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit"), repositoryOwner: "BarathwajAnandan", diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift index 23d34d44..60045113 100644 --- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift +++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift @@ -309,7 +309,6 @@ extension VoiceEngineSettingsView { Spacer() } } - } .padding(.horizontal, 10) .padding(.vertical, 8) @@ -503,7 +502,7 @@ extension VoiceEngineSettingsView { ZStack(alignment: .trailing) { if model.requiresExternalArtifacts { HStack(spacing: 8) { - if let _ = model.externalCoreMLSpec?.sourceURL { + if model.externalCoreMLSpec?.sourceURL != nil { Button { self.viewModel.openExternalModelSource(for: model) } label: { From 690cbdc52685e6b966715402c109cf8cfd740708 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Tue, 31 Mar 2026 23:20:54 -0700 Subject: [PATCH 21/22] Fix external CoreML input contract handling --- .../ExternalCoreMLModelRegistry.swift | 49 +++++++++++--- .../ExternalCoreMLTranscriptionProvider.swift | 67 +++++++++++++++++-- 2 files changed, 101 insertions(+), 15 deletions(-) diff --git a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift index 7ee373ba..0e99c0a7 100644 --- a/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift +++ b/Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift @@ -9,10 +9,16 @@ enum ExternalCoreMLASRBackend { struct ExternalCoreMLManifestIdentity: Decodable { let modelID: String let sampleRate: Int + let maxAudioSamples: Int + let maxAudioSeconds: Double + let overlapSamples: Int? 
private enum CodingKeys: String, CodingKey { case modelID = "model_id" case sampleRate = "sample_rate" + case maxAudioSamples = "max_audio_samples" + case maxAudioSeconds = "max_audio_seconds" + case overlapSamples = "overlap_samples" } } @@ -22,6 +28,8 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError { case manifestUnreadable(URL, Error) case unexpectedModelID(expected: String, actual: String) case unexpectedSampleRate(expected: Int, actual: Int) + case unexpectedMaxAudioSamples(expected: Int, actual: Int) + case unexpectedMaxAudioSeconds(expected: Double, actual: Double) var errorDescription: String? { switch self { @@ -35,6 +43,10 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError { return "Unexpected model_id '\(actual)'. Expected '\(expected)'." case let .unexpectedSampleRate(expected, actual): return "Unexpected sample rate \(actual). Expected \(expected)." + case let .unexpectedMaxAudioSamples(expected, actual): + return "Unexpected max audio samples \(actual). Expected \(expected)." + case let .unexpectedMaxAudioSeconds(expected, actual): + return "Unexpected max audio seconds \(actual). Expected \(expected)." } } } @@ -50,6 +62,8 @@ struct ExternalCoreMLASRModelSpec { let cachedDecoderFileName: String let expectedModelID: String let expectedSampleRate: Int + let expectedMaxAudioSamples: Int + let expectedMaxAudioSeconds: Double let computeConfiguration: CohereTranscribeComputeConfiguration let sourceURL: URL? let repositoryOwner: String? @@ -88,24 +102,27 @@ struct ExternalCoreMLASRModelSpec { } } - func validateArtifactsOrThrow(at directory: URL) throws { - let missingEntries = self.missingEntries(at: directory) - guard missingEntries.isEmpty else { - throw ExternalCoreMLArtifactsValidationError.missingEntries(missingEntries) - } - + func loadManifest(at directory: URL) throws -> ExternalCoreMLManifestIdentity { let manifestURL = self.url(for: self.manifestFileName, in: directory) guard FileManager.default.fileExists(atPath: manifestURL.path) else { throw ExternalCoreMLArtifactsValidationError.manifestMissing(manifestURL) } - let manifest: ExternalCoreMLManifestIdentity do { let data = try Data(contentsOf: manifestURL) - manifest = try JSONDecoder().decode(ExternalCoreMLManifestIdentity.self, from: data) + return try JSONDecoder().decode(ExternalCoreMLManifestIdentity.self, from: data) } catch { throw ExternalCoreMLArtifactsValidationError.manifestUnreadable(manifestURL, error) } + } + + func validateArtifactsOrThrow(at directory: URL) throws { + let missingEntries = self.missingEntries(at: directory) + guard missingEntries.isEmpty else { + throw ExternalCoreMLArtifactsValidationError.missingEntries(missingEntries) + } + + let manifest = try self.loadManifest(at: directory) guard manifest.modelID == self.expectedModelID else { throw ExternalCoreMLArtifactsValidationError.unexpectedModelID( @@ -120,6 +137,20 @@ struct ExternalCoreMLASRModelSpec { actual: manifest.sampleRate ) } + + guard manifest.maxAudioSamples == self.expectedMaxAudioSamples else { + throw ExternalCoreMLArtifactsValidationError.unexpectedMaxAudioSamples( + expected: self.expectedMaxAudioSamples, + actual: manifest.maxAudioSamples + ) + } + + guard manifest.maxAudioSeconds == self.expectedMaxAudioSeconds else { + throw ExternalCoreMLArtifactsValidationError.unexpectedMaxAudioSeconds( + expected: self.expectedMaxAudioSeconds, + actual: manifest.maxAudioSeconds + ) + } } } @@ -138,6 +169,8 @@ enum ExternalCoreMLModelRegistry { cachedDecoderFileName: "cohere_decoder_cached.mlpackage", 
expectedModelID: "CohereLabs/cohere-transcribe-03-2026", expectedSampleRate: 16_000, + expectedMaxAudioSamples: 560_000, + expectedMaxAudioSeconds: 35.0, computeConfiguration: .aneSmall, sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit"), repositoryOwner: "BarathwajAnandan", diff --git a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift index daea41f2..115bda27 100644 --- a/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift +++ b/Sources/Fluid/Services/ExternalCoreMLTranscriptionProvider.swift @@ -14,6 +14,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { private var cohereManager: CohereTranscribeAsrManager? private let modelOverride: SettingsStore.SpeechModel? + private var loadedManifest: ExternalCoreMLManifestIdentity? init(modelOverride: SettingsStore.SpeechModel? = nil) { self.modelOverride = modelOverride @@ -51,6 +52,8 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { progressHandler?(0.85) + self.loadedManifest = try spec.loadManifest(at: directory) + switch spec.backend { case .cohereTranscribe: let manager = CohereTranscribeAsrManager() @@ -62,7 +65,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { String(describing: spec.computeConfiguration.decoder), ].joined(separator: "/") DebugLogger.shared.info( - "ExternalCoreML: loading Cohere models [splitCompute=\(computeSummary)]", + "ExternalCoreML: loading Cohere models [splitCompute=\(computeSummary), maxAudioSamples=\(self.loadedManifest?.maxAudioSamples ?? spec.expectedMaxAudioSamples)]", source: "ExternalCoreML" ) try await manager.loadModels(from: directory, computeConfiguration: spec.computeConfiguration) @@ -95,7 +98,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { throw Self.makeError("External CoreML model is not initialized.") } - let text = try await manager.transcribe(audioSamples: previewSamples) + let text = try await manager.transcribe(audioSamples: self.paddedSamplesToModelLimit(previewSamples)) return ASRTranscriptionResult(text: text, confidence: 1.0) } @@ -137,7 +140,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { "ExternalCoreML: transcribing \(samples.count) samples [audioSeconds=\(String(format: "%.2f", audioSeconds))]", source: "ExternalCoreML" ) - let text = try await manager.transcribe(audioSamples: samples) + let text = try await self.transcribeByManifestWindow(samples, manager: manager) let elapsed = Date().timeIntervalSince(startedAt) let rtf = audioSeconds > 0 ? elapsed / audioSeconds : 0 DebugLogger.shared.info( @@ -192,6 +195,7 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { self.isReady = false self.cohereManager = nil + self.loadedManifest = nil DebugLogger.shared.info( "ExternalCoreML: provider reset after cache clear", source: "ExternalCoreML" @@ -271,14 +275,63 @@ final class ExternalCoreMLTranscriptionProvider: TranscriptionProvider { } private func previewSamples(for samples: [Float]) -> [Float] { - let model = self.modelOverride ?? SettingsStore.shared.selectedSpeechModel - guard model == .cohereTranscribeSixBit else { return samples } - - let sampleRate = model.externalCoreMLSpec?.expectedSampleRate ?? 16_000 + let sampleRate = self.loadedManifest?.sampleRate + ?? (self.modelOverride ?? SettingsStore.shared.selectedSpeechModel).externalCoreMLSpec?.expectedSampleRate + ?? 
16_000 let maxPreviewSamples = Int(Double(sampleRate) * self.streamingPreviewMaxSeconds) guard samples.count > maxPreviewSamples else { return samples } return Array(samples.suffix(maxPreviewSamples)) } + + private func transcribeByManifestWindow( + _ samples: [Float], + manager: CohereTranscribeAsrManager + ) async throws -> String { + let maxAudioSamples = self.loadedManifest?.maxAudioSamples ?? 0 + guard maxAudioSamples > 0 else { + return try await manager.transcribe(audioSamples: samples) + } + + if samples.count <= maxAudioSamples { + return try await manager.transcribe(audioSamples: self.paddedSamplesToModelLimit(samples)) + } + + let overlapSamples = min(self.loadedManifest?.overlapSamples ?? 0, maxAudioSamples / 2) + let step = max(maxAudioSamples - overlapSamples, 1) + var chunkTexts: [String] = [] + var startIndex = 0 + + while startIndex < samples.count { + let endIndex = min(startIndex + maxAudioSamples, samples.count) + let chunk = Array(samples[startIndex..<endIndex]) + let chunkText = try await manager.transcribe(audioSamples: self.paddedSamplesToModelLimit(chunk)) + if !chunkText.isEmpty { + chunkTexts.append(chunkText) + } + + if endIndex >= samples.count { + break + } + startIndex += step + } + + return chunkTexts.joined(separator: " ") + } + + private func paddedSamplesToModelLimit(_ samples: [Float]) -> [Float] { + let maxAudioSamples = self.loadedManifest?.maxAudioSamples ?? samples.count + guard maxAudioSamples > 0 else { return samples } + + if samples.count == maxAudioSamples { + return samples + } + + if samples.count > maxAudioSamples { + return Array(samples.suffix(maxAudioSamples)) + } + + return samples + Array(repeating: 0, count: maxAudioSamples - samples.count) + } } #else From e104859624e53e159141c7c1d22354664b4268d1 Mon Sep 17 00:00:00 2001 From: altic-dev Date: Tue, 31 Mar 2026 23:31:33 -0700 Subject: [PATCH 22/22] Bump version to 1.5.11-beta.2 --- Info.plist | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Info.plist b/Info.plist index ef083cab..fe22e63a 100644 --- a/Info.plist +++ b/Info.plist @@ -13,9 +13,9 @@ <key>CFBundleIdentifier</key> <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string> <key>CFBundleVersion</key> - <string>8</string> + <string>9</string> <key>CFBundleShortVersionString</key> - <string>1.5.11-beta.1</string> + <string>1.5.11-beta.2</string> <key>LSMinimumSystemVersion</key> <string>$(MACOSX_DEPLOYMENT_TARGET)</string> <key>LSApplicationCategoryType</key>
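
The manifest contract that PATCH 21/22 starts enforcing can be illustrated with a small sketch. The JSON key names below come straight from ExternalCoreMLManifestIdentity's CodingKeys; the overlap_samples value is purely hypothetical (it is optional and is not validated against the spec), and the decode mirrors what loadManifest(at:) does with a real coreml_manifest.json on disk:

    import Foundation

    // Hypothetical manifest body; only model_id, sample_rate,
    // max_audio_samples, and max_audio_seconds are validated.
    let manifestJSON = """
    {
      "model_id": "CohereLabs/cohere-transcribe-03-2026",
      "sample_rate": 16000,
      "max_audio_samples": 560000,
      "max_audio_seconds": 35.0,
      "overlap_samples": 80000
    }
    """
    let identity = try JSONDecoder().decode(
        ExternalCoreMLManifestIdentity.self,
        from: Data(manifestJSON.utf8)
    )
    assert(identity.maxAudioSamples == 560_000)  // 35 s at 16 kHz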
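
The chunking arithmetic in transcribeByManifestWindow is worth sanity-checking in isolation, since off-by-one errors at chunk seams are easy to miss. A minimal, self-contained sketch, assuming the spec's 560_000-sample limit and the same hypothetical 80_000-sample overlap (5 s) as above:

    // Reproduces the chunk boundaries the windowing loop would visit
    // for a ~75 s recording at 16 kHz.
    let sampleRate = 16_000
    let maxAudioSamples = 560_000                        // 35 s cap per chunk
    let overlapSamples = min(80_000, maxAudioSamples / 2)
    let step = max(maxAudioSamples - overlapSamples, 1)  // 480_000
    let totalSamples = 1_200_000                         // ~75 s of audio

    var startIndex = 0
    while startIndex < totalSamples {
        let endIndex = min(startIndex + maxAudioSamples, totalSamples)
        let seconds = Double(endIndex - startIndex) / Double(sampleRate)
        print("chunk \(startIndex)..<\(endIndex) (\(seconds) s)")
        if endIndex >= totalSamples { break }
        startIndex += step
    }
    // chunk 0..<560000 (35.0 s)
    // chunk 480000..<1040000 (35.0 s)
    // chunk 960000..<1200000 (15.0 s)

A positive overlap makes each chunk re-read the tail of its predecessor, which costs some duplicate decoding but loses fewer words at seams; since the chunk texts are joined with a single space, words near a seam can still appear twice in the final transcript.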
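
The live-preview path from PATCH 19/22 composes with the fixed-size input contract from PATCH 21/22: the buffer is first trimmed to the trailing streamingPreviewMaxSeconds window, then zero-padded up to the model's fixed sample count. A worked example, assuming a 20-second live buffer:

    // Trim to the last 12 s, then pad to the 560_000-sample model input.
    let sampleRate = 16_000
    let liveBuffer = [Float](repeating: 0.1, count: 20 * sampleRate)  // 320_000 samples

    let maxPreviewSamples = Int(Double(sampleRate) * 12.0)            // 192_000
    let preview = Array(liveBuffer.suffix(maxPreviewSamples))         // trailing 12 s

    let maxAudioSamples = 560_000
    let padded = preview + [Float](repeating: 0, count: maxAudioSamples - preview.count)
    assert(padded.count == maxAudioSamples)

Trimming to a trailing window keeps preview latency roughly constant as a recording grows, at the cost of the preview reflecting only the most recent 12 seconds of speech.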