Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleVersion</key>
<string>9</string>
<string>10</string>
<key>CFBundleShortVersionString</key>
<string>1.5.11-beta.2</string>
<string>1.5.11-beta.3</string>
<key>LSMinimumSystemVersion</key>
<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
<key>LSApplicationCategoryType</key>
Expand Down
2 changes: 1 addition & 1 deletion Package.resolved

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 62 additions & 4 deletions Sources/Fluid/Persistence/SettingsStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2359,7 +2359,7 @@ final class SettingsStore: ObservableObject {
case .parakeetTDTv2: return "Parakeet TDT v2 (English Only)"
case .parakeetRealtime: return "Parakeet Flash (Beta)"
case .qwen3Asr: return "Qwen3 ASR (Beta)"
case .cohereTranscribeSixBit: return "Cohere Transcribe 6-bit"
case .cohereTranscribeSixBit: return "Cohere Transcribe"
case .appleSpeech: return "Apple ASR Legacy"
case .appleSpeechAnalyzer: return "Apple Speech - macOS 26+"
case .whisperTiny: return "Whisper Tiny"
Expand All @@ -2378,7 +2378,7 @@ final class SettingsStore: ObservableObject {
case .parakeetTDTv2: return "English Only (Higher Accuracy)"
case .parakeetRealtime: return "English Only (Live Streaming)"
case .qwen3Asr: return "30 Languages"
case .cohereTranscribeSixBit: return "14 Languages"
case .cohereTranscribeSixBit: return "14 Languages (Select Manually)"
case .appleSpeech: return "System Languages"
case .appleSpeechAnalyzer: return "EN, ES, FR, DE, IT, JA, KO, PT, ZH"
case .whisperTiny, .whisperBase, .whisperSmall, .whisperMedium, .whisperLargeTurbo, .whisperLarge:
Expand Down Expand Up @@ -2532,7 +2532,7 @@ final class SettingsStore: ObservableObject {
case .qwen3Asr:
return "Qwen3 multilingual ASR via FluidAudio. Higher quality, heavier memory footprint."
case .cohereTranscribeSixBit:
return "High-accuracy multilingual transcription. Supports English, French, German, Italian, Spanish, Portuguese, Greek, Dutch, Polish, Mandarin, Japanese, Korean, Vietnamese, and Arabic."
return "High-accuracy multilingual transcription. Select the language manually before dictation for best results."
case .appleSpeech:
return "Built-in macOS speech recognition. No download required."
case .appleSpeechAnalyzer:
Expand Down Expand Up @@ -2999,6 +2999,7 @@ private extension SettingsStore {

// Unified Speech Model (replaces above two)
static let selectedSpeechModel = "SelectedSpeechModel"
static let selectedCohereLanguage = "SelectedCohereLanguage"
static let externalCoreMLArtifactsDirectories = "ExternalCoreMLArtifactsDirectories"

// Overlay Position
Expand Down Expand Up @@ -3130,7 +3131,7 @@ extension SettingsStore.SpeechModel {
case .parakeetRealtime:
return "EN"
case .cohereTranscribeSixBit:
return "14 Languages"
return "AR, DE, EL, EN, ES, FR, IT, JA, KO, NL, PL, PT, VI, ZH"
case .appleSpeechAnalyzer:
return "EN, ES, FR, DE, IT, JA, KO, PT, ZH"
default:
Expand All @@ -3144,13 +3145,55 @@ extension SettingsStore.SpeechModel {
return """
Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, Slovenian, Spanish, Swedish, Russian, and Ukrainian
"""
case .cohereTranscribeSixBit:
return "Arabic, German, Greek, English, Spanish, French, Italian, Japanese, Korean, Dutch, Polish, Portuguese, Vietnamese, and Mandarin Chinese"
default:
return nil
}
}
}

extension SettingsStore {
/// Languages selectable for the Cohere transcription model.
///
/// Each case's raw value is a two-letter language code. Declaration order is
/// significant because it defines the order of `allCases`.
enum CohereLanguage: String, CaseIterable, Identifiable, Codable {
    case arabic = "ar"
    case german = "de"
    case greek = "el"
    case english = "en"
    case spanish = "es"
    case french = "fr"
    case italian = "it"
    case japanese = "ja"
    case korean = "ko"
    case dutch = "nl"
    case polish = "pl"
    case portuguese = "pt"
    case vietnamese = "vi"
    case mandarinChinese = "zh"

    /// `Identifiable` conformance: the language code doubles as the stable identity.
    var id: String {
        self.rawValue
    }

    /// English, human-readable name of the language.
    var displayName: String {
        let name: String
        // Exhaustive on purpose: adding a case without a name must fail to compile.
        switch self {
        case .arabic:
            name = "Arabic"
        case .german:
            name = "German"
        case .greek:
            name = "Greek"
        case .english:
            name = "English"
        case .spanish:
            name = "Spanish"
        case .french:
            name = "French"
        case .italian:
            name = "Italian"
        case .japanese:
            name = "Japanese"
        case .korean:
            name = "Korean"
        case .dutch:
            name = "Dutch"
        case .polish:
            name = "Polish"
        case .portuguese:
            name = "Portuguese"
        case .vietnamese:
            name = "Vietnamese"
        case .mandarinChinese:
            name = "Mandarin Chinese"
        }
        return name
    }

    /// The language code wrapped in `<|` and `|>`, e.g. `<|en|>`.
    // NOTE(review): presumably the language tag token expected by the model's decoder — confirm against the consumer.
    var tokenString: String {
        "<|" + self.rawValue + "|>"
    }
}

// MARK: - Unified Speech Model Selection

/// The selected speech recognition model.
Expand Down Expand Up @@ -3187,6 +3230,21 @@ extension SettingsStore {
}
}

/// The selected Cohere language, stored in `defaults` under `Keys.selectedCohereLanguage`.
///
/// Reading returns `.english` when no value is stored or when the stored
/// string does not match any `CohereLanguage` raw value. Writing sends
/// `objectWillChange` first, then stores the new language's raw value.
var selectedCohereLanguage: CohereLanguage {
    get {
        guard
            let storedCode = self.defaults.string(forKey: Keys.selectedCohereLanguage),
            let storedLanguage = CohereLanguage(rawValue: storedCode)
        else {
            // Missing or unrecognized code: fall back to English.
            return .english
        }
        return storedLanguage
    }
    set {
        // Notify observers before the stored value changes.
        self.objectWillChange.send()
        self.defaults.set(newValue.rawValue, forKey: Keys.selectedCohereLanguage)
    }
}

func externalCoreMLArtifactsDirectory(for model: SpeechModel) -> URL? {
guard let spec = model.externalCoreMLSpec else { return nil }
let paths = self.defaults.dictionary(forKey: Keys.externalCoreMLArtifactsDirectories) as? [String: String] ?? [:]
Expand Down
44 changes: 43 additions & 1 deletion Sources/Fluid/Services/ExternalCoreMLModelRegistry.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError {
case manifestUnreadable(URL, Error)
case unexpectedModelID(expected: String, actual: String)
case unexpectedSampleRate(expected: Int, actual: Int)
case invalidMaxAudioSeconds(Double)
case invalidMaxAudioSamples(Int)
case inconsistentAudioWindow(samples: Int, seconds: Double, sampleRate: Int)
case invalidOverlapSamples(Int, maxAudioSamples: Int)

var errorDescription: String? {
switch self {
Expand All @@ -41,6 +45,14 @@ enum ExternalCoreMLArtifactsValidationError: LocalizedError {
return "Unexpected model_id '\(actual)'. Expected '\(expected)'."
case let .unexpectedSampleRate(expected, actual):
return "Unexpected sample rate \(actual). Expected \(expected)."
case let .invalidMaxAudioSeconds(seconds):
return "Invalid max_audio_seconds \(seconds)."
case let .invalidMaxAudioSamples(samples):
return "Invalid max_audio_samples \(samples)."
case let .inconsistentAudioWindow(samples, seconds, sampleRate):
return "Manifest audio window is inconsistent: \(samples) samples vs \(seconds)s at \(sampleRate) Hz."
case let .invalidOverlapSamples(overlapSamples, maxAudioSamples):
return "Invalid overlap_samples \(overlapSamples) for max_audio_samples \(maxAudioSamples)."
}
}
}
Expand All @@ -61,6 +73,8 @@ struct ExternalCoreMLASRModelSpec {
let repositoryOwner: String?
let repositoryName: String?
let repositoryRevision: String
let artifactBundleVersion: String
private let maximumAudioWindowSeconds: Double = 60

var requiredEntries: [String] {
[
Expand Down Expand Up @@ -129,6 +143,33 @@ struct ExternalCoreMLASRModelSpec {
actual: manifest.sampleRate
)
}

guard manifest.maxAudioSeconds > 0, manifest.maxAudioSeconds <= self.maximumAudioWindowSeconds else {
throw ExternalCoreMLArtifactsValidationError.invalidMaxAudioSeconds(manifest.maxAudioSeconds)
}

let maximumAudioSamples = Int((Double(self.expectedSampleRate) * self.maximumAudioWindowSeconds).rounded())
guard manifest.maxAudioSamples > 0, manifest.maxAudioSamples <= maximumAudioSamples else {
throw ExternalCoreMLArtifactsValidationError.invalidMaxAudioSamples(manifest.maxAudioSamples)
}

let expectedSamples = Int((manifest.maxAudioSeconds * Double(manifest.sampleRate)).rounded())
guard abs(expectedSamples - manifest.maxAudioSamples) <= 1 else {
throw ExternalCoreMLArtifactsValidationError.inconsistentAudioWindow(
samples: manifest.maxAudioSamples,
seconds: manifest.maxAudioSeconds,
sampleRate: manifest.sampleRate
)
}

if let overlapSamples = manifest.overlapSamples {
guard overlapSamples >= 0, overlapSamples < manifest.maxAudioSamples else {
throw ExternalCoreMLArtifactsValidationError.invalidOverlapSamples(
overlapSamples,
maxAudioSamples: manifest.maxAudioSamples
)
}
}
}
}

Expand All @@ -151,7 +192,8 @@ enum ExternalCoreMLModelRegistry {
sourceURL: URL(string: "https://huggingface.co/BarathwajAnandan/cohere-transcribe-03-2026-CoreML-6bit"),
repositoryOwner: "BarathwajAnandan",
repositoryName: "cohere-transcribe-03-2026-CoreML-6bit",
repositoryRevision: "main"
repositoryRevision: "main",
artifactBundleVersion: "2026-04-02-cohere-refresh-1"
)
default:
return nil
Expand Down
Loading
Loading