From 3c7876690779e114635ded8d40dd8b198dbea5ca Mon Sep 17 00:00:00 2001 From: stego-z Date: Sat, 4 Apr 2026 09:35:36 +0800 Subject: [PATCH] Fix Chinese input and Apple ASR locale selection --- Sources/Fluid/Persistence/SettingsStore.swift | 31 ++++++-- .../AppleSpeechAnalyzerProvider.swift | 71 ++++++++++++++----- .../Fluid/Services/AppleSpeechProvider.swift | 19 +++-- .../Fluid/Services/SpeechLocaleResolver.swift | 64 +++++++++++++++++ Sources/Fluid/Services/TypingService.swift | 35 ++++++++- .../UI/AISettingsView+SpeechRecognition.swift | 17 ++++- Sources/Fluid/UI/WelcomeView.swift | 9 +++ 7 files changed, 215 insertions(+), 31 deletions(-) create mode 100644 Sources/Fluid/Services/SpeechLocaleResolver.swift diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index cfafda6b..a50149e7 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2493,7 +2493,10 @@ final class SettingsStore: ObservableObject { /// Default model for the current architecture static var defaultModel: SpeechModel { - CPUArchitecture.isAppleSilicon ? .parakeetTDT : .whisperBase + if SpeechLocaleResolver.prefersChineseRecognition { + return .whisperBase + } + return CPUArchitecture.isAppleSilicon ? .parakeetTDT : .whisperBase } // MARK: - UI Card Metadata @@ -3168,7 +3171,8 @@ extension SettingsStore { case polish = "pl" case portuguese = "pt" case vietnamese = "vi" - case mandarinChinese = "zh" + case simplifiedChinese = "zh-Hans" + case traditionalChinese = "zh-Hant" var id: String { self.rawValue } @@ -3187,11 +3191,19 @@ extension SettingsStore { case .polish: return "Polish" case .portuguese: return "Portuguese" case .vietnamese: return "Vietnamese" - case .mandarinChinese: return "Mandarin Chinese" + case .simplifiedChinese: return "Simplified Chinese" + case .traditionalChinese: return "Traditional Chinese" } } - var tokenString: String { "<|\(self.rawValue)|>" } + var tokenString: String { + switch self { + case .simplifiedChinese, .traditionalChinese: + return "<|zh|>" + default: + return "<|\(self.rawValue)|>" + } + } } // MARK: - Unified Speech Model Selection @@ -3218,6 +3230,14 @@ extension SettingsStore { if model.requiresMacOS26, #unavailable(macOS 26.0) { return .whisperBase } + if SpeechLocaleResolver.prefersChineseRecognition { + switch model { + case .parakeetTDT, .parakeetTDTv2, .parakeetRealtime: + return .whisperBase + default: + break + } + } return model } @@ -3237,6 +3257,9 @@ extension SettingsStore { { return language } + if self.defaults.string(forKey: Keys.selectedCohereLanguage) == "zh" { + return .traditionalChinese + } return .english } set { diff --git a/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift b/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift index 0835d902..3f3c4b73 100644 --- a/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift +++ b/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift @@ -34,31 +34,63 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { init() {} + private func resolvedRecognitionLocale() async throws -> Locale { + let preferredLocale = SpeechLocaleResolver.preferredRecognitionLocale() + + if let supportedLocale = await SpeechTranscriber.supportedLocale(equivalentTo: preferredLocale) { + let preferredID = preferredLocale.identifier(.bcp47) + let resolvedID = supportedLocale.identifier(.bcp47) + if preferredID != resolvedID { + DebugLogger.shared.info( + "AppleSpeechAnalyzerProvider: Falling back from locale \(preferredID) to supported locale \(resolvedID)", + source: "AppleSpeechAnalyzerProvider" + ) + } + return supportedLocale + } + + let supportedLocales = await SpeechTranscriber.supportedLocales + if let englishLocale = supportedLocales.first(where: { + $0.language.languageCode?.identifier == "en" + }) { + DebugLogger.shared.warning( + "AppleSpeechAnalyzerProvider: Preferred locale \(preferredLocale.identifier(.bcp47)) unsupported, using English fallback \(englishLocale.identifier(.bcp47))", + source: "AppleSpeechAnalyzerProvider" + ) + return englishLocale + } + + if let firstSupportedLocale = supportedLocales.first { + DebugLogger.shared.warning( + "AppleSpeechAnalyzerProvider: Preferred locale \(preferredLocale.identifier(.bcp47)) unsupported, using first supported locale \(firstSupportedLocale.identifier(.bcp47))", + source: "AppleSpeechAnalyzerProvider" + ) + return firstSupportedLocale + } + + throw NSError( + domain: "AppleSpeechAnalyzerProvider", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "SpeechAnalyzer is unavailable on this device"] + ) + } + // MARK: - Lifecycle func prepare(progressHandler: ((Double) -> Void)?) async throws { + let recognitionLocale = try await self.resolvedRecognitionLocale() + // 1. Create a transcriber to check locale support and download if needed let transcriber = SpeechTranscriber( - locale: Locale.current, + locale: recognitionLocale, transcriptionOptions: [], reportingOptions: [], attributeOptions: [] ) - // 2. Check if locale is supported - let supportedLocales = await SpeechTranscriber.supportedLocales - let currentLocaleID = Locale.current.identifier(.bcp47) - let isSupported = supportedLocales.map { $0.identifier(.bcp47) }.contains(currentLocaleID) - - guard isSupported else { - throw NSError( - domain: "AppleSpeechAnalyzerProvider", - code: 1, - userInfo: [NSLocalizedDescriptionKey: "Current locale is not supported by SpeechAnalyzer"] - ) - } + let currentLocaleID = recognitionLocale.identifier(.bcp47) - // 3. Check if model is installed, download if needed + // 2. Check if model is installed, download if needed let installedLocales = await SpeechTranscriber.installedLocales let isInstalled = installedLocales.map { $0.identifier(.bcp47) }.contains(currentLocaleID) @@ -77,7 +109,7 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { } } - // 4. Get the best available audio format for conversion + // 3. Get the best available audio format for conversion self.analyzerFormat = await SpeechAnalyzer.bestAvailableAudioFormat(compatibleWith: [transcriber]) self.converter = BufferConverter() @@ -121,8 +153,12 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { /// /// - Returns: `true` if the current locale's speech model is installed on disk, `false` otherwise. func refreshModelsExistOnDiskAsync() async -> Bool { + guard let recognitionLocale = try? await self.resolvedRecognitionLocale() else { + self._cacheQueue.sync { self._modelsInstalledCache = false } + return false + } let installedLocales = await SpeechTranscriber.installedLocales - let currentLocaleID = Locale.current.identifier(.bcp47) + let currentLocaleID = recognitionLocale.identifier(.bcp47) let isInstalled = installedLocales.map { $0.identifier(.bcp47) }.contains(currentLocaleID) self._cacheQueue.sync { self._modelsInstalledCache = isInstalled } @@ -147,10 +183,11 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { } DebugLogger.shared.debug("AppleSpeechAnalyzer: Starting transcription with \(samples.count) samples", source: "AppleSpeechAnalyzerProvider") + let recognitionLocale = try await self.resolvedRecognitionLocale() // 1. Create a FRESH transcriber for this transcription let freshTranscriber = SpeechTranscriber( - locale: Locale.current, + locale: recognitionLocale, transcriptionOptions: [], reportingOptions: [], attributeOptions: [] diff --git a/Sources/Fluid/Services/AppleSpeechProvider.swift b/Sources/Fluid/Services/AppleSpeechProvider.swift index af32948a..c23b8a42 100644 --- a/Sources/Fluid/Services/AppleSpeechProvider.swift +++ b/Sources/Fluid/Services/AppleSpeechProvider.swift @@ -23,13 +23,24 @@ final class AppleSpeechProvider: TranscriptionProvider { private var recognizer: SFSpeechRecognizer? init() { - // Initialize with user's current locale - self.recognizer = SFSpeechRecognizer(locale: Locale.current) + // Initialize with the user's preferred speech locale when possible. + self.recognizer = SFSpeechRecognizer(locale: SpeechLocaleResolver.preferredRecognitionLocale()) + } + + private func refreshRecognizerIfNeeded() { + let preferredLocale = SpeechLocaleResolver.preferredRecognitionLocale() + if self.recognizer?.locale.identifier != preferredLocale.identifier { + self.recognizer = SFSpeechRecognizer(locale: preferredLocale) + } else if self.recognizer == nil { + self.recognizer = SFSpeechRecognizer(locale: preferredLocale) + } } // MARK: - Lifecycle func prepare(progressHandler: ((Double) -> Void)?) async throws { + self.refreshRecognizerIfNeeded() + // 1. Request Authorization let status = await self.requestAuthorization() @@ -70,9 +81,7 @@ final class AppleSpeechProvider: TranscriptionProvider { } // 2. Ensure recognizer exists - if self.recognizer == nil { - self.recognizer = SFSpeechRecognizer(locale: Locale.current) - } + self.refreshRecognizerIfNeeded() guard let recognizer = self.recognizer else { throw NSError(domain: "AppleSpeechProvider", code: 5, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize SFSpeechRecognizer"]) } diff --git a/Sources/Fluid/Services/SpeechLocaleResolver.swift b/Sources/Fluid/Services/SpeechLocaleResolver.swift new file mode 100644 index 00000000..1c32aeeb --- /dev/null +++ b/Sources/Fluid/Services/SpeechLocaleResolver.swift @@ -0,0 +1,64 @@ +import Foundation + +enum SpeechLocaleResolver { + static var prefersChineseRecognition: Bool { + Locale.preferredLanguages.contains { Self.languageCode(from: $0) == "zh" } + } + + static func preferredRecognitionLocale() -> Locale { + let selectedModel = SettingsStore.shared.selectedSpeechModel + switch selectedModel { + case .appleSpeech, .appleSpeechAnalyzer, .cohereTranscribeSixBit: + return Self.locale(for: SettingsStore.shared.selectedCohereLanguage) + default: + break + } + + if let preferredChinese = Locale.preferredLanguages.first(where: { Self.languageCode(from: $0) == "zh" }) { + return Locale(identifier: preferredChinese) + } + return Locale.autoupdatingCurrent + } + + private static func locale(for language: SettingsStore.CohereLanguage) -> Locale { + switch language { + case .arabic: + return Locale(identifier: "ar-SA") + case .german: + return Locale(identifier: "de-DE") + case .greek: + return Locale(identifier: "el-GR") + case .english: + return Locale(identifier: "en-US") + case .spanish: + return Locale(identifier: "es-ES") + case .french: + return Locale(identifier: "fr-FR") + case .italian: + return Locale(identifier: "it-IT") + case .japanese: + return Locale(identifier: "ja-JP") + case .korean: + return Locale(identifier: "ko-KR") + case .dutch: + return Locale(identifier: "nl-NL") + case .polish: + return Locale(identifier: "pl-PL") + case .portuguese: + return Locale(identifier: "pt-BR") + case .vietnamese: + return Locale(identifier: "vi-VN") + case .simplifiedChinese: + return Locale(identifier: "zh-CN") + case .traditionalChinese: + return Locale(identifier: "zh-TW") + } + } + + private static func languageCode(from identifier: String) -> String? { + let normalized = identifier.lowercased() + let separator = normalized.firstIndex(where: { $0 == "-" || $0 == "_" }) ?? normalized.endIndex + let code = String(normalized[.. Bool { + text.unicodeScalars.contains { scalar in + switch scalar.value { + case 0x2E80...0x2EFF, // CJK Radicals Supplement + 0x2F00...0x2FDF, // Kangxi Radicals + 0x3040...0x309F, // Hiragana + 0x30A0...0x30FF, // Katakana + 0x3100...0x312F, // Bopomofo + 0x3130...0x318F, // Hangul Compatibility Jamo + 0x31A0...0x31BF, // Bopomofo Extended + 0x3400...0x4DBF, // CJK Unified Ideographs Extension A + 0x4E00...0x9FFF, // CJK Unified Ideographs + 0xA960...0xA97F, // Hangul Jamo Extended-A + 0xAC00...0xD7AF, // Hangul Syllables + 0xD7B0...0xD7FF, // Hangul Jamo Extended-B + 0xF900...0xFAFF, // CJK Compatibility Ideographs + 0xFE30...0xFE4F, // CJK Compatibility Forms + 0xFF00...0xFFEF: // Fullwidth forms + return true + default: + return false + } + } + } + // MARK: - Layout-aware key code lookup /// Returns the virtual key code that produces `character` under the current keyboard layout. @@ -262,7 +287,7 @@ final class TypingService { } self.log("[TypingService] Starting async text insertion process") - if self.textInsertionMode == .reliablePaste { + if self.shouldForceReliablePaste(for: text) || self.textInsertionMode == .reliablePaste { // Reliable Paste still needs a short settle window after focus restoration. usleep(80_000) } else { @@ -279,8 +304,14 @@ final class TypingService { private func insertTextInstantly(_ text: String, preferredTargetPID: pid_t?) { self.log("[TypingService] insertTextInstantly called with \(text.count) characters") self.log("[TypingService] Attempting to type text: \"\(text.prefix(50))\(text.count > 50 ? "..." : "")\"") + let forceReliablePaste = self.shouldForceReliablePaste(for: text) + let effectiveInsertionMode: SettingsStore.TextInsertionMode = forceReliablePaste ? .reliablePaste : self.textInsertionMode + + if forceReliablePaste { + self.log("[TypingService] Detected CJK text; forcing Reliable Paste for IME-safe insertion") + } - if self.textInsertionMode == .reliablePaste { + if effectiveInsertionMode == .reliablePaste { self.log("[TypingService] Reliable Paste mode enabled") if self.tryReliablePasteInsertion(text, preferredTargetPID: preferredTargetPID) { self.log("[TypingService] SUCCESS: Reliable Paste mode completed") diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift index e7322d76..8079ee73 100644 --- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift +++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift @@ -270,7 +270,7 @@ extension VoiceEngineSettingsView { .animation(.spring(response: 0.5, dampingFraction: 0.7), value: model.id) } - if model == .cohereTranscribeSixBit { + if model == .cohereTranscribeSixBit || model == .appleSpeech || model == .appleSpeechAnalyzer { VStack(alignment: .leading, spacing: 8) { HStack(alignment: .center, spacing: 10) { Image(systemName: "globe") @@ -281,7 +281,7 @@ extension VoiceEngineSettingsView { Text("Select Language Manually") .font(.caption) .fontWeight(.semibold) - Text("Choose the language token injected into Cohere's transcription prompt.") + Text(self.manualLanguageDescription(for: model)) .font(.caption2) .foregroundStyle(.secondary) .lineLimit(2) @@ -289,7 +289,7 @@ extension VoiceEngineSettingsView { Spacer(minLength: 8) - Picker("Cohere Language", selection: Binding( + Picker("Recognition Language", selection: Binding( get: { self.settings.selectedCohereLanguage }, set: { newValue in guard newValue != self.settings.selectedCohereLanguage else { return } @@ -352,6 +352,17 @@ extension VoiceEngineSettingsView { .padding(.vertical, 6) } + private func manualLanguageDescription(for model: SettingsStore.SpeechModel) -> String { + switch model { + case .cohereTranscribeSixBit: + return "Choose the language token injected into Cohere's transcription prompt." + case .appleSpeech, .appleSpeechAnalyzer: + return "Choose the locale used by Apple's on-device speech recognition." + default: + return "Choose the recognition language." + } + } + func speechModelCard(for model: SettingsStore.SpeechModel) -> some View { let isSelected = self.viewModel.previewSpeechModel == model let isConfiguredActive = self.viewModel.isActiveSpeechModel(model) diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 207191fa..9f74f6be 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -648,6 +648,9 @@ struct OnboardingFlowView: View { } private var recommendedOnboardingModel: SettingsStore.SpeechModel { + if SpeechLocaleResolver.prefersChineseRecognition { + return .whisperBase + } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -664,6 +667,9 @@ struct OnboardingFlowView: View { } private var recommendedOnboardingModels: [SettingsStore.SpeechModel] { + if SpeechLocaleResolver.prefersChineseRecognition { + return [.whisperBase, .whisperSmall].filter { SettingsStore.SpeechModel.availableModels.contains($0) } + } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -678,6 +684,9 @@ struct OnboardingFlowView: View { } private var recommendedModelReasonText: String { + if SpeechLocaleResolver.prefersChineseRecognition { + return "Best if you mainly speak Chinese. Whisper Base gives broad multilingual coverage and works across Apple Silicon and Intel Macs." + } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: