Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions Sources/Fluid/Services/MeetingTranscriptionService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import AVFoundation
import Combine
import CoreMedia
import Foundation
import UniformTypeIdentifiers

/// Result of a transcription operation
struct TranscriptionResult: Identifiable, Sendable, Codable {
Expand Down Expand Up @@ -67,6 +68,29 @@ final class MeetingTranscriptionService: ObservableObject {
@Published var error: String?
@Published var result: TranscriptionResult?

// MARK: - Supported Formats

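/// Preferred filename extensions of the audio/movie types AVFoundation reports it can open, queried dynamically.
/// Filtered to audio/video types only — excludes subtitles, playlists, etc.
/// Only each type's preferred extension is collected, so alternate extensions for the same format may be absent.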
static let supportedFileExtensions: Set<String> = {
let avTypes = AVURLAsset.audiovisualTypes()
let extensions = avTypes.compactMap { fileType -> String? in
guard let utType = UTType(fileType.rawValue) else { return nil }
guard utType.conforms(to: .audio) || utType.conforms(to: .movie) else { return nil }
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Restrict movie extensions to containers the pipeline can handle

Including every UTType that conforms to .movie in supportedFileExtensions admits many new video extensions (e.g. avi, mts, vob) that were previously rejected, but downstream logic still treats only mp4/mov as video containers (isVideoContainer = ["mp4", "mov"]). For providers with prefersNativeFileTranscription == true, those newly admitted movie files now take the native path instead of the buffered video path and can fail at runtime, turning a deterministic fileNotSupported rejection into a later transcription/conversion error.

Useful? React with 👍 / 👎.

return utType.preferredFilenameExtension
}
return Set(extensions)
Comment on lines +76 to +82
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Filter dynamic types to audio formats before validation

The new allowlist is built from AVURLAsset.audiovisualTypes() without filtering, but this API includes non-transcribable types (for example subtitle/playlist formats like vtt, ttml, m3u shown in the commit notes). Because this set gates fileNotSupported, those files now pass extension validation and only fail later at AVAudioFile(forReading:) with an audioConversionFailed error, which is a user-visible regression from the previous explicit unsupported-format path.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any clever way to do this filtering or would I need to hardcode the list of non-transcribable types?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found a way to filter dynamically, commit

cat <<'EOF' > /tmp/probe3.swift
import AVFoundation
import UniformTypeIdentifiers

let avTypes = AVURLAsset.audiovisualTypes()
let audioOrVideo = avTypes.compactMap { fileType -> String? in
    guard let utType = UTType(fileType.rawValue) else { return nil }
    guard utType.conforms(to: .audio) || utType.conforms(to: .movie) else { return nil }
    return utType.preferredFilenameExtension
}
let sorted = Set(audioOrVideo).sorted()
print("Audio/Video only (\(sorted.count)): \(sorted)")

// Show what got filtered out
let all = avTypes.compactMap { UTType($0.rawValue)?.preferredFilenameExtension }
let excluded = Set(all).subtracting(Set(audioOrVideo)).sorted()
print("\nExcluded (\(excluded.count)): \(excluded)")
EOF
swiftc -sdk $(xcrun --sdk macosx --show-sdk-path) /tmp/probe3.swift -o /tmp/probe3 && /tmp/probe3
Audio/Video only (36): ["3g2", "3gp", "aa", "aac", "aax", "ac3", "aifc", "aiff", "amr", "au", "avi", "caf", "dv", "eac3", "flac", "loas", "m2p", "m2v", "m4a", "m4b", "m4p", "m4r", "m4v", "mod", "mov", "mp2", "mp3", "mp4", "mpg", "mts", "ogg", "qta", "ts", "vob", "w64", "wav"]

Excluded (16): ["aivu", "awb", "itt", "m1a", "m2a", "m3u", "mp1", "mpa", "mqv", "pls", "sc2", "scc", "ttml", "vtt", "webvtt", "xhe"]

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are the descriptions for the remaining 36; they are all containers that can genuinely carry audio:

cat <<'EOF' > /tmp/probe4.swift
import AVFoundation
import UniformTypeIdentifiers

let avTypes = AVURLAsset.audiovisualTypes()
let audioOrVideo: [(String, String, Bool, Bool)] = avTypes.compactMap { fileType in
    guard let utType = UTType(fileType.rawValue) else { return nil }
    guard utType.conforms(to: .audio) || utType.conforms(to: .movie) else { return nil }
    guard let ext = utType.preferredFilenameExtension else { return nil }
    return (ext, utType.localizedDescription ?? "?", utType.conforms(to: .audio), utType.conforms(to: .movie))
}
for (ext, desc, isAudio, _) in audioOrVideo.sorted(by: { $0.0 < $1.0 }) {
    let kind = isAudio ? "audio" : "video"
    print("\(ext.padding(toLength: 6, withPad: " ", startingAt: 0)) \(kind.padding(toLength: 6, withPad: " ", startingAt: 0)) \(desc)")
}
EOF
swiftc -sdk $(xcrun --sdk macosx --show-sdk-path) /tmp/probe4.swift -o /tmp/probe4 && /tmp/probe4
3g2    video  3GPP2 movie
3gp    video  3GPP movie
aa     audio  Audible.com Audiobook
aac    audio  AAC audio
aax    audio  Audible.com Audiobook
ac3    audio  AC-3 audio
aifc   audio  AIFF-C audio
aiff   audio  AIFF audio
amr    audio  Adaptive Multi-rate audio
au     audio  AU audio
avi    video  AVI movie
caf    audio  Apple CoreAudio format
dv     video  DV movie
eac3   audio  Enhanced AC-3 audio
flac   audio  FLAC audio
loas   audio  Low Overhead MPEG-4 Audio Stream
m2p    video  MPEG-2 Stream
m2v    video  MPEG-2 video
m4a    audio  Apple MPEG-4 audio
m4b    audio  protected MPEG-4 audio
m4p    audio  protected MPEG-4 audio
m4r    audio  Ringtone
m4v    video  Apple MPEG-4 movie
mod    audio  MOD Audio File
mov    video  QuickTime movie
mp2    audio  MP2 audio
mp2    audio  MP2 audio
mp3    audio  MP3 audio
mp4    video  MPEG-4 movie
mp4    audio  MPEG-4 audio
mpg    video  MPEG movie
mts    video  AVCHD MPEG-2 Transport Stream
ogg    audio  Ogg Audio
qta    audio  QuickTime Audio
ts     video  MPEG-2 Transport Stream
vob    video  VOB File (DVD Video)
w64    audio  Wave64 Audio
wav    audio  Waveform audio

}()

/// Content types accepted by the file picker — broad categories so the OS filters naturally.
/// Passed as `allowedContentTypes` to the `.fileImporter` in `MeetingTranscriptionView`.
static let allowedContentTypes: [UTType] = [.audio, .movie]

/// User-facing description of supported formats (curated for readability).
/// Shown beneath the "Choose Audio or Video File" headline and appended to the
/// `fileNotSupported` error message. It is a fixed string, not derived from `supportedFileExtensions`.
static let supportedFormatsDescription = "Supported: WAV, MP3, M4A, OGG, MP4, MOV, and more"

/// Error copy shown when a dropped file is not accepted.
/// `MeetingTranscriptionView` aliases this as its private `dropErrorCopy`.
static let dropErrorCopy = "Accepted file types: WAV, MP3, M4A, OGG, MP4, MOV, and more."

/// Share the ASR service instance to avoid loading models twice
private let asrService: ASRService

Expand Down Expand Up @@ -159,11 +183,10 @@ final class MeetingTranscriptionService: ObservableObject {

// Check file extension
let fileExtension = fileURL.pathExtension.lowercased()
let supportedFormats = ["wav", "mp3", "m4a", "ogg", "aac", "flac", "aiff", "caf", "mp4", "mov"]

guard supportedFormats.contains(fileExtension) else {
guard Self.supportedFileExtensions.contains(fileExtension) else {
throw TranscriptionError
.fileNotSupported("Format .\(fileExtension) not supported. Supported: \(supportedFormats.joined(separator: ", "))")
.fileNotSupported("Format .\(fileExtension) not supported. \(Self.supportedFormatsDescription)")
}

// Get audio duration for progress display
Expand All @@ -181,7 +204,8 @@ final class MeetingTranscriptionService: ObservableObject {
DebugLogger.shared.warning("Could not determine audio duration: \(error.localizedDescription)", source: "MeetingTranscriptionService")
}

let isVideoContainer = ["mp4", "mov"].contains(fileExtension)
let isVideoContainer = UTType(filenameExtension: fileExtension)
.map { $0.conforms(to: .movie) } ?? false

if provider.prefersNativeFileTranscription && !isVideoContainer {
self.currentStatus = duration > 0 ? "Transcribing audio (\(Int(duration))s)..." : "Transcribing audio..."
Expand Down
16 changes: 4 additions & 12 deletions Sources/Fluid/UI/MeetingTranscriptionView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ struct MeetingTranscriptionView: View {
Text("Choose Audio or Video File")
.font(.headline)

Text("Supported: WAV, MP3, M4A, OGG, MP4, MOV, and more")
Text(MeetingTranscriptionService.supportedFormatsDescription)
.font(.caption)
.foregroundColor(.secondary)
}
Expand Down Expand Up @@ -221,15 +221,7 @@ struct MeetingTranscriptionView: View {
}
.fileImporter(
isPresented: self.$showingFilePicker,
allowedContentTypes: [
.audio,
.movie,
.mpeg4Movie,
UTType(filenameExtension: "wav") ?? .audio,
UTType(filenameExtension: "mp3") ?? .audio,
UTType(filenameExtension: "m4a") ?? .audio,
UTType(filenameExtension: "ogg") ?? .audio,
],
allowedContentTypes: MeetingTranscriptionService.allowedContentTypes,
allowsMultipleSelection: false
) { result in
switch result {
Expand Down Expand Up @@ -545,9 +537,9 @@ struct MeetingTranscriptionView: View {

// MARK: - Helper Functions

private static let supportedFileExtensions = ["wav", "mp3", "m4a", "ogg", "aac", "flac", "aiff", "caf", "mp4", "mov"]
private static let supportedFileExtensions = MeetingTranscriptionService.supportedFileExtensions

private static let dropErrorCopy = "Accepted file types: WAV, MP3, M4A, OGG, MP4, MOV, and more."
private static let dropErrorCopy = MeetingTranscriptionService.dropErrorCopy

private func handleDrop(providers: [NSItemProvider]) -> Bool {
guard let provider = providers.first else { return false }
Expand Down
Loading