diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go
new file mode 100644
index 00000000..18d47237
--- /dev/null
+++ b/internal/importer/archive/iso/bluray.go
@@ -0,0 +1,177 @@
+package iso
+
+import (
+	"context"
+	"io"
+	"log/slog"
+	"sort"
+	"strings"
+)
+
+// maxPlaylistSize bounds how many bytes ResolveMainFeature reads for one
+// .mpls file. Real playlists are a few KiB; the size field comes from the
+// disc image, so without a bound a corrupt entry could make readISOFile
+// attempt an enormous (or panicking) allocation.
+const maxPlaylistSize = 16 << 20
+
+// MainFeaturePlaylist is the result of analysing a Blu-ray's BDMV.
+// Streams is the ordered list of stream file entries (M2TS, or SSIF where
+// a clip has no M2TS) that, concatenated, form the main feature.
+// ResolveMainFeature never returns a value whose Streams is empty.
+type MainFeaturePlaylist struct {
+	PlaylistName  string         // path inside the image, e.g. "BDMV/PLAYLIST/00800.MPLS" — for logging only
+	DurationTicks int64          // sum of (OUT-IN) at 45 kHz
+	Streams       []isoFileEntry // ordered stream entries
+}
+
+// ResolveMainFeature inspects the entries returned by ListISOFiles for a
+// Blu-ray (BDMV) structure and returns the playlist that represents the
+// main movie. Returns nil if the disc is not BDMV, has no .mpls, or no
+// playlist resolves to a non-empty stream sequence.
+//
+// Selection heuristic: pick the playlist with the longest total
+// presentation duration. Ties break on PlayItem count (more clips wins),
+// then lexicographically smallest path for determinism.
+//
+// Failures reading or parsing individual playlists are non-fatal — we
+// skip them and keep evaluating the rest, mirroring how every Blu-ray
+// player tolerates malformed entries in BDMV/PLAYLIST/. PlayItems whose
+// clip has neither an M2TS nor an SSIF entry are left out of Streams;
+// DurationTicks still covers the whole playlist.
+//
+// ctx is used for logging only. rs is the image the extents in files
+// point into.
+func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist {
+	// Build per-clip indexes. M2TS streams live at BDMV/STREAM/<clip>.M2TS
+	// and carry the 2D version (or the only version on a 2D disc). SSIF
+	// streams live at BDMV/STREAM/SSIF/<clip>.SSIF and carry the
+	// stereoscopic interleaved 3D version — on 3D-only Blu-ray releases
+	// the main feature playlist references SSIF clips, while the M2TS
+	// directory holds only extras. We prefer M2TS when both exist (smaller
+	// bytes, universal playback) and fall back to SSIF when only it
+	// resolves the playlist's clip names.
+	m2tsByClip := make(map[string]isoFileEntry)
+	ssifByClip := make(map[string]isoFileEntry)
+	var playlistEntries []isoFileEntry
+	for _, f := range files {
+		up := strings.ToUpper(f.path)
+		switch {
+		case strings.HasPrefix(up, "BDMV/PLAYLIST/") && strings.HasSuffix(up, ".MPLS"):
+			playlistEntries = append(playlistEntries, f)
+		case strings.HasPrefix(up, "BDMV/STREAM/SSIF/") && strings.HasSuffix(up, ".SSIF"):
+			base := up[len("BDMV/STREAM/SSIF/") : len(up)-len(".SSIF")]
+			ssifByClip[base] = f
+		case strings.HasPrefix(up, "BDMV/STREAM/") && strings.HasSuffix(up, ".M2TS"):
+			base := up[len("BDMV/STREAM/") : len(up)-len(".M2TS")]
+			m2tsByClip[base] = f
+		}
+	}
+	if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) {
+		return nil
+	}
+
+	// Sort by path so iteration order is deterministic. Together with the
+	// strict comparison in isBetterPlaylist, the lexicographically smallest
+	// path wins when duration and PlayItem count both tie.
+	sort.Slice(playlistEntries, func(i, j int) bool {
+		return playlistEntries[i].path < playlistEntries[j].path
+	})
+
+	var (
+		best      *MainFeaturePlaylist
+		bestItems int // PlayItem count of best, kept for a like-for-like tie-break
+	)
+	for _, pe := range playlistEntries {
+		if pe.size > maxPlaylistSize {
+			continue // not a plausible playlist; don't buffer it
+		}
+		data, err := readISOFile(rs, pe)
+		if err != nil {
+			continue
+		}
+		pl, err := ParseMPLS(data)
+		if err != nil {
+			continue
+		}
+
+		// Resolve clip names in playlist order, preferring M2TS over SSIF.
+		streams := make([]isoFileEntry, 0, len(pl.PlayItems))
+		for _, it := range pl.PlayItems {
+			name := strings.ToUpper(it.ClipName)
+			if entry, ok := m2tsByClip[name]; ok {
+				streams = append(streams, entry)
+				continue
+			}
+			if entry, ok := ssifByClip[name]; ok {
+				streams = append(streams, entry)
+			}
+		}
+		if len(streams) == 0 {
+			continue
+		}
+
+		cand := &MainFeaturePlaylist{
+			PlaylistName:  pe.path,
+			DurationTicks: pl.DurationTicks(),
+			Streams:       streams,
+		}
+		// Compare PlayItem counts on both sides. Using len(best.Streams)
+		// would pit cand's full PlayItem count against best's resolved
+		// clip count and let a later playlist win a genuine tie.
+		if best == nil || isBetterPlaylist(cand, best, len(pl.PlayItems), bestItems) {
+			best, bestItems = cand, len(pl.PlayItems)
+		}
+	}
+	if best != nil {
+		slog.InfoContext(ctx, "Blu-ray main feature playlist resolved",
+			"playlist", best.PlaylistName,
+			"clips", len(best.Streams),
+			"duration_seconds", best.DurationTicks/45000,
+		)
+	}
+	return best
+}
+
+// isBetterPlaylist reports whether cand should replace best.
+// Comparison: longer duration > more PlayItems > earlier path.
+// candItems and bestItems are the PlayItem counts of the two playlists.
+// The path tie-break relies on playlistEntries being sorted before
+// iteration so the smaller path is seen first; we therefore only swap
+// when strictly better.
+func isBetterPlaylist(cand, best *MainFeaturePlaylist, candItems, bestItems int) bool {
+	if cand.DurationTicks != best.DurationTicks {
+		return cand.DurationTicks > best.DurationTicks
+	}
+	return candItems > bestItems
+}
+
+// readISOFile reads the contents of one isoFileEntry from rs,
+// concatenating bytes across every on-disc extent and stopping once
+// e.size bytes have been collected. MPLS files are tiny (~KBs) and almost
+// always single-extent, but multi-extent MPLS is legal so we iterate.
+// The whole file is buffered in memory, so callers must bound e.size.
+func readISOFile(rs io.ReadSeeker, e isoFileEntry) ([]byte, error) {
+	remaining := e.size
+	out := make([]byte, 0, remaining)
+	for _, ext := range e.extents {
+		if remaining == 0 {
+			break
+		}
+		// Never read past the logical file size, even if the extent
+		// list claims more bytes than the entry does.
+		n := ext.length
+		if n > remaining {
+			n = remaining
+		}
+		if _, err := rs.Seek(int64(ext.lba)*iso9660SectorSize, io.SeekStart); err != nil {
+			return nil, err
+		}
+		chunk := make([]byte, n)
+		if _, err := io.ReadFull(rs, chunk); err != nil {
+			return nil, err
+		}
+		out = append(out, chunk...)
+		remaining -= n
+	}
+	return out, nil
+}
diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go
new file mode 100644
index 00000000..42b3c02c
--- /dev/null
+++ b/internal/importer/archive/iso/bluray_test.go
@@ -0,0 +1,214 @@
+package iso
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"testing"
+)
+
+// mkEntry builds a single-extent isoFileEntry — the common case for tests.
+func mkEntry(path string, lba uint32, size uint64) isoFileEntry {
+	return isoFileEntry{
+		path:    path,
+		size:    size,
+		extents: []isoExtent{{lba: lba, length: size}},
+	}
+}
+
+// makeImage assembles an in-memory disc image by placing each piece of
+// data at the sector index given in its key. The returned reader can be
+// used as if it were a real ISO read-seeker.
+func makeImage(t *testing.T, pieces map[uint32][]byte) io.ReadSeeker {
+	t.Helper()
+	var maxSect uint32
+	for s, b := range pieces {
+		end := s + uint32((len(b)+iso9660SectorSize-1)/iso9660SectorSize)
+		if end > maxSect {
+			maxSect = end
+		}
+	}
+	if maxSect == 0 {
+		maxSect = 1
+	}
+	img := make([]byte, int(maxSect)*iso9660SectorSize)
+	for s, b := range pieces {
+		copy(img[int(s)*iso9660SectorSize:], b)
+	}
+	return bytes.NewReader(img)
+}
+
+func TestResolveMainFeature(t *testing.T) {
+	t.Parallel()
+
+	t.Run("picks longest playlist", func(t *testing.T) {
+		t.Parallel()
+		// Two playlists:
+		//   00001.MPLS → 1 clip, short (extras playlist)
+		//   00800.MPLS → 3 clips, long (main feature)
+		short := buildMPLS(t, "0200", []MPLSPlayItem{
+			{ClipName: "00010", InTime: 0, OutTime: 45000},
+		}, nil)
+		long := buildMPLS(t, "0200", []MPLSPlayItem{
+			{ClipName: "00001", InTime: 0, OutTime: 90 * 45000},
+			{ClipName: "00002", InTime: 0, OutTime: 60 * 45000},
+			{ClipName: "00003", InTime: 0, OutTime: 30 * 45000},
+		}, nil)
+
+		rs := makeImage(t, map[uint32][]byte{
+			100: short,
+			110: long,
+		})
+
+		// File listing: two playlists and four M2TS clips (one extra).
+ files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(short))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(long))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 2_000_000), + mkEntry("BDMV/STREAM/00003.M2TS", 400, 3_000_000), + mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), + } + + got := ResolveMainFeature(context.Background(), rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3", len(got.Streams)) + } + wantOrder := []string{"BDMV/STREAM/00001.M2TS", "BDMV/STREAM/00002.M2TS", "BDMV/STREAM/00003.M2TS"} + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("non-BDMV disc returns nil", func(t *testing.T) { + t.Parallel() + files := []isoFileEntry{ + mkEntry("movie.mkv", 100, 1_000_000), + } + if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { + t.Errorf("expected nil for non-BDMV disc, got %+v", got) + } + }) + + t.Run("BDMV with no parseable MPLS returns nil", func(t *testing.T) { + t.Parallel() + rs := makeImage(t, map[uint32][]byte{ + 100: []byte("not a real mpls"), + }) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, 15), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + } + if got := ResolveMainFeature(context.Background(), rs, files); got != nil { + t.Errorf("expected nil for unparseable MPLS, got %+v", got) + } + }) + + t.Run("3D BD: playlist resolves against SSIF when M2TS missing", func(t *testing.T) { + t.Parallel() + // Avatar-2-style 3D-only release: BDMV/STREAM/*.M2TS holds only + // extras (tiny). 
The real main feature lives in BDMV/STREAM/SSIF/ + // and is referenced by its own MPLS. The resolver must index SSIF + // so the long playlist resolves and wins. + extras := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 90 * 45000}, // 90s extra + }, nil) + mainFeature3D := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00101", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00102", InTime: 0, OutTime: 12 * 60 * 45000}, // 132 min total + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: extras, + 110: mainFeature3D, + }) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(extras))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(mainFeature3D))), + // Only the extras live as M2TS: + mkEntry("BDMV/STREAM/00010.M2TS", 200, 50_000_000), + // Main feature is SSIF only: + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00101.SSIF", 400, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00102.SSIF", 500, 5_000_000_000), + } + + got := ResolveMainFeature(context.Background(), rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS (3D main feature)", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3 SSIF clips", len(got.Streams)) + } + wantOrder := []string{ + "BDMV/STREAM/SSIF/00100.SSIF", + "BDMV/STREAM/SSIF/00101.SSIF", + "BDMV/STREAM/SSIF/00102.SSIF", + } + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("hybrid 3D BD: prefers M2TS over SSIF when both exist", func(t *testing.T) { + t.Parallel() + // Both 2D MPLS (refs M2TS) and 3D MPLS (refs SSIF) point at clips + // of the same name. 
With both files present, the M2TS version is + // the right pick: smaller bytes, universal playback. The resolver + // should select it even if the 3D playlist is marginally longer. + mainFeature := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{100: mainFeature}) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00800.MPLS", 100, uint64(len(mainFeature))), + mkEntry("BDMV/STREAM/00100.M2TS", 200, 20_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 40_000_000_000), + } + + got := ResolveMainFeature(context.Background(), rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if len(got.Streams) != 1 { + t.Fatalf("Streams len = %d, want 1", len(got.Streams)) + } + if got.Streams[0].path != "BDMV/STREAM/00100.M2TS" { + t.Errorf("picked %q, want M2TS over SSIF", got.Streams[0].path) + } + }) + + t.Run("playlist referencing missing M2TS yields nil", func(t *testing.T) { + t.Parallel() + // Playlist references a clip that has no corresponding M2TS entry. + data := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "99999", InTime: 0, OutTime: 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{ + 100: data, + }) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(data))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + } + if got := ResolveMainFeature(context.Background(), rs, files); got != nil { + t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) + } + }) +} diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index ace50d53..31415fbd 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -10,11 +10,32 @@ import ( const iso9660SectorSize = 2048 -// isoFileEntry is one non-directory file returned by ListISOFiles. +// isoFileEntry is one non-directory file returned by ListISOFiles. 
The +// file's data on disc may be split across multiple contiguous extents +// — Blu-ray main-feature M2TS files routinely use hundreds of extents +// chained via Allocation Extent Descriptors. extents is in disc order; +// concatenating their bytes yields the complete file. type isoFileEntry struct { - path string // full path within ISO (e.g. "BDMV/STREAM/00001.M2TS") - lba uint32 - size uint64 + path string + size uint64 + extents []isoExtent +} + +// firstLBA returns the start LBA of the file's first extent. Callers +// that only need a starting sector (e.g. reading a small MPLS file +// known to be single-extent) can use this. +func (e isoFileEntry) firstLBA() uint32 { + if len(e.extents) == 0 { + return 0 + } + return e.extents[0].lba +} + +// isoExtent is one contiguous run of sectors on disc that contributes +// length bytes to the logical file. +type isoExtent struct { + lba uint32 + length uint64 } // ───────────────────────────────────────────────────────────────────────────── @@ -100,7 +121,13 @@ func iso9660WalkAll(rs io.ReadSeeker, dirLBA uint32, dirSize uint64, prefix stri } result = append(result, sub...) } else { - result = append(result, isoFileEntry{path: entryPath, lba: e.lba, size: e.size}) + // ISO 9660 stores file data in a single contiguous extent. + // (Interleave mode exists but is essentially never used.) + result = append(result, isoFileEntry{ + path: entryPath, + size: e.size, + extents: []isoExtent{{lba: e.lba, length: e.size}}, + }) } } return result, nil @@ -328,6 +355,66 @@ func udfResolveICB(loc udfLBA, metaMap []udfMetaSpan, partStart uint32) (uint32, return udfResolveMetaBlock(loc.block, metaMap, partStart) } +// readMetaExtent reads a contiguous extent of `length` bytes starting at +// logical metadata block `startBlock`, walking sector by sector through +// the metaMap so multi-sector extents (e.g. a 26 KiB directory) are +// returned in full. 
Without this, callers that read only the first +// 2048-byte sector silently lose every entry past the first sector — the +// root cause of the "main-feature M2TS files missing from listing" bug. +func readMetaExtent(rs io.ReadSeeker, startBlock uint32, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + for b := uint32(0); remaining > 0; b++ { + ps, err := udfResolveMetaBlock(startBlock+b, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) + remaining -= take + } + return out, nil +} + +// readICBExtent is the long_ad analogue of readMetaExtent: walks blocks +// by incrementing the logical-block field inside the ICB long_ad. +func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + cur := loc + for remaining > 0 { + ps, err := udfResolveICB(cur, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) + remaining -= take + cur.block++ + } + return out, nil +} + // udfReadDirEntries reads all File Identifier Descriptor records from a // File Entry at physSect. 
func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { @@ -360,21 +447,22 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, case 3: // inline dirData = buf[allocDescOff : allocDescOff+allocDescLen] case 0: // short_ad + // A single allocation descriptor describes an extent that can span + // many 2048-byte sectors. The previous version of this code read + // only the first sector and truncated the rest of the extent, + // silently dropping every directory entry past ~30 FIDs — which is + // why BDMV/STREAM/ on a real Blu-ray (~300 entries, ~26 KiB) lost + // every main-feature M2TS clip. We now walk the full extent. for off := 0; off+8 <= allocDescLen; off += 8 { ad := udfParseShortAD(buf[allocDescOff:], off) if ad.length == 0 { break } - ps, rerr := udfResolveMetaBlock(ad.block, metaMap, partStart) - if rerr != nil { - return nil, rerr - } - _, sector, rerr := udfReadTag(rs, ps) + data, rerr := readMetaExtent(rs, ad.block, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) } case 1: // long_ad for off := 0; off+16 <= allocDescLen; off += 16 { @@ -382,16 +470,11 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, if ad.length == 0 { break } - ps, rerr := udfResolveICB(ad.loc, metaMap, partStart) + data, rerr := readICBExtent(rs, ad.loc, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - _, sector, rerr := udfReadTag(rs, ps) - if rerr != nil { - return nil, rerr - } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) 
} } @@ -549,24 +632,190 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS allocDescLen = len(feBuf) - allocDescOff } - var fileLBA uint32 - switch allocType { - case 0: - if allocDescLen >= 8 { - ad := udfParseShortAD(feBuf[allocDescOff:], 0) - fileLBA = partStart + ad.block + extents := collectFileExtents(rs, feBuf[allocDescOff:allocDescOff+allocDescLen], allocType, metaMap, partStart, infoLen, fePhys) + if len(extents) == 0 { + continue + } + result = append(result, isoFileEntry{ + path: entryPath, + size: infoLen, + extents: extents, + }) + } + return result, nil +} + +// collectFileExtents walks the allocation descriptors of a UDF File Entry +// (or Extended File Entry), following Allocation Extent Descriptor chains +// when the inline AD area is exhausted, and returns one isoExtent per +// recorded data extent in disc order. +// +// allocType is the lower 3 bits of the FE's ICBTag flags: +// +// 0 → short_ad (8 bytes each) +// 1 → long_ad (16 bytes each) +// 2 → extended ad (20 bytes; rare, treated as short_ad-prefix here) +// 3 → file data embedded in the FE itself (small files) +// +// The high 2 bits of each AD's length field encode the AD "type": +// +// 0 → recorded & allocated extent (real data — emit) +// 1 → not recorded, allocated (sparse — skip, file should not see this on BD) +// 2 → not recorded, not allocated (hole — skip) +// 3 → next AD points at a continuation Allocation Extent Descriptor +// (tag 258) holding more ADs; chase the chain +// +// embeddedFEPhys is only meaningful for allocType 3 (it's the FE's own +// physical sector — the file data is inline at allocDescOff of that +// sector, so we materialise a single synthetic extent pointing at it). 
+func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, metaMap []udfMetaSpan, partStart uint32, infoLen uint64, embeddedFEPhys uint32) []isoExtent { + if allocType == 3 { + // Embedded data — a single "extent" pointing at the FE sector + // itself with the inline-AD area treated as the file data. We + // can't emit a usable LBA for slicing because the data isn't + // sector-aligned. Skip for now; BD streams never use embedded. + return nil + } + var step int + switch allocType { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 // first 16 bytes are a long_ad; trailing 4 bytes are impl-use + default: + return nil + } + + var extents []isoExtent + chase := inlineADs + safety := 0 + for { + safety++ + if safety > 4096 { + break // pathological — bail to avoid runaway IO + } + var chain *udfLongAD + for off := 0; off+step <= len(chase); off += step { + lenField := binary.LittleEndian.Uint32(chase[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + // short_ad continuation: the 4 bytes after length + // are the next AED's logical block; partition is + // implicit (same as parent). + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(chase[off+4:])}} + default: + loc = udfParseLongAD(chase, off) + } + chain = &loc + break } - case 1: - if allocDescLen >= 16 { - ad := udfParseLongAD(feBuf[allocDescOff:], 0) - fileLBA, _ = udfResolveICB(ad.loc, metaMap, partStart) + if adType != 0 { + // Type 1 (allocated but not recorded) and type 2 (hole) + // don't carry real bytes. Skip — BD streams shouldn't + // have these in practice. 
+ continue } + var lba uint32 + switch step { + case 8: + ad := udfParseShortAD(chase, off) + resolved, err := udfResolveMetaBlock(ad.block, metaMap, partStart) + if err != nil { + continue + } + lba = resolved + default: + ad := udfParseLongAD(chase, off) + resolved, err := udfResolveICB(ad.loc, metaMap, partStart) + if err != nil { + continue + } + lba = resolved + } + extents = append(extents, isoExtent{lba: lba, length: uint64(adLen)}) } - if fileLBA > 0 { - result = append(result, isoFileEntry{path: entryPath, lba: fileLBA, size: infoLen}) + if chain == nil { + break } + ps, err := udfResolveICB(chain.loc, metaMap, partStart) + if err != nil { + break + } + _, aedBuf, err := udfReadTag(rs, ps) + if err != nil { + break + } + // Allocation Extent Descriptor layout: 16-byte tag + 4-byte + // previous-AED pointer + 4-byte length-of-allocation-descriptors, + // then the ADs themselves. + if len(aedBuf) < 24 { + break + } + nextLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if nextLen <= 0 || 24+nextLen > len(aedBuf) { + break + } + chase = aedBuf[24 : 24+nextLen] } - return result, nil + + // Defensive: cap the total extent bytes at the FE's info_length so a + // malformed disc with mis-sized ADs can't return more bytes than the + // file legitimately contains. + var total uint64 + for i := range extents { + if total+extents[i].length > infoLen { + extents[i].length = infoLen - total + extents = extents[:i+1] + break + } + total += extents[i].length + } + + // Coalesce physically contiguous extents — many BD3D SSIF files have + // dozens of small ADs that sit right next to each other on disc. The + // underlying bytes are one contiguous run; merging the ADs collapses + // the NestedSources count proportionally (Avatar SSIF: 23 → 2) and + // shrinks both the metadata proto and the validation surface. 
+ extents = coalesceExtents(extents) + _ = embeddedFEPhys + return extents +} + +// coalesceExtents merges adjacent extents whose physical sectors are +// contiguous (next.lba == prev.lba + prev.length/sector). Returns the +// possibly-shorter slice in disc order. A file whose extents are +// physically scattered (typical for BD M2TS clips interleaved with SSIF +// dependent-view data) is returned unchanged. +func coalesceExtents(in []isoExtent) []isoExtent { + if len(in) <= 1 { + return in + } + out := make([]isoExtent, 0, len(in)) + cur := in[0] + for i := 1; i < len(in); i++ { + next := in[i] + // length must be a whole number of sectors for the contiguity + // arithmetic to apply; if it isn't (final partial sector of a + // file), fall through and start a new run after. + if cur.length%iso9660SectorSize == 0 && + next.lba == cur.lba+uint32(cur.length/iso9660SectorSize) { + cur.length += next.length + continue + } + out = append(out, cur) + cur = next + } + out = append(out, cur) + return out } // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go new file mode 100644 index 00000000..1c52d65b --- /dev/null +++ b/internal/importer/archive/iso/fs_local_test.go @@ -0,0 +1,322 @@ +package iso + +import ( + "encoding/binary" + "fmt" + "os" + "sort" + "testing" +) + +// TestLocalISO_DiscoverBigFiles is a manual integration test: it walks a +// real Blu-ray ISO from local disk and dumps a size-sorted summary. Skipped +// unless ALTMOUNT_LOCAL_ISO is set, so CI stays unaffected. +// +// Set ALTMOUNT_LOCAL_ISO=/abs/path/to.iso to run, e.g.: +// +// ALTMOUNT_LOCAL_ISO=/Volumes/.../DISC_1.iso go test \ +// ./internal/importer/archive/iso/... 
-run TestLocalISO -v +func TestLocalISO_DiscoverBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open %s: %v", path, err) + } + defer f.Close() + + stat, _ := f.Stat() + t.Logf("ISO: %s size=%d (%.2f GiB)", path, stat.Size(), float64(stat.Size())/(1<<30)) + + entries, err := ListISOFiles(f) + if err != nil { + t.Fatalf("ListISOFiles: %v", err) + } + + var sum int64 + for _, e := range entries { + sum += int64(e.size) + } + t.Logf("listed_files=%d listed_sum=%d (%.2f GiB) coverage=%.1f%%", + len(entries), sum, float64(sum)/(1<<30), 100*float64(sum)/float64(stat.Size())) + + // Top 25 by size — should match `ls -laS BDMV/STREAM/` if walker is sane. + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + t.Logf("top 25 by size:") + for i, e := range entries { + if i >= 25 { + break + } + t.Logf(" %s size=%d (%.2f MiB) extents=%d first_lba=%d", + e.path, e.size, float64(e.size)/(1<<20), len(e.extents), e.firstLBA()) + } + + // Sanity sentinels for the Avatar disc 1 main-feature clips. Each is + // >1 GiB and uses many on-disc extents (00022.m2ts has ~945). Assert + // the file is present, the size is right, AND the extents slice fully + // covers it — otherwise downstream concat reads wrong bytes past the + // first extent. 
+ want := []string{"BDMV/STREAM/00016.m2ts", "BDMV/STREAM/00022.m2ts", "BDMV/STREAM/00028.m2ts"} + have := make(map[string]isoFileEntry, len(entries)) + for _, e := range entries { + have[e.path] = e + } + for _, w := range want { + e, ok := have[w] + if !ok { + t.Errorf("missing %s — walker dropped this file", w) + continue + } + if e.size < 1<<30 { + t.Errorf("%s reported size=%d (%.2f MiB), want >1 GiB", + w, e.size, float64(e.size)/(1<<20)) + } + if len(e.extents) < 2 { + t.Errorf("%s has only %d extents — expected multi-extent (BD main-feature clips fragment heavily)", + w, len(e.extents)) + } + var covered uint64 + for _, ext := range e.extents { + covered += ext.length + } + if covered != e.size { + t.Errorf("%s: sum of extent lengths = %d but file size = %d (delta %d)", + w, covered, e.size, int64(e.size)-int64(covered)) + } + } + + if t.Failed() { + fmt.Println(">>> walker is dropping big files; this is the bug") + } +} + +// TestLocalISO_CountExtentsForBigFiles probes each entry's File Entry on the +// real ISO and reports how many allocation descriptors a file's data uses. +// The walker today reads only the first AD — if any of the multi-GiB main- +// feature clips reports >1 AD, downstream byte reads past the first extent +// will hit wrong sectors. Gated on ALTMOUNT_LOCAL_ISO same as the discovery +// test. +func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + partStart, metaMap, rootICB, err := udfSetup(f) + if err != nil { + t.Fatalf("udfSetup: %v", err) + } + + // Re-walk to get entries plus their ICB so we can re-read each FE and + // count its allocation descriptors. We can't reuse ListISOFiles output + // directly because isoFileEntry discards the ICB. 
+ type probed struct { + path string + size uint64 + ads int // allocation descriptors observed (= number of on-disc extents) + alloc byte + } + + var probedAll []probed + var walk func(dirICB udfLongAD, prefix string) + walk = func(dirICB udfLongAD, prefix string) { + physSect, e := udfResolveICB(dirICB.loc, metaMap, partStart) + if e != nil { + return + } + entries, e := udfReadDirEntries(f, physSect, metaMap, partStart) + if e != nil { + return + } + for _, ent := range entries { + p := ent.name + if prefix != "" { + p = prefix + "/" + ent.name + } + if ent.isDir { + walk(ent.icb, p) + continue + } + fePhys, rerr := udfResolveICB(ent.icb.loc, metaMap, partStart) + if rerr != nil { + continue + } + feTag, feBuf, rerr := udfReadTag(f, fePhys) + if rerr != nil || (feTag.id != 261 && feTag.id != 266) { + continue + } + alloc := feBuf[34] & 0x07 + var adOff, adLen int + if feTag.id == 266 { + eaLen := int(binary.LittleEndian.Uint32(feBuf[208:212])) + adLen = int(binary.LittleEndian.Uint32(feBuf[212:216])) + adOff = 216 + eaLen + } else { + eaLen := int(binary.LittleEndian.Uint32(feBuf[168:172])) + adLen = int(binary.LittleEndian.Uint32(feBuf[172:176])) + adOff = 176 + eaLen + } + if adOff+adLen > len(feBuf) { + adLen = len(feBuf) - adOff + } + // Count extents using the UDF rules: high 2 bits of the + // length field encode the AD "type": + // 0 = recorded and allocated (real extent) + // 1 = not recorded, allocated (sparse / zero-fill) + // 2 = not recorded, not allocated (sparse hole) + // 3 = next AD points at a continuation AED sector, follow it + // We count types 0,1,2 as logical extents (each contributes + // length bytes to the file) and chase type 3 into AED chains. 
+ n := 0 + step := 0 + switch alloc { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 + case 3: + n = 1 // embedded + } + if step > 0 { + countADs := func(buf []byte) (extents int, chain *udfLongAD) { + for off := 0; off+step <= len(buf); off += step { + lenField := binary.LittleEndian.Uint32(buf[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(buf[off+4:])}} + case 16: + loc = udfParseLongAD(buf, off) + } + return extents, &loc + } + extents++ + } + return extents, nil + } + cnt, chain := countADs(feBuf[adOff : adOff+adLen]) + n = cnt + safety := 0 + for chain != nil && safety < 100 { + safety++ + ps, e := udfResolveICB(chain.loc, metaMap, partStart) + if e != nil { + break + } + _, aedBuf, e := udfReadTag(f, ps) + if e != nil { + break + } + // AED layout: 16-byte tag + 4-byte previous-AED pointer + // + 4-byte length-of-allocation-descriptors + ADs. + if len(aedBuf) < 24 { + break + } + aedLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if aedLen <= 0 || 24+aedLen > len(aedBuf) { + break + } + more, nextChain := countADs(aedBuf[24 : 24+aedLen]) + n += more + chain = nextChain + } + } + probedAll = append(probedAll, probed{ + path: p, + size: binary.LittleEndian.Uint64(feBuf[56:64]), + ads: n, + alloc: alloc, + }) + } + } + walk(rootICB, "") + + // Report the big files specifically + any file with >1 AD. 
+ sort.Slice(probedAll, func(i, j int) bool { return probedAll[i].size > probedAll[j].size }) + t.Logf("top 15 by size (with extent count):") + for i, p := range probedAll { + if i >= 15 { + break + } + t.Logf(" %s size=%d (%.2f MiB) alloc_type=%d extents=%d", + p.path, p.size, float64(p.size)/(1<<20), p.alloc, p.ads) + } + + multi := 0 + for _, p := range probedAll { + if p.ads > 1 { + multi++ + } + } + t.Logf("files with >1 extent: %d / %d", multi, len(probedAll)) + if multi == 0 { + t.Logf("CONCLUSION: all files are contiguous — single-LBA model is sufficient for this ISO") + } else { + t.Logf("CONCLUSION: fragmentation present — single-LBA walker yields WRONG bytes past extent 1") + } +} + +// TestLocalISO_CountAdjacentExtents checks whether multi-extent files have +// physically contiguous extents that could be coalesced. If yes, segment +// count downstream can be reduced dramatically — the importer hit +// total_segments_to_validate=888,903 on this NZB precisely because every +// AD became its own NestedSource even when adjacent ADs sat next to each +// other on disc. +func TestLocalISO_CountAdjacentExtents(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + entries, err := ListISOFiles(f) + if err != nil { + t.Fatalf("list: %v", err) + } + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + + const lookAt = 15 + for i, e := range entries { + if i >= lookAt { + break + } + if len(e.extents) <= 1 { + continue + } + // Count adjacent runs (where next.lba == this.lba + this.length/sector). 
+ adjacent := 0 + distinctRuns := 1 + for j := 1; j < len(e.extents); j++ { + prev := e.extents[j-1] + next := e.extents[j] + expectedNextLBA := prev.lba + uint32(prev.length/iso9660SectorSize) + if next.lba == expectedNextLBA { + adjacent++ + } else { + distinctRuns++ + } + } + t.Logf(" %s: extents=%d adjacent_pairs=%d distinct_runs=%d coalesce_ratio=%.1fx", + e.path, len(e.extents), adjacent, distinctRuns, + float64(len(e.extents))/float64(distinctRuns)) + } +} diff --git a/internal/importer/archive/iso/fs_test.go b/internal/importer/archive/iso/fs_test.go index c03e1c95..1b2bfe38 100644 --- a/internal/importer/archive/iso/fs_test.go +++ b/internal/importer/archive/iso/fs_test.go @@ -6,7 +6,17 @@ import ( "testing" ) -func TestUDFReadDirEntriesShortADClampsExtentLength(t *testing.T) { +// TestUDFReadDirEntriesTruncatedExtent locks in the fix for the bug where +// a directory's allocation descriptor advertised an extent spanning +// multiple sectors but the walker read only the first sector and silently +// dropped every entry past it (~ the reason the Avatar BDMV main-feature +// M2TS files were invisible). Two assertions: +// - readMetaExtent must keep reading sectors until ad.length is +// satisfied (the fix); +// - if a sector read fails because the image is shorter than ad.length, +// the walk returns partial data without an error so a malformed ISO +// can't fail the entire import. +func TestUDFReadDirEntriesTruncatedExtent(t *testing.T) { image := make([]byte, iso9660SectorSize*21) dirICBSector := image[10*iso9660SectorSize : 11*iso9660SectorSize] binary.LittleEndian.PutUint16(dirICBSector[0:2], 261) diff --git a/internal/importer/archive/iso/mpls.go b/internal/importer/archive/iso/mpls.go new file mode 100644 index 00000000..141d7a02 --- /dev/null +++ b/internal/importer/archive/iso/mpls.go @@ -0,0 +1,108 @@ +package iso + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// MPLS (Blu-ray PlayList) is a fixed binary format defined by the BDA spec. 
// We only parse the fields needed to identify the main feature playlist and
// its ordered list of M2TS clips: the clip_information_file_name for each
// PlayItem and the IN/OUT presentation times used to estimate duration.

// mplsHeaderSize is the fixed prefix length: 4 magic + 4 version +
// 4 PlayList offset + 4 PlayListMark offset + 4 ExtensionData offset.
const mplsHeaderSize = 20

// MPLSPlayItem describes one entry in a PlayList.
type MPLSPlayItem struct {
	// ClipName is the 5-character clip_information_file_name (e.g. "00001").
	// The corresponding stream lives at BDMV/STREAM/<ClipName>.M2TS.
	ClipName string
	// InTime and OutTime are 45 kHz presentation timestamps. Duration in
	// ticks is OutTime - InTime; convert to seconds by dividing by 45000.
	InTime  uint32
	OutTime uint32
}

// MPLSPlayList is the parsed view of a single .mpls file.
type MPLSPlayList struct {
	Version   string // e.g. "0100", "0200", "0300"
	PlayItems []MPLSPlayItem
}

// DurationTicks returns the sum of (OutTime-InTime) across PlayItems in
// 45 kHz ticks. Items whose OutTime is not after their InTime contribute
// nothing, so a malformed item can never make the total negative.
func (p *MPLSPlayList) DurationTicks() int64 {
	var total int64
	for _, it := range p.PlayItems {
		if it.OutTime > it.InTime {
			total += int64(it.OutTime - it.InTime)
		}
	}
	return total
}

// ParseMPLS decodes a .mpls file. All multi-byte integers are big-endian.
// Sub-paths, the STN table, and per-angle alternates are skipped: each
// PlayItem's leading length field is used to advance past everything that
// is not needed.
//
// It returns an error (and a nil playlist) when the header is truncated,
// the magic is not "MPLS", the PlayList offset or length points outside
// data, the PlayList is too short to hold its own header, or any PlayItem
// is shorter than the fixed 20-byte prefix or overruns the PlayList.
func ParseMPLS(data []byte) (*MPLSPlayList, error) {
	const (
		// length(4) + reserved(2) + numPlayItems(2) + numSubPaths(2)
		playListHeaderSize = 10
		// Bytes of a PlayItem body up to and including OUT_time.
		minPlayItemLen = 20
	)

	if len(data) < mplsHeaderSize {
		return nil, errors.New("mpls: truncated header")
	}
	if string(data[0:4]) != "MPLS" {
		return nil, fmt.Errorf("mpls: bad magic %q", data[0:4])
	}
	version := string(data[4:8])

	// All range checks run in uint64: the on-disk fields are 32-bit, so
	// doing the sums in int could wrap negative on 32-bit platforms and
	// slip past the checks into a slicing panic.
	fileSize := uint64(len(data))
	playListOff := uint64(binary.BigEndian.Uint32(data[8:12]))
	if playListOff < mplsHeaderSize || playListOff+playListHeaderSize > fileSize {
		return nil, fmt.Errorf("mpls: PlayList offset %d out of range (file size %d)", playListOff, len(data))
	}

	pl := data[playListOff:]
	// The length field excludes its own 4 bytes.
	playListLen := uint64(binary.BigEndian.Uint32(pl[0:4]))
	if playListOff+4+playListLen > fileSize {
		return nil, fmt.Errorf("mpls: PlayList length %d exceeds file size", playListLen)
	}
	if playListLen < playListHeaderSize-4 {
		// The counts we are about to read would lie outside the PlayList.
		return nil, fmt.Errorf("mpls: PlayList length %d too short for its header", playListLen)
	}
	numPlayItems := int(binary.BigEndian.Uint16(pl[6:8]))
	plBody := pl[:4+int(playListLen)] // safe: bounded by len(data) above

	// Never reserve more slots than the PlayList could physically hold,
	// whatever the (untrusted) item count claims.
	capHint := (len(plBody) - playListHeaderSize) / (2 + minPlayItemLen)
	if numPlayItems < capHint {
		capHint = numPlayItems
	}
	items := make([]MPLSPlayItem, 0, capHint)

	cursor := playListHeaderSize
	for i := 0; i < numPlayItems; i++ {
		if cursor+2 > len(plBody) {
			return nil, fmt.Errorf("mpls: PlayItem %d header out of range", i)
		}
		// PlayItem length excludes the 2-byte length field itself.
		itemLen := int(binary.BigEndian.Uint16(plBody[cursor : cursor+2]))
		itemStart := cursor + 2
		itemEnd := itemStart + itemLen
		if itemEnd > len(plBody) {
			return nil, fmt.Errorf("mpls: PlayItem %d length %d overruns PlayList", i, itemLen)
		}
		// Fixed PlayItem layout we care about:
		//   +0   5  clip_information_file_name (e.g. "00001")
		//   +5   4  clip_codec_identifier ("M2TS")
		//   +9   2  flags incl. is_multi_angle / connection_condition
		//   +11  1  ref_to_STC_id
		//   +12  4  IN_time  (45 kHz)
		//   +16  4  OUT_time (45 kHz)
		if itemLen < minPlayItemLen {
			return nil, fmt.Errorf("mpls: PlayItem %d too short (len=%d)", i, itemLen)
		}
		body := plBody[itemStart:itemEnd]
		items = append(items, MPLSPlayItem{
			ClipName: string(body[0:5]),
			InTime:   binary.BigEndian.Uint32(body[12:16]),
			OutTime:  binary.BigEndian.Uint32(body[16:20]),
		})
		// Jump by the declared length so any tail (angles, STN table) is skipped.
		cursor = itemEnd
	}

	return &MPLSPlayList{Version: version, PlayItems: items}, nil
}
+ } + // PlayItem length excludes its own 2-byte length prefix. + lenPrefix := make([]byte, 2) + binary.BigEndian.PutUint16(lenPrefix, uint16(len(body))) + playItemsBuf = append(playItemsBuf, lenPrefix...) + playItemsBuf = append(playItemsBuf, body...) + } + + // PlayList header: length(4)+reserved(2)+numPI(2)+numSub(2)+playItems + plHeader := make([]byte, 10) + // length excludes its own 4-byte field + binary.BigEndian.PutUint32(plHeader[0:4], uint32(6+len(playItemsBuf))) + binary.BigEndian.PutUint16(plHeader[6:8], uint16(len(items))) + // numSubPaths left zero + + playList := append(plHeader, playItemsBuf...) + + // File header: 4 magic + 4 version + 4 PL offset + 4 PLMark + 4 ExtData + hdr := make([]byte, mplsHeaderSize) + copy(hdr[0:4], "MPLS") + copy(hdr[4:8], version) + binary.BigEndian.PutUint32(hdr[8:12], uint32(mplsHeaderSize)) + // PlayListMark & ExtensionData offsets unused; leave zero. + + return append(hdr, playList...) +} + +func TestParseMPLS(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + data []byte + wantErr bool + wantItems []MPLSPlayItem + wantTicks int64 + }{ + { + name: "single PlayItem", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 1000, OutTime: 91000}, + }, nil), + wantItems: []MPLSPlayItem{{ClipName: "00001", InTime: 1000, OutTime: 91000}}, + wantTicks: 90000, // 2s at 45kHz + }, + { + name: "five PlayItems (main feature shape)", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, nil), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, 
OutTime: 45000}, + }, + wantTicks: 5 * 45000, + }, + { + name: "multi-angle PlayItem (tail must be skipped)", + // The tail simulates angle-count + alt-angle records appended + // after the fixed PlayItem prefix. The parser only consumes the + // first 20 bytes and uses the length field to skip past the + // rest, so item 2 must still parse cleanly. + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, []byte{ + 0x02, // num_angles + 0x00, // is_different_audios flags + '0', '0', '0', '0', '7', 'M', '2', 'T', 'S', 0x00, // one alt angle entry (10 bytes) + }), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, + wantTicks: 45000 + 90000, + }, + { + name: "wrong magic", + data: []byte("NOTMPLS-padding-here-padding-here"), + wantErr: true, + }, + { + name: "truncated header", + data: []byte("MPLS"), + wantErr: true, + }, + { + name: "PlayList offset out of range", + data: func() []byte { + b := make([]byte, mplsHeaderSize) + copy(b[0:4], "MPLS") + copy(b[4:8], "0200") + binary.BigEndian.PutUint32(b[8:12], 9999) + return b + }(), + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got, err := ParseMPLS(tc.data) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got.PlayItems) != len(tc.wantItems) { + t.Fatalf("PlayItems len = %d, want %d", len(got.PlayItems), len(tc.wantItems)) + } + for i, it := range got.PlayItems { + if it != tc.wantItems[i] { + t.Errorf("PlayItem[%d] = %+v, want %+v", i, it, tc.wantItems[i]) + } + } + if d := got.DurationTicks(); d != tc.wantTicks { + t.Errorf("DurationTicks = %d, want %d", d, tc.wantTicks) + } + }) + } +} diff --git a/internal/importer/archive/iso/processor.go 
b/internal/importer/archive/iso/processor.go index 10c5fd7a..71076a29 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -3,6 +3,7 @@ package iso import ( "context" "fmt" + "log/slog" "path/filepath" "strings" "time" @@ -11,61 +12,100 @@ import ( "github.com/javi11/altmount/internal/pool" ) -// AnalyzeISOContent enumerates all allowed media files inside the given ISO source -// and returns ISOFileContent entries with Usenet segment mappings. -func AnalyzeISOContent( +// AnalyzeISO inspects the given ISO source and returns: +// - the volume label (for multi-disc grouping), +// - the filtered list of inner files (Files), +// - the ordered MainFeature M2TS list when the ISO is a Blu-ray with a +// resolvable playlist (nil otherwise). +// +// allowedExtensions only filters Files. MainFeature is always returned for +// BDMV discs regardless of the extension list — its existence is the +// signal callers use to opt into virtual concatenation. 
+func AnalyzeISO( ctx context.Context, src ISOSource, poolManager pool.Manager, maxPrefetch int, readTimeout time.Duration, allowedExtensions []string, -) ([]ISOFileContent, error) { +) (*AnalyzedISO, error) { rs, closer, err := NewISOReadSeeker(ctx, src, poolManager, maxPrefetch, readTimeout) if err != nil { return nil, fmt.Errorf("iso: creating read seeker for %q: %w", src.Filename, err) } defer closer.Close() - files, err := ListISOFiles(rs) + entries, err := ListISOFiles(rs) if err != nil { return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - var result []ISOFileContent - for _, entry := range files { - if !isAllowedFile(entry.path, int64(entry.size), allowedExtensions) { + slog.InfoContext(ctx, "ISO analysed", + "filename", src.Filename, + "iso_size_bytes", src.Size, + "files", len(entries), + ) + + out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} + + for _, e := range entries { + if !isAllowedFile(e.path, int64(e.size), allowedExtensions) { continue } + out.Files = append(out.Files, buildFileContent(src, e)) + } - isoOffset := int64(entry.lba) * iso9660SectorSize - - fc := ISOFileContent{ - InternalPath: entry.path, - Filename: filepath.Base(entry.path), - Size: int64(entry.size), + if mf := ResolveMainFeature(ctx, rs, entries); mf != nil { + out.DurationTicks = mf.DurationTicks + for _, e := range mf.Streams { + out.MainFeature = append(out.MainFeature, buildFileContent(src, e)) } + } + + return out, nil +} +// buildFileContent turns one ISO directory entry into an ISOFileContent, +// emitting one ISONestedSource per on-disc extent. Concatenating the +// sources' byte ranges yields the complete file. This is the path that +// previously fed BAD bytes for multi-extent files like Avatar's 17 GiB +// 00022.m2ts (945 extents) — only the first extent's data was correct. 
+func buildFileContent(src ISOSource, e isoFileEntry) ISOFileContent { + fc := ISOFileContent{ + InternalPath: e.path, + Filename: filepath.Base(e.path), + Size: int64(e.size), + Sources: make([]ISONestedSource, 0, len(e.extents)), + } + for _, ext := range e.extents { + isoOffset := int64(ext.lba) * iso9660SectorSize + extLen := int64(ext.length) if len(src.AesKey) == 0 { - // Unencrypted: slice segments to cover exactly this file's bytes - sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, int64(entry.size)) - fc.Segments = sliced + // Unencrypted: pre-slice outer segments to cover this extent + // only. The downstream nested reader treats InnerOffset as + // an offset within the (already-sliced) segment chain. + sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, extLen) + fc.Sources = append(fc.Sources, ISONestedSource{ + Segments: sliced, + InnerOffset: 0, + InnerLength: extLen, + InnerVolumeSize: extLen, + }) } else { - // Encrypted: create a NestedSource so the VFS can decrypt and seek - fc.NestedSource = &ISONestedSource{ + // Encrypted: AES-CBC needs the IV chain from byte 0 of the + // outer ISO, so every source gets the full outer segments + // and the cipher seeks via InnerOffset. + fc.Sources = append(fc.Sources, ISONestedSource{ Segments: src.Segments, AesKey: src.AesKey, AesIV: src.AesIV, InnerOffset: isoOffset, - InnerLength: int64(entry.size), + InnerLength: extLen, InnerVolumeSize: src.Size, - } + }) } - - result = append(result, fc) } - - return result, nil + return fc } // isAllowedFile returns true if the file extension is in the allowed list. diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index 53e51467..09e0aad5 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -11,25 +11,45 @@ type ISOSource struct { Size int64 // Decrypted ISO size } -// ISOFileContent represents one file found inside the ISO. 
+// ISOFileContent represents one file found inside the ISO. The file's +// data may be split across multiple on-disc extents (Blu-ray main-feature +// M2TS files routinely use hundreds), so Sources is a slice of inner +// sources in disc order. Concatenating their byte ranges yields the +// complete file content. type ISOFileContent struct { InternalPath string // e.g. "BDMV/STREAM/00001.m2ts" Filename string // Base filename - Size int64 // File size in bytes + Size int64 // Total file size in bytes (sum of Sources.InnerLength) NzbdavID string // Carried from parent archive Content - // Unencrypted case: Segments sliced to cover exactly this file - Segments []*metapb.SegmentData - // Encrypted case: nil Segments + populated NestedSource - NestedSource *ISONestedSource + Sources []ISONestedSource } -// ISONestedSource holds everything needed to decrypt and seek into the ISO -// for a single inner file. +// ISONestedSource is one extent of an inner file. For unencrypted ISOs, +// Segments is pre-sliced to cover exactly this extent and AesKey is nil +// (InnerOffset is 0, InnerLength equals the extent length). For encrypted +// ISOs, AesKey/AesIV are populated, Segments cover the full outer ISO, +// InnerOffset is the byte offset of this extent within the decrypted +// ISO, and InnerVolumeSize is the full decrypted ISO size — the cipher +// chain needs to start at byte 0 so multi-extent encrypted reads use +// the same outer-ISO data with different inner offsets. type ISONestedSource struct { Segments []*metapb.SegmentData AesKey []byte AesIV []byte - InnerOffset int64 // lba * 2048 - InnerLength int64 // file size - InnerVolumeSize int64 // ISO total decrypted size + InnerOffset int64 + InnerLength int64 + InnerVolumeSize int64 +} + +// AnalyzedISO is the full result of inspecting one ISO image. Files mirrors +// what AnalyzeISOContent has always returned (all media files with extension +// filtering applied). 
MainFeature, when non-nil, is the ordered M2TS list +// that forms the Blu-ray main feature according to BDMV/PLAYLIST/*.mpls — +// this is the slice callers should concatenate to produce a single playable +// virtual file. +type AnalyzedISO struct { + VolumeLabel string + Files []ISOFileContent + MainFeature []ISOFileContent // nil for non-BDMV / unparseable playlists + DurationTicks int64 // sum of (OUT-IN) of MainFeature at 45 kHz } diff --git a/internal/importer/archive/iso/volume.go b/internal/importer/archive/iso/volume.go new file mode 100644 index 00000000..f2db5657 --- /dev/null +++ b/internal/importer/archive/iso/volume.go @@ -0,0 +1,30 @@ +package iso + +import ( + "io" + "strings" +) + +// ReadVolumeLabel returns the ISO 9660 Volume Identifier from the Primary +// Volume Descriptor at sector 16. Hybrid Blu-ray discs always carry a +// 9660 PVD even when the active filesystem is UDF, so this works for both +// plain ISOs and BD images. +// +// Returns an empty string if the descriptor is missing or invalid — callers +// fall back to the ISO filename for disc-group keying. +func ReadVolumeLabel(rs io.ReadSeeker) string { + pvd := make([]byte, iso9660SectorSize) + if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err != nil { + return "" + } + if _, err := io.ReadFull(rs, pvd); err != nil { + return "" + } + // Type 1 = Primary Volume Descriptor; identifier "CD001" at +1. + if pvd[0] != 1 || string(pvd[1:6]) != "CD001" { + return "" + } + // Volume identifier: 32 bytes of a-characters at offset 40, space-padded. 
+ label := strings.TrimRight(string(pvd[40:72]), " \x00") + return label +} diff --git a/internal/importer/archive/iso/volume_test.go b/internal/importer/archive/iso/volume_test.go new file mode 100644 index 00000000..f8aeac1a --- /dev/null +++ b/internal/importer/archive/iso/volume_test.go @@ -0,0 +1,70 @@ +package iso + +import ( + "bytes" + "io" + "testing" +) + +// buildPVD constructs a 17-sector buffer with a synthetic Primary Volume +// Descriptor placed at sector 16. The remaining bytes are zero-filled. +func buildPVD(label string, typeCode byte, identifier string) io.ReadSeeker { + buf := make([]byte, 17*iso9660SectorSize) + pvd := buf[16*iso9660SectorSize:] + pvd[0] = typeCode + copy(pvd[1:6], identifier) + // Volume identifier field is 32 bytes, space-padded. + field := make([]byte, 32) + for i := range field { + field[i] = ' ' + } + copy(field, label) + copy(pvd[40:72], field) + return bytes.NewReader(buf) +} + +func TestReadVolumeLabel(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + rs io.ReadSeeker + want string + }{ + { + name: "Avatar disc 1 label", + rs: buildPVD("AVATAR_FIRE_AND_ASH_DISC_1", 1, "CD001"), + want: "AVATAR_FIRE_AND_ASH_DISC_1", + }, + { + name: "padded short label trimmed", + rs: buildPVD("FOO", 1, "CD001"), + want: "FOO", + }, + { + name: "wrong type code", + rs: buildPVD("ANYTHING", 2, "CD001"), + want: "", + }, + { + name: "wrong identifier", + rs: buildPVD("ANYTHING", 1, "BAD!?"), + want: "", + }, + { + name: "short input (no sector 16)", + rs: bytes.NewReader(make([]byte, 1024)), + want: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := ReadVolumeLabel(tc.rs) + if got != tc.want { + t.Errorf("ReadVolumeLabel = %q, want %q", got, tc.want) + } + }) + } +} diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go new file mode 100644 index 00000000..d9d9fbbf --- /dev/null +++ 
b/internal/importer/archive/iso_expansion.go @@ -0,0 +1,291 @@ +package archive + +import ( + "context" + "fmt" + "log/slog" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/javi11/altmount/internal/importer/archive/iso" + "github.com/javi11/altmount/internal/pool" +) + +// analyzedISO bundles an ISO Content with its inspection result and its +// place in a multi-disc grouping. Used internally by ExpandISOContents. +type analyzedISO struct { + src Content // original ISO Content (for fallback / metadata) + analyzed *iso.AnalyzedISO // result of iso.AnalyzeISO + discNum int // parsed disc number; 0 when label has no disc suffix + groupKey string // base name stripped of any DISC/CD/PART suffix +} + +// ExpandISOContents replaces .iso entries in contents with the media they +// contain, applying two Blu-ray-aware optimisations on top of the legacy +// "pick the largest file" behaviour: +// +// 1. Within a disc, if BDMV/PLAYLIST/*.mpls identifies a main feature +// spanning multiple M2TS clips, the clips are virtually concatenated +// into one Content via NestedSources — the player sees a single file. +// 2. Across discs in the same archive group (e.g. DISC_1 and DISC_2 ISOs +// in one NZB), discs sharing a stripped volume label are merged so +// the cross-disc movie also plays as one file. +// +// Non-ISO entries pass through unchanged. Per-ISO errors are non-fatal: +// on failure the original .iso Content is kept so downstream still has +// something to work with. 
+func ExpandISOContents( + ctx context.Context, + expand bool, + contents []Content, + poolManager pool.Manager, + maxPrefetch int, + readTimeout time.Duration, + allowedExtensions []string, +) ([]Content, error) { + if !expand { + return contents, nil + } + + var ( + result []Content + groups = make(map[string][]analyzedISO) + groupKeys []string + ) + + for _, c := range contents { + if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { + result = append(result, c) + continue + } + + src := iso.ISOSource{ + Filename: c.Filename, + Segments: c.Segments, + AesKey: c.AesKey, + AesIV: c.AesIV, + Size: c.Size, + } + a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) + if err != nil { + slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", + "file", c.Filename, "error", err) + result = append(result, c) + continue + } + if len(a.Files) == 0 && len(a.MainFeature) == 0 { + result = append(result, c) + continue + } + + key, discNum := discGroupKey(a.VolumeLabel, c.Filename) + entry := analyzedISO{src: c, analyzed: a, discNum: discNum, groupKey: key} + if _, exists := groups[key]; !exists { + groupKeys = append(groupKeys, key) + } + groups[key] = append(groups[key], entry) + } + + sort.Strings(groupKeys) // deterministic output order + for _, key := range groupKeys { + g := groups[key] + sort.SliceStable(g, func(i, j int) bool { return g[i].discNum < g[j].discNum }) + + // Concatenate main features only when *every* member of the group + // has one — mixing BDMV and non-BDMV in a single group is almost + // always a false grouping, so fall back to per-disc handling. 
+ allHaveMainFeature := true + for _, e := range g { + if len(e.analyzed.MainFeature) == 0 { + allHaveMainFeature = false + break + } + } + + if allHaveMainFeature { + merged, ok := buildMainFeatureContent(ctx, key, g) + if ok { + result = append(result, merged) + continue + } + } + + // Fallback: legacy per-ISO largest-file selection. + for _, e := range g { + nc, ok := buildLargestFileContent(e.src, e.analyzed.Files) + if !ok { + result = append(result, e.src) + continue + } + result = append(result, nc) + } + } + + return result, nil +} + +// buildMainFeatureContent concatenates every member's MainFeature into a +// single Content whose NestedSources chain spans every M2TS in disc and +// playlist order. Returns (zero, false) when, after conversion, the chain +// is empty. +func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedISO) (Content, bool) { + var ( + sources []NestedSource + totalSize int64 + firstISOName string + nzbdavID string + ) + for _, e := range g { + if firstISOName == "" { + firstISOName = e.src.Filename + nzbdavID = e.src.NzbdavID + } + for _, fc := range e.analyzed.MainFeature { + for _, ns := range isoFileContentToNestedSources(fc) { + if ns.InnerLength <= 0 { + continue + } + sources = append(sources, ns) + totalSize += ns.InnerLength + } + } + } + if len(sources) == 0 { + return Content{}, false + } + + filename := mainFeatureFilename(groupKey, firstISOName) + slog.InfoContext(ctx, "Built Blu-ray main-feature virtual file", + "group", groupKey, + "discs", len(g), + "clips", len(sources), + "size_bytes", totalSize, + "filename", filename, + ) + + return Content{ + InternalPath: filename, + Filename: filename, + Size: totalSize, + PackedSize: totalSize, + NzbdavID: nzbdavID, + NestedSources: sources, + ISOExpansionIndex: 1, + }, true +} + +// buildLargestFileContent reproduces the pre-existing "pick the single +// biggest file inside the ISO" behaviour. Kept as a fallback for ISOs +// that have no BDMV main feature. 
+func buildLargestFileContent(src Content, files []iso.ISOFileContent) (Content, bool) { + if len(files) == 0 { + return Content{}, false + } + sort.Slice(files, func(i, j int) bool { return files[i].Size > files[j].Size }) + f := files[0] + nc := Content{ + InternalPath: f.InternalPath, + Filename: f.Filename, + Size: f.Size, + PackedSize: f.Size, + NzbdavID: src.NzbdavID, + ISOExpansionIndex: 1, + } + nc.NestedSources = isoFileContentToNestedSources(f) + if len(nc.NestedSources) == 0 { + return Content{}, false + } + return nc, true +} + +// isoFileContentToNestedSources fans an ISOFileContent's on-disc extents +// out into one NestedSource per extent, preserving disc order. Concating +// the resulting sources yields the file's bytes — the multi-extent fix +// for Blu-ray main-feature M2TS files lives here. +func isoFileContentToNestedSources(fc iso.ISOFileContent) []NestedSource { + out := make([]NestedSource, 0, len(fc.Sources)) + for _, s := range fc.Sources { + out = append(out, NestedSource{ + Segments: s.Segments, + AesKey: s.AesKey, + AesIV: s.AesIV, + InnerOffset: s.InnerOffset, + InnerLength: s.InnerLength, + InnerVolumeSize: s.InnerVolumeSize, + }) + } + return out +} + +// discSuffixPattern matches volume labels like "AVATAR_FIRE_AND_ASH_DISC_1", +// "MOVIE-CD2", "TITLE PART 3", etc. Capture 1 is the stripped base name, +// capture 2 is the disc identifier (numeric or single letter). +var discSuffixPattern = regexp.MustCompile(`(?i)^(.+?)[ _\-]*(?:disc|cd|part|d|side)[ _\-]*([0-9]+|[a-z])$`) + +// discGroupKey computes the disc-grouping key and parsed disc number for +// an ISO. It prefers the volume label and falls back to the ISO filename +// (without extension) when the label is empty or doesn't match a disc +// pattern. Single-disc ISOs return key=, discNum=0. 
+func discGroupKey(label, isoFilename string) (string, int) { + candidates := []string{label} + if isoFilename != "" { + candidates = append(candidates, strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename))) + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c == "" { + continue + } + if m := discSuffixPattern.FindStringSubmatch(c); m != nil { + base := normaliseGroupKey(m[1]) + return base, parseDiscNumber(m[2]) + } + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c != "" { + return normaliseGroupKey(c), 0 + } + } + return "", 0 +} + +func normaliseGroupKey(s string) string { + s = strings.TrimSpace(s) + s = strings.Trim(s, "_- ") + return strings.ToUpper(s) +} + +// parseDiscNumber turns "1" → 1, "2" → 2, "A" → 1, "B" → 2, etc. +func parseDiscNumber(s string) int { + if n, err := strconv.Atoi(s); err == nil { + return n + } + if len(s) == 1 { + c := strings.ToUpper(s)[0] + if c >= 'A' && c <= 'Z' { + return int(c-'A') + 1 + } + } + return 0 +} + +// mainFeatureFilename derives a sensible filename for the virtual concat. +// Downstream renaming (see rar/sevenzip aggregator post-processing) will +// usually replace the base name with the NZB release name; we only need a +// valid .m2ts extension here. 
// mainFeatureFilename picks the name of the virtual concatenated file. It
// always ends in ".m2ts"; the stem is the group key when there is one,
// otherwise the ISO filename without its extension, otherwise
// "main_feature".
func mainFeatureFilename(groupKey, isoFilename string) string {
	const ext = ".m2ts"
	switch {
	case groupKey != "":
		return fmt.Sprintf("%s%s", groupKey, ext)
	case isoFilename != "":
		return strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename)) + ext
	default:
		return "main_feature" + ext
	}
}
TestParseDiscNumber(t *testing.T) { + t.Parallel() + + cases := map[string]int{ + "1": 1, + "2": 2, + "10": 10, + "A": 1, + "a": 1, + "B": 2, + "": 0, + "AB": 0, + "foo": 0, + } + for in, want := range cases { + if got := parseDiscNumber(in); got != want { + t.Errorf("parseDiscNumber(%q) = %d, want %d", in, got, want) + } + } +} + +func TestIsoFileContentToNestedSources(t *testing.T) { + t.Parallel() + + t.Run("single unencrypted extent → one NestedSource", func(t *testing.T) { + t.Parallel() + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 100, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}}, + InnerOffset: 0, + InnerLength: 100, + InnerVolumeSize: 100, + }}, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) + } + if got[0].InnerLength != 100 || got[0].InnerOffset != 0 || len(got[0].AesKey) != 0 { + t.Fatalf("unexpected NestedSource: %+v", got[0]) + } + }) + + t.Run("multi-extent file → one NestedSource per extent in order", func(t *testing.T) { + t.Parallel() + // The bug we just fixed: a 17 GiB M2TS spans hundreds of extents. + // Each extent must become its own NestedSource so the downstream + // concat reader stitches them in disc order. 
+ fc := iso.ISOFileContent{ + Filename: "00022.m2ts", + Size: 30, + Sources: []iso.ISONestedSource{ + {Segments: []*metapb.SegmentData{{Id: "e1"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e2"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e3"}}, InnerLength: 10}, + }, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 3 { + t.Fatalf("want 3 sources, got %d", len(got)) + } + wantIDs := []string{"e1", "e2", "e3"} + for i, ns := range got { + if len(ns.Segments) != 1 || ns.Segments[0].Id != wantIDs[i] { + t.Errorf("source %d: want segment id %q, got %+v", i, wantIDs[i], ns.Segments) + } + } + }) + + t.Run("encrypted source carries key + IV through", func(t *testing.T) { + t.Parallel() + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 2048, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}}, + AesKey: []byte("0123456789abcdef0123456789abcdef"), + AesIV: []byte("0123456789abcdef"), + InnerOffset: 1024, + InnerLength: 2048, + InnerVolumeSize: 99999, + }}, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) + } + if got[0].InnerOffset != 1024 || got[0].InnerLength != 2048 || got[0].InnerVolumeSize != 99999 { + t.Fatalf("offsets mangled: %+v", got[0]) + } + if len(got[0].AesKey) == 0 { + t.Error("AesKey should be carried through for encrypted source") + } + }) +} + +func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { + t.Parallel() + + // Helper to make a fake ISO main-feature ISOFileContent with given size + // and a single-segment outer slice (segment values are not interpreted + // by buildMainFeatureContent — only Size and the source attributes + // matter for the assembled NestedSources chain). 
+ mkClip := func(name string, size int64) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{ + {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, + }, + InnerLength: size, + InnerVolumeSize: size, + }}, + } + } + + disc1 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_1.iso", NzbdavID: "nzb-1"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_1", + MainFeature: []iso.ISOFileContent{ + mkClip("00001.m2ts", 10_000_000), + mkClip("00002.m2ts", 20_000_000), + }, + }, + discNum: 1, + groupKey: "AVATAR", + } + disc2 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_2.iso", NzbdavID: "nzb-2"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_2", + MainFeature: []iso.ISOFileContent{ + mkClip("00003.m2ts", 30_000_000), + }, + }, + discNum: 2, + groupKey: "AVATAR", + } + + got, ok := buildMainFeatureContent(context.Background(), "AVATAR", []analyzedISO{disc1, disc2}) + if !ok { + t.Fatal("buildMainFeatureContent returned ok=false") + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "nzb-1" { + t.Errorf("NzbdavID = %q, want nzb-1 (from first disc)", got.NzbdavID) + } + if len(got.NestedSources) != 3 { + t.Fatalf("NestedSources count = %d, want 3 (2 clips from disc 1 + 1 clip from disc 2)", len(got.NestedSources)) + } + wantSize := int64(10_000_000 + 20_000_000 + 30_000_000) + if got.Size != wantSize { + t.Errorf("Size = %d, want %d", got.Size, wantSize) + } + if got.PackedSize != wantSize { + t.Errorf("PackedSize = %d, want %d", got.PackedSize, wantSize) + } + // Order must follow disc-then-playlist (disc1.clip1, disc1.clip2, disc2.clip3). 
+ wantOrder := []int64{10_000_000, 20_000_000, 30_000_000} + for i, ns := range got.NestedSources { + if ns.InnerLength != wantOrder[i] { + t.Errorf("NestedSources[%d].InnerLength = %d, want %d", i, ns.InnerLength, wantOrder[i]) + } + } + if got.Filename != "AVATAR.m2ts" { + t.Errorf("Filename = %q, want AVATAR.m2ts", got.Filename) + } +} + +func TestBuildLargestFileContent(t *testing.T) { + t.Parallel() + + mkFile := func(name string, size int64, segID string) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: segID, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}}, + InnerLength: size, + InnerVolumeSize: size, + }}, + } + } + files := []iso.ISOFileContent{ + mkFile("small.mkv", 500, "s"), + mkFile("big.mkv", 5_000_000, "b"), + } + src := Content{Filename: "thing.iso", NzbdavID: "id-1"} + + got, ok := buildLargestFileContent(src, files) + if !ok { + t.Fatal("buildLargestFileContent returned ok=false") + } + if got.Filename != "big.mkv" { + t.Errorf("Filename = %q, want big.mkv (largest)", got.Filename) + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "id-1" { + t.Errorf("NzbdavID = %q, want id-1", got.NzbdavID) + } +} diff --git a/internal/importer/archive/rar/aggregator.go b/internal/importer/archive/rar/aggregator.go index 52f8d487..91138405 100644 --- a/internal/importer/archive/rar/aggregator.go +++ b/internal/importer/archive/rar/aggregator.go @@ -16,7 +16,6 @@ import ( "github.com/javi11/altmount/internal/encryption/aes" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -209,7 +208,7 @@ func ProcessArchive(ctx context.Context, opts 
ProcessArchiveOptions) error { } // Expand ISO files found inside the RAR archive into their inner media files - rarContents, err := expandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -474,81 +473,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. 
- sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. - f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // GroupArchivesByBaseName groups ParsedFiles by their RAR base name (case-insensitive). // Returns groups in deterministic order (sorted by base name) for testability. func GroupArchivesByBaseName(files []parser.ParsedFile) [][]parser.ParsedFile { diff --git a/internal/importer/archive/sevenzip/aggregator.go b/internal/importer/archive/sevenzip/aggregator.go index f0214a29..4fbabce9 100644 --- a/internal/importer/archive/sevenzip/aggregator.go +++ b/internal/importer/archive/sevenzip/aggregator.go @@ -6,7 +6,6 @@ import ( "log/slog" "os" "path/filepath" - "sort" "strings" "sync/atomic" "time" @@ -14,7 +13,6 @@ import ( concpool "github.com/sourcegraph/conc/pool" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -186,7 +184,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { slog.InfoContext(ctx, "Successfully analyzed 7zip archive content", "files_in_archive", len(sevenZipContents)) // Expand ISO files found 
inside the 7zip archive into their inner media files - sevenZipContents, err = expandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -445,81 +443,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // normalizeArchiveReleaseFilename aligns the filename to the NZB basename while keeping the original extension. func normalizeArchiveReleaseFilename(nzbFilename, originalFilename string) string { releaseName := nzbtrim.TrimNzbExtension(nzbFilename)