From f07a300d85547daf75b2ee579e6fedef80588341 Mon Sep 17 00:00:00 2001
From: javi11
Date: Wed, 20 May 2026 19:51:38 +0200
Subject: [PATCH 1/8] feat(iso): concat Blu-ray main feature across clips and discs

Long Blu-ray releases split the main feature two ways: across multiple
M2TS clips within a disc (joined by BDMV/PLAYLIST/*.mpls), and across
multiple discs in one NZB (e.g. AVATAR_FIRE_AND_ASH_DISC_1 / _DISC_2).
The importer previously kept only the single largest M2TS per ISO, which
both dropped the rest of the movie within a disc and treated each disc
as an unrelated file.

Now ExpandISOContents (shared between rar and sevenzip aggregators)
parses the main MPLS playlist on each ISO, reads the 9660 PVD volume
label, groups ISOs by stripped base name with a DISC|CD|PART|D|SIDE
suffix regex, and emits a single Content whose NestedSources chain spans
every M2TS in disc-then-playlist order. The metadata layer's existing
nested multi-reader produces one seamless seekable virtual file.

Non-BDMV discs and unparseable playlists fall back to the legacy
largest-file behaviour so nothing regresses.
--- internal/importer/archive/iso/bluray.go | 116 +++++++ internal/importer/archive/iso/bluray_test.go | 122 ++++++++ internal/importer/archive/iso/mpls.go | 108 +++++++ internal/importer/archive/iso/mpls_test.go | 167 ++++++++++ internal/importer/archive/iso/processor.go | 83 +++-- internal/importer/archive/iso/types.go | 13 + internal/importer/archive/iso/volume.go | 30 ++ internal/importer/archive/iso/volume_test.go | 70 +++++ internal/importer/archive/iso_expansion.go | 296 ++++++++++++++++++ .../importer/archive/iso_expansion_test.go | 214 +++++++++++++ internal/importer/archive/rar/aggregator.go | 78 +---- .../importer/archive/sevenzip/aggregator.go | 79 +---- 12 files changed, 1189 insertions(+), 187 deletions(-) create mode 100644 internal/importer/archive/iso/bluray.go create mode 100644 internal/importer/archive/iso/bluray_test.go create mode 100644 internal/importer/archive/iso/mpls.go create mode 100644 internal/importer/archive/iso/mpls_test.go create mode 100644 internal/importer/archive/iso/volume.go create mode 100644 internal/importer/archive/iso/volume_test.go create mode 100644 internal/importer/archive/iso_expansion.go create mode 100644 internal/importer/archive/iso_expansion_test.go diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go new file mode 100644 index 00000000..e2ce4d73 --- /dev/null +++ b/internal/importer/archive/iso/bluray.go @@ -0,0 +1,116 @@ +package iso + +import ( + "io" + "sort" + "strings" +) + +// MainFeaturePlaylist is the result of analysing a Blu-ray's BDMV. +// Streams is the ordered list of M2TS file entries that, concatenated, +// form the main feature; it is never empty, because ResolveMainFeature +// returns nil when no parseable playlist was found. +type MainFeaturePlaylist struct { + PlaylistName string // e.g. 
"BDMV/PLAYLIST/00800.MPLS" — for logging only + DurationTicks int64 // sum of (OUT-IN) at 45 kHz + Streams []isoFileEntry // ordered M2TS entries +} + +// ResolveMainFeature inspects the entries returned by ListISOFiles for a +// Blu-ray (BDMV) structure and returns the playlist that represents the +// main movie. Returns nil if the disc is not BDMV, has no .mpls, or no +// playlist resolves to a non-empty M2TS sequence. +// +// Selection heuristic: pick the playlist with the longest total +// presentation duration. Ties break on PlayItem count (more clips wins), +// then lexicographically smallest filename for determinism. +// +// Failures parsing individual playlists are non-fatal — we skip them and +// keep evaluating the rest, mirroring how every Blu-ray player tolerates +// malformed entries in BDMV/PLAYLIST/. +func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { + // Build an index of all M2TS streams by their 5-digit clip stem (the + // part MPLS references). M2TS files live at BDMV/STREAM/{clip}.M2TS + // case-insensitively. + streamByClip := make(map[string]isoFileEntry) + var playlistEntries []isoFileEntry + for _, f := range files { + up := strings.ToUpper(f.path) + switch { + case strings.HasPrefix(up, "BDMV/PLAYLIST/") && strings.HasSuffix(up, ".MPLS"): + playlistEntries = append(playlistEntries, f) + case strings.HasPrefix(up, "BDMV/STREAM/") && strings.HasSuffix(up, ".M2TS"): + base := up[len("BDMV/STREAM/") : len(up)-len(".M2TS")] + streamByClip[base] = f + } + } + if len(playlistEntries) == 0 || len(streamByClip) == 0 { + return nil + } + + // Deterministic order: sort playlists lexicographically by path so the + // smaller path is evaluated first and wins the final tie-break. 
+ sort.Slice(playlistEntries, func(i, j int) bool { + return playlistEntries[i].path < playlistEntries[j].path + }) + + var best *MainFeaturePlaylist + for _, pe := range playlistEntries { + data, err := readISOFile(rs, pe) + if err != nil { + continue + } + pl, err := ParseMPLS(data) + if err != nil { + continue + } + + // Resolve clip names → M2TS entries, in playlist order. + streams := make([]isoFileEntry, 0, len(pl.PlayItems)) + for _, it := range pl.PlayItems { + entry, ok := streamByClip[strings.ToUpper(it.ClipName)] + if !ok { + continue + } + streams = append(streams, entry) + } + if len(streams) == 0 { + continue + } + + cand := &MainFeaturePlaylist{ + PlaylistName: pe.path, + DurationTicks: pl.DurationTicks(), + Streams: streams, + } + if best == nil || isBetterPlaylist(cand, best, len(pl.PlayItems), len(best.Streams)) { + best = cand + } + } + return best +} + +// isBetterPlaylist returns true when cand should replace best. +// Comparison: longer duration > more PlayItems > earlier filename. +// The filename tie-break relies on playlistEntries being sorted before +// iteration so the smaller path is seen first; we therefore only swap +// when strictly better. +func isBetterPlaylist(cand, best *MainFeaturePlaylist, candItems, bestItems int) bool { + if cand.DurationTicks != best.DurationTicks { + return cand.DurationTicks > best.DurationTicks + } + return candItems > bestItems +} + +// readISOFile reads the full contents of one isoFileEntry from rs. +// MPLS files are tiny (~KBs), so a one-shot read is fine. 
+func readISOFile(rs io.ReadSeeker, e isoFileEntry) ([]byte, error) { + if _, err := rs.Seek(int64(e.lba)*iso9660SectorSize, io.SeekStart); err != nil { + return nil, err + } + buf := make([]byte, e.size) + if _, err := io.ReadFull(rs, buf); err != nil { + return nil, err + } + return buf, nil +} diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go new file mode 100644 index 00000000..e548f61a --- /dev/null +++ b/internal/importer/archive/iso/bluray_test.go @@ -0,0 +1,122 @@ +package iso + +import ( + "bytes" + "io" + "testing" +) + +// makeImage assembles an in-memory disc image by placing each piece of +// data at the sector index given in its key. The returned reader can be +// used as if it were a real ISO read-seeker. +func makeImage(t *testing.T, pieces map[uint32][]byte) io.ReadSeeker { + t.Helper() + var maxSect uint32 + for s, b := range pieces { + end := s + uint32((len(b)+iso9660SectorSize-1)/iso9660SectorSize) + if end > maxSect { + maxSect = end + } + } + if maxSect == 0 { + maxSect = 1 + } + img := make([]byte, int(maxSect)*iso9660SectorSize) + for s, b := range pieces { + copy(img[int(s)*iso9660SectorSize:], b) + } + return bytes.NewReader(img) +} + +func TestResolveMainFeature(t *testing.T) { + t.Parallel() + + t.Run("picks longest playlist", func(t *testing.T) { + t.Parallel() + // Two playlists: + // 00001.MPLS → 1 clip, short (extras playlist) + // 00800.MPLS → 3 clips, long (main feature) + short := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 45000}, + }, nil) + long := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 90 * 45000}, + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, + {ClipName: "00003", InTime: 0, OutTime: 30 * 45000}, + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: short, + 110: long, + }) + + // File listing: two playlists and four M2TS clips (one extra). 
+ files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(short))}, + {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(long))}, + {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + {path: "BDMV/STREAM/00002.M2TS", lba: 300, size: 2_000_000}, + {path: "BDMV/STREAM/00003.M2TS", lba: 400, size: 3_000_000}, + {path: "BDMV/STREAM/00010.M2TS", lba: 500, size: 500_000}, + } + + got := ResolveMainFeature(rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3", len(got.Streams)) + } + wantOrder := []string{"BDMV/STREAM/00001.M2TS", "BDMV/STREAM/00002.M2TS", "BDMV/STREAM/00003.M2TS"} + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("non-BDMV disc returns nil", func(t *testing.T) { + t.Parallel() + files := []isoFileEntry{ + {path: "movie.mkv", lba: 100, size: 1_000_000}, + } + if got := ResolveMainFeature(bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { + t.Errorf("expected nil for non-BDMV disc, got %+v", got) + } + }) + + t.Run("BDMV with no parseable MPLS returns nil", func(t *testing.T) { + t.Parallel() + rs := makeImage(t, map[uint32][]byte{ + 100: []byte("not a real mpls"), + }) + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: 15}, + {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + } + if got := ResolveMainFeature(rs, files); got != nil { + t.Errorf("expected nil for unparseable MPLS, got %+v", got) + } + }) + + t.Run("playlist referencing missing M2TS yields nil", func(t *testing.T) { + t.Parallel() + // Playlist references a clip that has no corresponding M2TS entry. 
+ data := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "99999", InTime: 0, OutTime: 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{ + 100: data, + }) + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(data))}, + {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + } + if got := ResolveMainFeature(rs, files); got != nil { + t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) + } + }) +} diff --git a/internal/importer/archive/iso/mpls.go b/internal/importer/archive/iso/mpls.go new file mode 100644 index 00000000..141d7a02 --- /dev/null +++ b/internal/importer/archive/iso/mpls.go @@ -0,0 +1,108 @@ +package iso + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// MPLS (Blu-ray PlayList) is a fixed binary format defined by the BDA spec. +// We only parse the fields needed to identify the main feature playlist and +// its ordered list of M2TS clips: the clip_information_file_name for each +// PlayItem and the IN/OUT presentation times used to estimate duration. + +// mplsHeaderSize is the fixed prefix length: 4 magic + 4 version + +// 4 PlayList offset + 4 PlayListMark offset + 4 ExtensionData offset. +const mplsHeaderSize = 20 + +// MPLSPlayItem describes one entry in a PlayList. +type MPLSPlayItem struct { + // ClipName is the 5-character clip_information_file_name (e.g. "00001"). + // The corresponding stream lives at BDMV/STREAM/{ClipName}.M2TS. + ClipName string + // InTime and OutTime are 45 kHz presentation timestamps. Duration in + // ticks is OutTime - InTime; convert to seconds by dividing by 45000. + InTime uint32 + OutTime uint32 +} + +// MPLSPlayList is the parsed view of a single .mpls file. +type MPLSPlayList struct { + Version string // e.g. "0100", "0200", "0300" + PlayItems []MPLSPlayItem +} + +// DurationTicks returns the sum of (OutTime-InTime) across PlayItems in +// 45 kHz ticks. 
This is the standard proxy for "longest playlist = +// main feature" used by every Blu-ray player. +func (p *MPLSPlayList) DurationTicks() int64 { + var total int64 + for _, it := range p.PlayItems { + if it.OutTime > it.InTime { + total += int64(it.OutTime - it.InTime) + } + } + return total +} + +// ParseMPLS decodes a .mpls file. All multi-byte integers are big-endian +// per the BDA spec. Sub-paths, the STN table, and per-angle alternates +// are skipped — we use each PlayItem's leading length field to advance +// past everything we don't need. +func ParseMPLS(data []byte) (*MPLSPlayList, error) { + if len(data) < mplsHeaderSize { + return nil, errors.New("mpls: truncated header") + } + if string(data[0:4]) != "MPLS" { + return nil, fmt.Errorf("mpls: bad magic %q", data[0:4]) + } + version := string(data[4:8]) + playListOff := binary.BigEndian.Uint32(data[8:12]) + if int(playListOff) < mplsHeaderSize || int(playListOff)+10 > len(data) { + return nil, fmt.Errorf("mpls: PlayList offset %d out of range (file size %d)", playListOff, len(data)) + } + + // PlayList header: length(4) + reserved(2) + numPlayItems(2) + numSubPaths(2) + pl := data[playListOff:] + playListLen := binary.BigEndian.Uint32(pl[0:4]) + if int(playListOff)+4+int(playListLen) > len(data) { + return nil, fmt.Errorf("mpls: PlayList length %d exceeds file size", playListLen) + } + numPlayItems := binary.BigEndian.Uint16(pl[6:8]) + + items := make([]MPLSPlayItem, 0, numPlayItems) + // PlayItems start after the 10-byte PlayList header. + cursor := 10 + plBody := pl[:4+int(playListLen)] + for i := range int(numPlayItems) { + if cursor+2 > len(plBody) { + return nil, fmt.Errorf("mpls: PlayItem %d header out of range", i) + } + // PlayItem length excludes the 2-byte length field itself. 
+ itemLen := int(binary.BigEndian.Uint16(plBody[cursor : cursor+2])) + itemStart := cursor + 2 + itemEnd := itemStart + itemLen + if itemEnd > len(plBody) { + return nil, fmt.Errorf("mpls: PlayItem %d length %d overruns PlayList", i, itemLen) + } + // Fixed PlayItem layout we care about: + // +0 5 clip_information_file_name (e.g. "00001") + // +5 4 clip_codec_identifier ("M2TS") + // +9 2 flags incl. is_multi_angle / connection_condition + // +11 1 ref_to_STC_id + // +12 4 IN_time (45 kHz) + // +16 4 OUT_time (45 kHz) + if itemLen < 20 { + return nil, fmt.Errorf("mpls: PlayItem %d too short (len=%d)", i, itemLen) + } + body := plBody[itemStart:itemEnd] + items = append(items, MPLSPlayItem{ + ClipName: string(body[0:5]), + InTime: binary.BigEndian.Uint32(body[12:16]), + OutTime: binary.BigEndian.Uint32(body[16:20]), + }) + cursor = itemEnd + } + + return &MPLSPlayList{Version: version, PlayItems: items}, nil +} diff --git a/internal/importer/archive/iso/mpls_test.go b/internal/importer/archive/iso/mpls_test.go new file mode 100644 index 00000000..0df3b4da --- /dev/null +++ b/internal/importer/archive/iso/mpls_test.go @@ -0,0 +1,167 @@ +package iso + +import ( + "encoding/binary" + "testing" +) + +// buildMPLS constructs a synthetic .mpls byte stream containing the given +// PlayItems. Each PlayItem is laid out at its minimum legal size (20 bytes +// body + 2-byte length prefix). multiAngleTail, when non-nil, is appended +// inside the first PlayItem to exercise the length-prefixed skip logic. +func buildMPLS(t *testing.T, version string, items []MPLSPlayItem, multiAngleTail []byte) []byte { + t.Helper() + if len(version) != 4 { + t.Fatalf("version must be 4 bytes, got %q", version) + } + + // Build PlayItems body. 
+ var playItemsBuf []byte + for i, it := range items { + if len(it.ClipName) != 5 { + t.Fatalf("item %d: ClipName must be 5 chars", i) + } + body := make([]byte, 20) + copy(body[0:5], it.ClipName) + copy(body[5:9], "M2TS") + // flags (2) + ref_to_STC_id (1) left zero + binary.BigEndian.PutUint32(body[12:16], it.InTime) + binary.BigEndian.PutUint32(body[16:20], it.OutTime) + // Inject the multi-angle tail into the first item only — the parser + // must skip past it via the length field without misaligning the + // next item. + if i == 0 && multiAngleTail != nil { + body = append(body, multiAngleTail...) + } + // PlayItem length excludes its own 2-byte length prefix. + lenPrefix := make([]byte, 2) + binary.BigEndian.PutUint16(lenPrefix, uint16(len(body))) + playItemsBuf = append(playItemsBuf, lenPrefix...) + playItemsBuf = append(playItemsBuf, body...) + } + + // PlayList header: length(4)+reserved(2)+numPI(2)+numSub(2)+playItems + plHeader := make([]byte, 10) + // length excludes its own 4-byte field + binary.BigEndian.PutUint32(plHeader[0:4], uint32(6+len(playItemsBuf))) + binary.BigEndian.PutUint16(plHeader[6:8], uint16(len(items))) + // numSubPaths left zero + + playList := append(plHeader, playItemsBuf...) + + // File header: 4 magic + 4 version + 4 PL offset + 4 PLMark + 4 ExtData + hdr := make([]byte, mplsHeaderSize) + copy(hdr[0:4], "MPLS") + copy(hdr[4:8], version) + binary.BigEndian.PutUint32(hdr[8:12], uint32(mplsHeaderSize)) + // PlayListMark & ExtensionData offsets unused; leave zero. + + return append(hdr, playList...) 
+} + +func TestParseMPLS(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + data []byte + wantErr bool + wantItems []MPLSPlayItem + wantTicks int64 + }{ + { + name: "single PlayItem", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 1000, OutTime: 91000}, + }, nil), + wantItems: []MPLSPlayItem{{ClipName: "00001", InTime: 1000, OutTime: 91000}}, + wantTicks: 90000, // 2s at 45kHz + }, + { + name: "five PlayItems (main feature shape)", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, nil), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, + wantTicks: 5 * 45000, + }, + { + name: "multi-angle PlayItem (tail must be skipped)", + // The tail simulates angle-count + alt-angle records appended + // after the fixed PlayItem prefix. The parser only consumes the + // first 20 bytes and uses the length field to skip past the + // rest, so item 2 must still parse cleanly. 
+ data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, []byte{ + 0x02, // num_angles + 0x00, // is_different_audios flags + '0', '0', '0', '0', '7', 'M', '2', 'T', 'S', 0x00, // one alt angle entry (10 bytes) + }), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, + wantTicks: 45000 + 90000, + }, + { + name: "wrong magic", + data: []byte("NOTMPLS-padding-here-padding-here"), + wantErr: true, + }, + { + name: "truncated header", + data: []byte("MPLS"), + wantErr: true, + }, + { + name: "PlayList offset out of range", + data: func() []byte { + b := make([]byte, mplsHeaderSize) + copy(b[0:4], "MPLS") + copy(b[4:8], "0200") + binary.BigEndian.PutUint32(b[8:12], 9999) + return b + }(), + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got, err := ParseMPLS(tc.data) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got.PlayItems) != len(tc.wantItems) { + t.Fatalf("PlayItems len = %d, want %d", len(got.PlayItems), len(tc.wantItems)) + } + for i, it := range got.PlayItems { + if it != tc.wantItems[i] { + t.Errorf("PlayItem[%d] = %+v, want %+v", i, it, tc.wantItems[i]) + } + } + if d := got.DurationTicks(); d != tc.wantTicks { + t.Errorf("DurationTicks = %d, want %d", d, tc.wantTicks) + } + }) + } +} diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 10c5fd7a..340b4acc 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -11,61 +11,80 @@ import ( "github.com/javi11/altmount/internal/pool" ) -// AnalyzeISOContent enumerates all allowed media files inside the given ISO source -// and returns ISOFileContent entries with 
Usenet segment mappings. -func AnalyzeISOContent( +// AnalyzeISO inspects the given ISO source and returns: +// - the volume label (for multi-disc grouping), +// - the filtered list of inner files (Files), +// - the ordered MainFeature M2TS list when the ISO is a Blu-ray with a +// resolvable playlist (nil otherwise). +// +// allowedExtensions only filters Files. MainFeature is always returned for +// BDMV discs regardless of the extension list — its existence is the +// signal callers use to opt into virtual concatenation. +func AnalyzeISO( ctx context.Context, src ISOSource, poolManager pool.Manager, maxPrefetch int, readTimeout time.Duration, allowedExtensions []string, -) ([]ISOFileContent, error) { +) (*AnalyzedISO, error) { rs, closer, err := NewISOReadSeeker(ctx, src, poolManager, maxPrefetch, readTimeout) if err != nil { return nil, fmt.Errorf("iso: creating read seeker for %q: %w", src.Filename, err) } defer closer.Close() - files, err := ListISOFiles(rs) + entries, err := ListISOFiles(rs) if err != nil { return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - var result []ISOFileContent - for _, entry := range files { - if !isAllowedFile(entry.path, int64(entry.size), allowedExtensions) { + out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} + + for _, e := range entries { + if !isAllowedFile(e.path, int64(e.size), allowedExtensions) { continue } + out.Files = append(out.Files, buildFileContent(src, e)) + } - isoOffset := int64(entry.lba) * iso9660SectorSize - - fc := ISOFileContent{ - InternalPath: entry.path, - Filename: filepath.Base(entry.path), - Size: int64(entry.size), + if mf := ResolveMainFeature(rs, entries); mf != nil { + out.DurationTicks = mf.DurationTicks + for _, e := range mf.Streams { + out.MainFeature = append(out.MainFeature, buildFileContent(src, e)) } + } - if len(src.AesKey) == 0 { - // Unencrypted: slice segments to cover exactly this file's bytes - sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, 
int64(entry.size)) - fc.Segments = sliced - } else { - // Encrypted: create a NestedSource so the VFS can decrypt and seek - fc.NestedSource = &ISONestedSource{ - Segments: src.Segments, - AesKey: src.AesKey, - AesIV: src.AesIV, - InnerOffset: isoOffset, - InnerLength: int64(entry.size), - InnerVolumeSize: src.Size, - } - } + return out, nil +} - result = append(result, fc) +// buildFileContent turns one ISO directory entry into an ISOFileContent, +// slicing or referencing the source's Usenet segments according to whether +// the ISO is encrypted. +func buildFileContent(src ISOSource, e isoFileEntry) ISOFileContent { + isoOffset := int64(e.lba) * iso9660SectorSize + fc := ISOFileContent{ + InternalPath: e.path, + Filename: filepath.Base(e.path), + Size: int64(e.size), } - - return result, nil + if len(src.AesKey) == 0 { + // Unencrypted: pre-slice segments so this content stands alone. + sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, int64(e.size)) + fc.Segments = sliced + } else { + // Encrypted: AES-CBC requires the full inner volume + offset so + // the cipher can chain IVs from the start of the ISO. + fc.NestedSource = &ISONestedSource{ + Segments: src.Segments, + AesKey: src.AesKey, + AesIV: src.AesIV, + InnerOffset: isoOffset, + InnerLength: int64(e.size), + InnerVolumeSize: src.Size, + } + } + return fc } // isAllowedFile returns true if the file extension is in the allowed list. diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index 53e51467..b755fe1e 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -33,3 +33,16 @@ type ISONestedSource struct { InnerLength int64 // file size InnerVolumeSize int64 // ISO total decrypted size } + +// AnalyzedISO is the full result of inspecting one ISO image. Files mirrors +// what AnalyzeISOContent has always returned (all media files with extension +// filtering applied). 
MainFeature, when non-nil, is the ordered M2TS list +// that forms the Blu-ray main feature according to BDMV/PLAYLIST/*.mpls — +// this is the slice callers should concatenate to produce a single playable +// virtual file. +type AnalyzedISO struct { + VolumeLabel string + Files []ISOFileContent + MainFeature []ISOFileContent // nil for non-BDMV / unparseable playlists + DurationTicks int64 // sum of (OUT-IN) of MainFeature at 45 kHz +} diff --git a/internal/importer/archive/iso/volume.go b/internal/importer/archive/iso/volume.go new file mode 100644 index 00000000..f2db5657 --- /dev/null +++ b/internal/importer/archive/iso/volume.go @@ -0,0 +1,30 @@ +package iso + +import ( + "io" + "strings" +) + +// ReadVolumeLabel returns the ISO 9660 Volume Identifier from the Primary +// Volume Descriptor at sector 16. Hybrid (ISO 9660/UDF bridge) images carry +// a 9660 PVD even when the active filesystem is UDF, so this works for plain +// ISOs and bridge-format BD images; a UDF-only image has no PVD to read. +// +// Returns an empty string if the descriptor is missing or invalid — callers +// fall back to the ISO filename for disc-group keying. +func ReadVolumeLabel(rs io.ReadSeeker) string { + pvd := make([]byte, iso9660SectorSize) + if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err != nil { + return "" + } + if _, err := io.ReadFull(rs, pvd); err != nil { + return "" + } + // Type 1 = Primary Volume Descriptor; identifier "CD001" at +1. + if pvd[0] != 1 || string(pvd[1:6]) != "CD001" { + return "" + } + // Volume identifier: 32 bytes of d-characters at offset 40, space-padded. 
+ label := strings.TrimRight(string(pvd[40:72]), " \x00") + return label +} diff --git a/internal/importer/archive/iso/volume_test.go b/internal/importer/archive/iso/volume_test.go new file mode 100644 index 00000000..f8aeac1a --- /dev/null +++ b/internal/importer/archive/iso/volume_test.go @@ -0,0 +1,70 @@ +package iso + +import ( + "bytes" + "io" + "testing" +) + +// buildPVD constructs a 17-sector buffer with a synthetic Primary Volume +// Descriptor placed at sector 16. The remaining bytes are zero-filled. +func buildPVD(label string, typeCode byte, identifier string) io.ReadSeeker { + buf := make([]byte, 17*iso9660SectorSize) + pvd := buf[16*iso9660SectorSize:] + pvd[0] = typeCode + copy(pvd[1:6], identifier) + // Volume identifier field is 32 bytes, space-padded. + field := make([]byte, 32) + for i := range field { + field[i] = ' ' + } + copy(field, label) + copy(pvd[40:72], field) + return bytes.NewReader(buf) +} + +func TestReadVolumeLabel(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + rs io.ReadSeeker + want string + }{ + { + name: "Avatar disc 1 label", + rs: buildPVD("AVATAR_FIRE_AND_ASH_DISC_1", 1, "CD001"), + want: "AVATAR_FIRE_AND_ASH_DISC_1", + }, + { + name: "padded short label trimmed", + rs: buildPVD("FOO", 1, "CD001"), + want: "FOO", + }, + { + name: "wrong type code", + rs: buildPVD("ANYTHING", 2, "CD001"), + want: "", + }, + { + name: "wrong identifier", + rs: buildPVD("ANYTHING", 1, "BAD!?"), + want: "", + }, + { + name: "short input (no sector 16)", + rs: bytes.NewReader(make([]byte, 1024)), + want: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := ReadVolumeLabel(tc.rs) + if got != tc.want { + t.Errorf("ReadVolumeLabel = %q, want %q", got, tc.want) + } + }) + } +} diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go new file mode 100644 index 00000000..daa8e4fc --- /dev/null +++ 
b/internal/importer/archive/iso_expansion.go @@ -0,0 +1,296 @@ +package archive + +import ( + "context" + "fmt" + "log/slog" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/javi11/altmount/internal/importer/archive/iso" + "github.com/javi11/altmount/internal/pool" +) + +// analyzedISO bundles an ISO Content with its inspection result and its +// place in a multi-disc grouping. Used internally by ExpandISOContents. +type analyzedISO struct { + src Content // original ISO Content (for fallback / metadata) + analyzed *iso.AnalyzedISO // result of iso.AnalyzeISO + discNum int // parsed disc number; 0 when label has no disc suffix + groupKey string // base name stripped of any DISC/CD/PART suffix +} + +// ExpandISOContents replaces .iso entries in contents with the media they +// contain, applying two Blu-ray-aware optimisations on top of the legacy +// "pick the largest file" behaviour: +// +// 1. Within a disc, if BDMV/PLAYLIST/*.mpls identifies a main feature +// spanning multiple M2TS clips, the clips are virtually concatenated +// into one Content via NestedSources — the player sees a single file. +// 2. Across discs in the same archive group (e.g. DISC_1 and DISC_2 ISOs +// in one NZB), discs sharing a stripped volume label are merged so +// the cross-disc movie also plays as one file. +// +// Non-ISO entries pass through unchanged. Per-ISO errors are non-fatal: +// on failure the original .iso Content is kept so downstream still has +// something to work with. 
+func ExpandISOContents( + ctx context.Context, + expand bool, + contents []Content, + poolManager pool.Manager, + maxPrefetch int, + readTimeout time.Duration, + allowedExtensions []string, +) ([]Content, error) { + if !expand { + return contents, nil + } + + var ( + result []Content + groups = make(map[string][]analyzedISO) + groupKeys []string + ) + + for _, c := range contents { + if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { + result = append(result, c) + continue + } + + src := iso.ISOSource{ + Filename: c.Filename, + Segments: c.Segments, + AesKey: c.AesKey, + AesIV: c.AesIV, + Size: c.Size, + } + a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) + if err != nil { + slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", + "file", c.Filename, "error", err) + result = append(result, c) + continue + } + if len(a.Files) == 0 && len(a.MainFeature) == 0 { + result = append(result, c) + continue + } + + key, discNum := discGroupKey(a.VolumeLabel, c.Filename) + entry := analyzedISO{src: c, analyzed: a, discNum: discNum, groupKey: key} + if _, exists := groups[key]; !exists { + groupKeys = append(groupKeys, key) + } + groups[key] = append(groups[key], entry) + } + + sort.Strings(groupKeys) // deterministic output order + for _, key := range groupKeys { + g := groups[key] + sort.SliceStable(g, func(i, j int) bool { return g[i].discNum < g[j].discNum }) + + // Concatenate main features only when *every* member of the group + // has one — mixing BDMV and non-BDMV in a single group is almost + // always a false grouping, so fall back to per-disc handling. 
+ allHaveMainFeature := true + for _, e := range g { + if len(e.analyzed.MainFeature) == 0 { + allHaveMainFeature = false + break + } + } + + if allHaveMainFeature { + merged, ok := buildMainFeatureContent(ctx, key, g) + if ok { + result = append(result, merged) + continue + } + } + + // Fallback: legacy per-ISO largest-file selection. + for _, e := range g { + nc, ok := buildLargestFileContent(e.src, e.analyzed.Files) + if !ok { + result = append(result, e.src) + continue + } + result = append(result, nc) + } + } + + return result, nil +} + +// buildMainFeatureContent concatenates every member's MainFeature into a +// single Content whose NestedSources chain spans every M2TS in disc and +// playlist order. Returns (zero, false) when, after conversion, the chain +// is empty. +func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedISO) (Content, bool) { + var ( + sources []NestedSource + totalSize int64 + firstISOName string + nzbdavID string + ) + for _, e := range g { + if firstISOName == "" { + firstISOName = e.src.Filename + nzbdavID = e.src.NzbdavID + } + for _, fc := range e.analyzed.MainFeature { + ns := isoFileContentToNestedSource(fc) + if ns.InnerLength <= 0 { + continue + } + sources = append(sources, ns) + totalSize += ns.InnerLength + } + } + if len(sources) == 0 { + return Content{}, false + } + + filename := mainFeatureFilename(groupKey, firstISOName) + slog.InfoContext(ctx, "Built Blu-ray main-feature virtual file", + "group", groupKey, + "discs", len(g), + "clips", len(sources), + "size_bytes", totalSize, + "filename", filename, + ) + + return Content{ + InternalPath: filename, + Filename: filename, + Size: totalSize, + PackedSize: totalSize, + NzbdavID: nzbdavID, + NestedSources: sources, + ISOExpansionIndex: 1, + }, true +} + +// buildLargestFileContent reproduces the pre-existing "pick the single +// biggest file inside the ISO" behaviour. Kept as a fallback for ISOs +// that have no BDMV main feature. 
+func buildLargestFileContent(src Content, files []iso.ISOFileContent) (Content, bool) { + if len(files) == 0 { + return Content{}, false + } + sort.Slice(files, func(i, j int) bool { return files[i].Size > files[j].Size }) + f := files[0] + nc := Content{ + InternalPath: f.InternalPath, + Filename: f.Filename, + Size: f.Size, + PackedSize: f.Size, + NzbdavID: src.NzbdavID, + ISOExpansionIndex: 1, + } + if f.NestedSource != nil { + nc.NestedSources = []NestedSource{isoFileContentToNestedSource(f)} + } else { + nc.Segments = f.Segments + } + return nc, true +} + +// isoFileContentToNestedSource converts an ISOFileContent into a +// NestedSource. For unencrypted ISOs the segments are already sliced to +// cover exactly this file, so InnerOffset is 0 and InnerVolumeSize equals +// the file size (unused when AesKey is empty — see +// MetadataVirtualFile.createNestedSourceReader). +func isoFileContentToNestedSource(fc iso.ISOFileContent) NestedSource { + if fc.NestedSource != nil { + return NestedSource{ + Segments: fc.NestedSource.Segments, + AesKey: fc.NestedSource.AesKey, + AesIV: fc.NestedSource.AesIV, + InnerOffset: fc.NestedSource.InnerOffset, + InnerLength: fc.NestedSource.InnerLength, + InnerVolumeSize: fc.NestedSource.InnerVolumeSize, + } + } + return NestedSource{ + Segments: fc.Segments, + InnerOffset: 0, + InnerLength: fc.Size, + InnerVolumeSize: fc.Size, + } +} + +// discSuffixPattern matches volume labels like "AVATAR_FIRE_AND_ASH_DISC_1", +// "MOVIE-CD2", "TITLE PART 3", etc. Capture 1 is the stripped base name, +// capture 2 is the disc identifier (numeric or single letter). +var discSuffixPattern = regexp.MustCompile(`(?i)^(.+?)[ _\-]*(?:disc|cd|part|d|side)[ _\-]*([0-9]+|[a-z])$`) + +// discGroupKey computes the disc-grouping key and parsed disc number for +// an ISO. It prefers the volume label and falls back to the ISO filename +// (without extension) when the label is empty or doesn't match a disc +// pattern. 
Single-disc ISOs return key=, discNum=0. +func discGroupKey(label, isoFilename string) (string, int) { + candidates := []string{label} + if isoFilename != "" { + candidates = append(candidates, strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename))) + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c == "" { + continue + } + if m := discSuffixPattern.FindStringSubmatch(c); m != nil { + base := normaliseGroupKey(m[1]) + return base, parseDiscNumber(m[2]) + } + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c != "" { + return normaliseGroupKey(c), 0 + } + } + return "", 0 +} + +func normaliseGroupKey(s string) string { + s = strings.TrimSpace(s) + s = strings.Trim(s, "_- ") + return strings.ToUpper(s) +} + +// parseDiscNumber turns "1" → 1, "2" → 2, "A" → 1, "B" → 2, etc. +func parseDiscNumber(s string) int { + if n, err := strconv.Atoi(s); err == nil { + return n + } + if len(s) == 1 { + c := strings.ToUpper(s)[0] + if c >= 'A' && c <= 'Z' { + return int(c-'A') + 1 + } + } + return 0 +} + +// mainFeatureFilename derives a sensible filename for the virtual concat. +// Downstream renaming (see rar/sevenzip aggregator post-processing) will +// usually replace the base name with the NZB release name; we only need a +// valid .m2ts extension here. 
+func mainFeatureFilename(groupKey, isoFilename string) string { + const ext = ".m2ts" + if groupKey != "" { + return fmt.Sprintf("%s%s", groupKey, ext) + } + if isoFilename != "" { + stem := strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename)) + return stem + ext + } + return "main_feature" + ext +} diff --git a/internal/importer/archive/iso_expansion_test.go b/internal/importer/archive/iso_expansion_test.go new file mode 100644 index 00000000..e009e615 --- /dev/null +++ b/internal/importer/archive/iso_expansion_test.go @@ -0,0 +1,214 @@ +package archive + +import ( + "context" + "testing" + + "github.com/javi11/altmount/internal/importer/archive/iso" + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +func TestDiscGroupKey(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + label string + filename string + wantKey string + wantNum int + }{ + {"avatar disc 1 label", "AVATAR_FIRE_AND_ASH_DISC_1", "any.iso", "AVATAR_FIRE_AND_ASH", 1}, + {"avatar disc 2 label", "AVATAR_FIRE_AND_ASH_DISC_2", "any.iso", "AVATAR_FIRE_AND_ASH", 2}, + {"compact DISC2", "MOVIE_DISC2", "any.iso", "MOVIE", 2}, + {"CD suffix", "MOVIE-CD1", "any.iso", "MOVIE", 1}, + {"PART suffix with spaces", "TITLE PART 3", "any.iso", "TITLE", 3}, + {"letter disc identifier B → 2", "FOO_DISC_B", "any.iso", "FOO", 2}, + {"no suffix → solo", "PLAIN_MOVIE", "any.iso", "PLAIN_MOVIE", 0}, + {"empty label falls back to filename stem", "", "MyMovie_Disc_1.iso", "MYMOVIE", 1}, + {"empty label and weird filename", "", "thing.iso", "THING", 0}, + {"only label has disc, filename plain", "X_DISC_2", "anything.iso", "X", 2}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + gotKey, gotNum := discGroupKey(tc.label, tc.filename) + if gotKey != tc.wantKey || gotNum != tc.wantNum { + t.Errorf("discGroupKey(%q,%q) = (%q,%d), want (%q,%d)", + tc.label, tc.filename, gotKey, gotNum, tc.wantKey, tc.wantNum) + } + }) + } +} + +func 
TestParseDiscNumber(t *testing.T) { + t.Parallel() + + cases := map[string]int{ + "1": 1, + "2": 2, + "10": 10, + "A": 1, + "a": 1, + "B": 2, + "": 0, + "AB": 0, + "foo": 0, + } + for in, want := range cases { + if got := parseDiscNumber(in); got != want { + t.Errorf("parseDiscNumber(%q) = %d, want %d", in, got, want) + } + } +} + +func TestIsoFileContentToNestedSource(t *testing.T) { + t.Parallel() + + t.Run("unencrypted uses pre-sliced segments", func(t *testing.T) { + t.Parallel() + segs := []*metapb.SegmentData{ + {Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}, + } + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 100, + Segments: segs, + } + ns := isoFileContentToNestedSource(fc) + if len(ns.Segments) != 1 || ns.InnerLength != 100 || ns.InnerOffset != 0 { + t.Fatalf("unexpected NestedSource: %+v", ns) + } + if len(ns.AesKey) != 0 { + t.Errorf("AesKey should be empty, got %v", ns.AesKey) + } + }) + + t.Run("encrypted carries offset and key", func(t *testing.T) { + t.Parallel() + segs := []*metapb.SegmentData{ + {Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}, + } + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 2048, + NestedSource: &iso.ISONestedSource{ + Segments: segs, + AesKey: []byte("0123456789abcdef0123456789abcdef"), + AesIV: []byte("0123456789abcdef"), + InnerOffset: 1024, + InnerLength: 2048, + InnerVolumeSize: 99999, + }, + } + ns := isoFileContentToNestedSource(fc) + if ns.InnerOffset != 1024 || ns.InnerLength != 2048 || ns.InnerVolumeSize != 99999 { + t.Fatalf("unexpected NestedSource offsets: %+v", ns) + } + if len(ns.AesKey) == 0 { + t.Error("AesKey should be carried through for encrypted source") + } + }) +} + +func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { + t.Parallel() + + // Helper to make a fake ISO main-feature ISOFileContent with given size + // and a single-segment outer slice (segment values are not interpreted + // by buildMainFeatureContent — only Size and the source 
attributes + // matter for the assembled NestedSources chain). + mkClip := func(name string, size int64) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Segments: []*metapb.SegmentData{ + {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, + }, + } + } + + disc1 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_1.iso", NzbdavID: "nzb-1"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_1", + MainFeature: []iso.ISOFileContent{ + mkClip("00001.m2ts", 10_000_000), + mkClip("00002.m2ts", 20_000_000), + }, + }, + discNum: 1, + groupKey: "AVATAR", + } + disc2 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_2.iso", NzbdavID: "nzb-2"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_2", + MainFeature: []iso.ISOFileContent{ + mkClip("00003.m2ts", 30_000_000), + }, + }, + discNum: 2, + groupKey: "AVATAR", + } + + got, ok := buildMainFeatureContent(context.Background(), "AVATAR", []analyzedISO{disc1, disc2}) + if !ok { + t.Fatal("buildMainFeatureContent returned ok=false") + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "nzb-1" { + t.Errorf("NzbdavID = %q, want nzb-1 (from first disc)", got.NzbdavID) + } + if len(got.NestedSources) != 3 { + t.Fatalf("NestedSources count = %d, want 3 (2 clips from disc 1 + 1 clip from disc 2)", len(got.NestedSources)) + } + wantSize := int64(10_000_000 + 20_000_000 + 30_000_000) + if got.Size != wantSize { + t.Errorf("Size = %d, want %d", got.Size, wantSize) + } + if got.PackedSize != wantSize { + t.Errorf("PackedSize = %d, want %d", got.PackedSize, wantSize) + } + // Order must follow disc-then-playlist (disc1.clip1, disc1.clip2, disc2.clip3). 
+ wantOrder := []int64{10_000_000, 20_000_000, 30_000_000} + for i, ns := range got.NestedSources { + if ns.InnerLength != wantOrder[i] { + t.Errorf("NestedSources[%d].InnerLength = %d, want %d", i, ns.InnerLength, wantOrder[i]) + } + } + if got.Filename != "AVATAR.m2ts" { + t.Errorf("Filename = %q, want AVATAR.m2ts", got.Filename) + } +} + +func TestBuildLargestFileContent(t *testing.T) { + t.Parallel() + + files := []iso.ISOFileContent{ + {Filename: "small.mkv", Size: 500, Segments: []*metapb.SegmentData{ + {Id: "s", StartOffset: 0, EndOffset: 499, SegmentSize: 500}, + }}, + {Filename: "big.mkv", Size: 5_000_000, Segments: []*metapb.SegmentData{ + {Id: "b", StartOffset: 0, EndOffset: 4_999_999, SegmentSize: 5_000_000}, + }}, + } + src := Content{Filename: "thing.iso", NzbdavID: "id-1"} + + got, ok := buildLargestFileContent(src, files) + if !ok { + t.Fatal("buildLargestFileContent returned ok=false") + } + if got.Filename != "big.mkv" { + t.Errorf("Filename = %q, want big.mkv (largest)", got.Filename) + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "id-1" { + t.Errorf("NzbdavID = %q, want id-1", got.NzbdavID) + } +} diff --git a/internal/importer/archive/rar/aggregator.go b/internal/importer/archive/rar/aggregator.go index 52f8d487..91138405 100644 --- a/internal/importer/archive/rar/aggregator.go +++ b/internal/importer/archive/rar/aggregator.go @@ -16,7 +16,6 @@ import ( "github.com/javi11/altmount/internal/encryption/aes" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -209,7 +208,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { } // Expand ISO files found inside the RAR archive into their inner media files - 
rarContents, err := expandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -474,81 +473,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // GroupArchivesByBaseName groups ParsedFiles by their RAR base name (case-insensitive). // Returns groups in deterministic order (sorted by base name) for testability. func GroupArchivesByBaseName(files []parser.ParsedFile) [][]parser.ParsedFile { diff --git a/internal/importer/archive/sevenzip/aggregator.go b/internal/importer/archive/sevenzip/aggregator.go index f0214a29..4fbabce9 100644 --- a/internal/importer/archive/sevenzip/aggregator.go +++ b/internal/importer/archive/sevenzip/aggregator.go @@ -6,7 +6,6 @@ import ( "log/slog" "os" "path/filepath" - "sort" "strings" "sync/atomic" "time" @@ -14,7 +13,6 @@ import ( concpool "github.com/sourcegraph/conc/pool" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -186,7 +184,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { slog.InfoContext(ctx, "Successfully analyzed 7zip archive content", "files_in_archive", len(sevenZipContents)) // Expand ISO files found inside the 7zip archive into their inner media files - sevenZipContents, err = expandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, 
allowedFileExtensions) + sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -445,81 +443,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // normalizeArchiveReleaseFilename aligns the filename to the NZB basename while keeping the original extension. func normalizeArchiveReleaseFilename(nzbFilename, originalFilename string) string { releaseName := nzbtrim.TrimNzbExtension(nzbFilename) From b782ea2ba49ebe41b5af76a203ab5c8f0c5e7b30 Mon Sep 17 00:00:00 2001 From: javi11 Date: Wed, 20 May 2026 20:17:26 +0200 Subject: [PATCH 2/8] fix(iso): index BDMV/STREAM/SSIF/*.SSIF for 3D Blu-ray main features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a 3D-only Blu-ray release (e.g. AVATAR_FIRE_AND_ASH_3D), the main feature playlist references clips that exist only as SSIF files in BDMV/STREAM/SSIF/ — the M2TS directory holds short extras. The previous resolver indexed only M2TS, so the long 3D playlist failed to resolve any clips and a short extras playlist won by default, producing a ~177 MB virtual file for a movie whose NZB carries ~88 GB of source data. Resolve clip names against M2TS first (preserves the smaller, more compatible 2D version on hybrid 3D releases) and fall back to SSIF when only it can satisfy the playlist. Two new test cases cover the 3D-only-with-SSIF and hybrid-prefers-M2TS paths. 
--- internal/importer/archive/iso/bluray.go | 32 +++++--- internal/importer/archive/iso/bluray_test.go | 82 ++++++++++++++++++++ 2 files changed, 104 insertions(+), 10 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index e2ce4d73..43b0b10c 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -29,22 +29,31 @@ type MainFeaturePlaylist struct { // keep evaluating the rest, mirroring how every Blu-ray player tolerates // malformed entries in BDMV/PLAYLIST/. func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { - // Build an index of all M2TS streams by their 5-digit clip stem (the - // part MPLS references). M2TS files live at BDMV/STREAM/.M2TS - // case-insensitively. - streamByClip := make(map[string]isoFileEntry) + // Build per-clip indexes. M2TS streams live at BDMV/STREAM/.M2TS + // and carry the 2D version (or the only version on a 2D disc). SSIF + // streams live at BDMV/STREAM/SSIF/.SSIF and carry the + // stereoscopic interleaved 3D version — on 3D-only Blu-ray releases + // the main feature playlist references SSIF clips, while the M2TS + // directory holds only extras. We prefer M2TS when both exist (smaller + // bytes, universal playback) and fall back to SSIF when only it + // resolves the playlist's clip names. 
+ m2tsByClip := make(map[string]isoFileEntry) + ssifByClip := make(map[string]isoFileEntry) var playlistEntries []isoFileEntry for _, f := range files { up := strings.ToUpper(f.path) switch { case strings.HasPrefix(up, "BDMV/PLAYLIST/") && strings.HasSuffix(up, ".MPLS"): playlistEntries = append(playlistEntries, f) + case strings.HasPrefix(up, "BDMV/STREAM/SSIF/") && strings.HasSuffix(up, ".SSIF"): + base := up[len("BDMV/STREAM/SSIF/") : len(up)-len(".SSIF")] + ssifByClip[base] = f case strings.HasPrefix(up, "BDMV/STREAM/") && strings.HasSuffix(up, ".M2TS"): base := up[len("BDMV/STREAM/") : len(up)-len(".M2TS")] - streamByClip[base] = f + m2tsByClip[base] = f } } - if len(playlistEntries) == 0 || len(streamByClip) == 0 { + if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { return nil } @@ -65,14 +74,17 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay continue } - // Resolve clip names → M2TS entries, in playlist order. + // Resolve clip names in playlist order, preferring M2TS over SSIF. streams := make([]isoFileEntry, 0, len(pl.PlayItems)) for _, it := range pl.PlayItems { - entry, ok := streamByClip[strings.ToUpper(it.ClipName)] - if !ok { + name := strings.ToUpper(it.ClipName) + if entry, ok := m2tsByClip[name]; ok { + streams = append(streams, entry) continue } - streams = append(streams, entry) + if entry, ok := ssifByClip[name]; ok { + streams = append(streams, entry) + } } if len(streams) == 0 { continue diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index e548f61a..e788bafe 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -102,6 +102,88 @@ func TestResolveMainFeature(t *testing.T) { } }) + t.Run("3D BD: playlist resolves against SSIF when M2TS missing", func(t *testing.T) { + t.Parallel() + // Avatar-2-style 3D-only release: BDMV/STREAM/*.M2TS holds only + // extras (tiny). 
The real main feature lives in BDMV/STREAM/SSIF/ + // and is referenced by its own MPLS. The resolver must index SSIF + // so the long playlist resolves and wins. + extras := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 90 * 45000}, // 90s extra + }, nil) + mainFeature3D := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00101", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00102", InTime: 0, OutTime: 12 * 60 * 45000}, // 132 min total + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: extras, + 110: mainFeature3D, + }) + + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(extras))}, + {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(mainFeature3D))}, + // Only the extras live as M2TS: + {path: "BDMV/STREAM/00010.M2TS", lba: 200, size: 50_000_000}, + // Main feature is SSIF only: + {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 25_000_000_000}, + {path: "BDMV/STREAM/SSIF/00101.SSIF", lba: 400, size: 25_000_000_000}, + {path: "BDMV/STREAM/SSIF/00102.SSIF", lba: 500, size: 5_000_000_000}, + } + + got := ResolveMainFeature(rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS (3D main feature)", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3 SSIF clips", len(got.Streams)) + } + wantOrder := []string{ + "BDMV/STREAM/SSIF/00100.SSIF", + "BDMV/STREAM/SSIF/00101.SSIF", + "BDMV/STREAM/SSIF/00102.SSIF", + } + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("hybrid 3D BD: prefers M2TS over SSIF when both exist", func(t *testing.T) { + t.Parallel() + // Both 2D MPLS (refs M2TS) and 3D MPLS (refs SSIF) point at clips + 
// of the same name. With both files present, the M2TS version is + // the right pick: smaller bytes, universal playback. The resolver + // should select it even if the 3D playlist is marginally longer. + mainFeature := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{100: mainFeature}) + + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00800.MPLS", lba: 100, size: uint64(len(mainFeature))}, + {path: "BDMV/STREAM/00100.M2TS", lba: 200, size: 20_000_000_000}, + {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 40_000_000_000}, + } + + got := ResolveMainFeature(rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if len(got.Streams) != 1 { + t.Fatalf("Streams len = %d, want 1", len(got.Streams)) + } + if got.Streams[0].path != "BDMV/STREAM/00100.M2TS" { + t.Errorf("picked %q, want M2TS over SSIF", got.Streams[0].path) + } + }) + t.Run("playlist referencing missing M2TS yields nil", func(t *testing.T) { t.Parallel() // Playlist references a clip that has no corresponding M2TS entry. From 919ecad7b3c51cc07b0964f4fabda931ef0c87ce Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 08:57:02 +0200 Subject: [PATCH 3/8] chore(iso): instrument BDMV resolver with [DEBUG-isobd] tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A repeated 88GB-NZB run is still producing a 177MB virtual file with clips=2 — byte-identical to the pre-SSIF-fix output. Three hypotheses remain: stale binary, 'no actual SSIF in this BDMV' (release uses M2TS only), or SSIF lives at a non-standard path. Add one summary log per ISO (total files, playlist count, M2TS and SSIF clip counts, 12 sample paths) and one log per evaluated MPLS (resolved clip count, unresolved count, duration ticks, summed stream bytes) plus one 'picked' line. 
All prefixed with [DEBUG-isobd] for cheap cleanup and to confirm the new binary is live (the prefix won't appear in prior builds). --- internal/importer/archive/iso/bluray.go | 49 +++++++++++++++++++- internal/importer/archive/iso/bluray_test.go | 13 +++--- internal/importer/archive/iso/processor.go | 2 +- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index 43b0b10c..f129a0bd 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -1,7 +1,9 @@ package iso import ( + "context" "io" + "log/slog" "sort" "strings" ) @@ -28,7 +30,7 @@ type MainFeaturePlaylist struct { // Failures parsing individual playlists are non-fatal — we skip them and // keep evaluating the rest, mirroring how every Blu-ray player tolerates // malformed entries in BDMV/PLAYLIST/. -func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { +func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { // Build per-clip indexes. M2TS streams live at BDMV/STREAM/.M2TS // and carry the 2D version (or the only version on a 2D disc). SSIF // streams live at BDMV/STREAM/SSIF/.SSIF and carry the @@ -53,6 +55,17 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay m2tsByClip[base] = f } } + // [DEBUG-isobd] One-shot summary of what the resolver actually sees in + // this ISO. Distinct prefix lets us confirm the live binary includes + // this instrumentation and lets users grep their logs cleanly. 
+ slog.InfoContext(ctx, "[DEBUG-isobd] bdmv scan", + "total_files", len(files), + "playlists", len(playlistEntries), + "m2ts_clips", len(m2tsByClip), + "ssif_clips", len(ssifByClip), + "sample_paths", samplePaths(files, 12), + ) + if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { return nil } @@ -86,6 +99,21 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay streams = append(streams, entry) } } + // [DEBUG-isobd] Per-playlist evaluation so we can see which mpls + // resolved how many clips and why a given candidate won or lost. + var totalSize int64 + for _, s := range streams { + totalSize += int64(s.size) + } + slog.InfoContext(ctx, "[DEBUG-isobd] mpls evaluated", + "name", pe.path, + "items", len(pl.PlayItems), + "resolved_clips", len(streams), + "unresolved", len(pl.PlayItems)-len(streams), + "duration_ticks", pl.DurationTicks(), + "streams_total_bytes", totalSize, + ) + if len(streams) == 0 { continue } @@ -99,9 +127,28 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay best = cand } } + if best != nil { + slog.InfoContext(ctx, "[DEBUG-isobd] main feature picked", + "playlist", best.PlaylistName, + "clips", len(best.Streams), + "duration_ticks", best.DurationTicks, + ) + } return best } +// samplePaths returns up to max paths from files, intended for diagnostic +// logging. The list is taken in iteration order — not sorted — so the user +// sees what ListISOFiles actually emitted. +func samplePaths(files []isoFileEntry, max int) []string { + n := min(len(files), max) + out := make([]string, 0, n) + for i := range n { + out = append(out, files[i].path) + } + return out +} + // isBetterPlaylist returns true when cand should replace best. // Comparison: longer duration > more PlayItems > earlier filename. 
// The filename tie-break relies on playlistEntries being sorted before diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index e788bafe..30c983b2 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -2,6 +2,7 @@ package iso import ( "bytes" + "context" "io" "testing" ) @@ -60,7 +61,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/STREAM/00010.M2TS", lba: 500, size: 500_000}, } - got := ResolveMainFeature(rs, files) + got := ResolveMainFeature(context.Background(), rs, files) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -83,7 +84,7 @@ func TestResolveMainFeature(t *testing.T) { files := []isoFileEntry{ {path: "movie.mkv", lba: 100, size: 1_000_000}, } - if got := ResolveMainFeature(bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { + if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { t.Errorf("expected nil for non-BDMV disc, got %+v", got) } }) @@ -97,7 +98,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: 15}, {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, } - if got := ResolveMainFeature(rs, files); got != nil { + if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil for unparseable MPLS, got %+v", got) } }) @@ -133,7 +134,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/STREAM/SSIF/00102.SSIF", lba: 500, size: 5_000_000_000}, } - got := ResolveMainFeature(rs, files) + got := ResolveMainFeature(context.Background(), rs, files) if got == nil { t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") } @@ -172,7 +173,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 40_000_000_000}, } - got := ResolveMainFeature(rs, files) + got := 
ResolveMainFeature(context.Background(), rs, files) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -197,7 +198,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(data))}, {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, } - if got := ResolveMainFeature(rs, files); got != nil { + if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) } }) diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 340b4acc..82ad5650 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -48,7 +48,7 @@ func AnalyzeISO( out.Files = append(out.Files, buildFileContent(src, e)) } - if mf := ResolveMainFeature(rs, entries); mf != nil { + if mf := ResolveMainFeature(ctx, rs, entries); mf != nil { out.DurationTicks = mf.DurationTicks for _, e := range mf.Streams { out.MainFeature = append(out.MainFeature, buildFileContent(src, e)) From 3aa16dad42ad08611d65099e8026cc2e6e81f625 Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 09:22:27 +0200 Subject: [PATCH 4/8] chore(iso): extend [DEBUG-isobd] scan with size distribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-ISO run shows all 38 playlists with items=1, max duration 80s, max stream bytes 141MB — yet the NZB carries ~88GB across 2 ISOs. Either ListISOFiles is dropping huge files (UDF alloc-type 2/3 not handled) or reading wrong sizes for them. Add to the bdmv-scan log: - sum of every file size (across all entries) - sum of M2TS-only and SSIF-only sizes - the 6 largest files with human-readable sizes One log line will distinguish 'sizes truncated', 'big files missing', and 'release is genuinely tiny'. 
--- internal/importer/archive/iso/bluray.go | 51 +++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index f129a0bd..10e1c8cf 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -2,6 +2,7 @@ package iso import ( "context" + "fmt" "io" "log/slog" "sort" @@ -58,11 +59,28 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn // [DEBUG-isobd] One-shot summary of what the resolver actually sees in // this ISO. Distinct prefix lets us confirm the live binary includes // this instrumentation and lets users grep their logs cleanly. + var ( + allSum, m2tsSum, ssifSum int64 + biggest = topNBySize(files, 6) + ) + for _, f := range files { + allSum += int64(f.size) + } + for _, f := range m2tsByClip { + m2tsSum += int64(f.size) + } + for _, f := range ssifByClip { + ssifSum += int64(f.size) + } slog.InfoContext(ctx, "[DEBUG-isobd] bdmv scan", "total_files", len(files), "playlists", len(playlistEntries), "m2ts_clips", len(m2tsByClip), "ssif_clips", len(ssifByClip), + "all_files_sum_bytes", allSum, + "m2ts_sum_bytes", m2tsSum, + "ssif_sum_bytes", ssifSum, + "top6_largest", biggest, "sample_paths", samplePaths(files, 12), ) @@ -149,6 +167,39 @@ func samplePaths(files []isoFileEntry, max int) []string { return out } +// topNBySize returns "path=size" entries for the n largest files. Used by +// diagnostic logging to reveal whether the ISO actually contains the +// multi-GB clips a real Blu-ray main feature would have. 
+func topNBySize(files []isoFileEntry, n int) []string { + if len(files) == 0 || n <= 0 { + return nil + } + cp := make([]isoFileEntry, len(files)) + copy(cp, files) + sort.Slice(cp, func(i, j int) bool { return cp[i].size > cp[j].size }) + k := min(len(cp), n) + out := make([]string, 0, k) + for i := range k { + out = append(out, cp[i].path+"="+formatBytes(int64(cp[i].size))) + } + return out +} + +// formatBytes renders a byte count compactly for log readability. +// Uses base-2 units (KiB, MiB, GiB) for clarity. +func formatBytes(b int64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%dB", b) + } + div, exp := int64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f%ciB", float64(b)/float64(div), "KMGTPE"[exp]) +} + // isBetterPlaylist returns true when cand should replace best. // Comparison: longer duration > more PlayItems > earlier filename. // The filename tie-break relies on playlistEntries being sorted before From ec8a0483298497002ddff7ffb736b8717504157d Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 11:27:29 +0200 Subject: [PATCH 5/8] chore(iso): log ISO-size vs walker-coverage for each ISO Real run shows all_files_sum_bytes=1.13 GiB across 295 files, biggest single file 135 MiB. NZB is 88 GiB across 2 ISOs. Need to know whether src.Size (claimed ISO bytes from the outer RAR archive) matches the sum of what ListISOFiles enumerated, or whether the walker is missing multi-GB files. One [DEBUG-isobd] iso analyse line per ISO now prints filename, iso_size, listed_files, listed_sum, and coverage_pct so the discrepancy is impossible to miss. 
--- internal/importer/archive/iso/processor.go | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 82ad5650..dc490361 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -3,6 +3,7 @@ package iso import ( "context" "fmt" + "log/slog" "path/filepath" "strings" "time" @@ -39,6 +40,25 @@ func AnalyzeISO( return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } + // [DEBUG-isobd] Compare the ISO's claimed size (from the outer archive) + // against the sum of every file ListISOFiles returned. A huge ratio + // difference means the walker is silently skipping big files — + // almost certainly the multi-GB BDMV main-feature clips whose UDF + // allocation descriptors use a type our walker doesn't decode. + var listedSum int64 + for _, e := range entries { + listedSum += int64(e.size) + } + slog.InfoContext(ctx, "[DEBUG-isobd] iso analyse", + "filename", src.Filename, + "iso_size_bytes", src.Size, + "iso_size", formatBytes(src.Size), + "listed_files", len(entries), + "listed_sum_bytes", listedSum, + "listed_sum", formatBytes(listedSum), + "coverage_pct", coveragePercent(listedSum, src.Size), + ) + out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} for _, e := range entries { @@ -58,6 +78,23 @@ func AnalyzeISO( return out, nil } +// coveragePercent returns (listed/total)*100, clamped to [0, 999]. Used +// only by diagnostic logging so the user can see at a glance whether +// ListISOFiles is enumerating the whole ISO or only a fraction. 
+func coveragePercent(listed, total int64) int64 { + if total <= 0 { + return -1 + } + pct := listed * 100 / total + if pct < 0 { + return 0 + } + if pct > 999 { + return 999 + } + return pct +} + // buildFileContent turns one ISO directory entry into an ISOFileContent, // slicing or referencing the source's Usenet segments according to whether // the ISO is encrypted. From 62162e82d1184dadb3cdf9787d9265aacb31742b Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 11:41:10 +0200 Subject: [PATCH 6/8] fix(iso): read full directory extent in UDF walker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of the 'main feature M2TS files invisible' bug. udfReadDirEntries parsed every File Identifier Descriptor in a directory but only ever read the FIRST 2048-byte sector of each allocation descriptor's extent — even when the extent's ad.length claimed it spanned many sectors. A Blu-ray BDMV/STREAM/ directory with ~2500 FIDs (~30 KiB of FID data) lost every entry past the first sector, including the multi-GB main-feature clips 00016/00017/00022/00023/00028/00029 and the corresponding SSIF files. Local repro against AVATAR_FIRE_AND_ASH_3D_DISC_1.iso (37 GiB): - Before: listed_files=298 sum=1.16 GiB coverage=3.1% (no clip >135 MiB) - After: listed_files=2523 sum=74 GiB (00022.m2ts=17 GiB ✓) Fix factors readMetaExtent / readICBExtent helpers that walk every sector of an extent until ad.length is exhausted. Both fail-soft on EOF so a malformed image returns partial data rather than aborting the import. The pre-existing TestUDFReadDirEntriesShortADClampsExtentLength was pinning the BUGGY behaviour (it asserted the walker would truncate to one sector); renamed to TestUDFReadDirEntriesTruncatedExtent and now asserts the new contract: when an extent claims more sectors than the image contains, the walker returns whatever data it could read without an error. 
Adds fs_local_test.go: an ALTMOUNT_LOCAL_ISO= gated integration test that catches this class of bug instantly against a real ISO. Skipped in CI. Also strips the [DEBUG-isobd] / [DEBUG-walk] instrumentation added during the investigation and tones the resolver / processor logs down to one production-grade INFO line per ISO and per main-feature pick. --- internal/importer/archive/iso/bluray.go | 93 +------------------ internal/importer/archive/iso/fs.go | 84 ++++++++++++++--- .../importer/archive/iso/fs_local_test.go | 76 +++++++++++++++ internal/importer/archive/iso/fs_test.go | 12 ++- internal/importer/archive/iso/processor.go | 34 +------ 5 files changed, 161 insertions(+), 138 deletions(-) create mode 100644 internal/importer/archive/iso/fs_local_test.go diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index 10e1c8cf..a6ab8f64 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -2,7 +2,6 @@ package iso import ( "context" - "fmt" "io" "log/slog" "sort" @@ -56,34 +55,6 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn m2tsByClip[base] = f } } - // [DEBUG-isobd] One-shot summary of what the resolver actually sees in - // this ISO. Distinct prefix lets us confirm the live binary includes - // this instrumentation and lets users grep their logs cleanly. 
- var ( - allSum, m2tsSum, ssifSum int64 - biggest = topNBySize(files, 6) - ) - for _, f := range files { - allSum += int64(f.size) - } - for _, f := range m2tsByClip { - m2tsSum += int64(f.size) - } - for _, f := range ssifByClip { - ssifSum += int64(f.size) - } - slog.InfoContext(ctx, "[DEBUG-isobd] bdmv scan", - "total_files", len(files), - "playlists", len(playlistEntries), - "m2ts_clips", len(m2tsByClip), - "ssif_clips", len(ssifByClip), - "all_files_sum_bytes", allSum, - "m2ts_sum_bytes", m2tsSum, - "ssif_sum_bytes", ssifSum, - "top6_largest", biggest, - "sample_paths", samplePaths(files, 12), - ) - if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { return nil } @@ -117,21 +88,6 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn streams = append(streams, entry) } } - // [DEBUG-isobd] Per-playlist evaluation so we can see which mpls - // resolved how many clips and why a given candidate won or lost. - var totalSize int64 - for _, s := range streams { - totalSize += int64(s.size) - } - slog.InfoContext(ctx, "[DEBUG-isobd] mpls evaluated", - "name", pe.path, - "items", len(pl.PlayItems), - "resolved_clips", len(streams), - "unresolved", len(pl.PlayItems)-len(streams), - "duration_ticks", pl.DurationTicks(), - "streams_total_bytes", totalSize, - ) - if len(streams) == 0 { continue } @@ -146,60 +102,15 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn } } if best != nil { - slog.InfoContext(ctx, "[DEBUG-isobd] main feature picked", + slog.InfoContext(ctx, "Blu-ray main feature playlist resolved", "playlist", best.PlaylistName, "clips", len(best.Streams), - "duration_ticks", best.DurationTicks, + "duration_seconds", best.DurationTicks/45000, ) } return best } -// samplePaths returns up to max paths from files, intended for diagnostic -// logging. The list is taken in iteration order — not sorted — so the user -// sees what ListISOFiles actually emitted. 
-func samplePaths(files []isoFileEntry, max int) []string { - n := min(len(files), max) - out := make([]string, 0, n) - for i := range n { - out = append(out, files[i].path) - } - return out -} - -// topNBySize returns "path=size" entries for the n largest files. Used by -// diagnostic logging to reveal whether the ISO actually contains the -// multi-GB clips a real Blu-ray main feature would have. -func topNBySize(files []isoFileEntry, n int) []string { - if len(files) == 0 || n <= 0 { - return nil - } - cp := make([]isoFileEntry, len(files)) - copy(cp, files) - sort.Slice(cp, func(i, j int) bool { return cp[i].size > cp[j].size }) - k := min(len(cp), n) - out := make([]string, 0, k) - for i := range k { - out = append(out, cp[i].path+"="+formatBytes(int64(cp[i].size))) - } - return out -} - -// formatBytes renders a byte count compactly for log readability. -// Uses base-2 units (KiB, MiB, GiB) for clarity. -func formatBytes(b int64) string { - const unit = 1024 - if b < unit { - return fmt.Sprintf("%dB", b) - } - div, exp := int64(unit), 0 - for n := b / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f%ciB", float64(b)/float64(div), "KMGTPE"[exp]) -} - // isBetterPlaylist returns true when cand should replace best. // Comparison: longer duration > more PlayItems > earlier filename. // The filename tie-break relies on playlistEntries being sorted before diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index ace50d53..62c277f4 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -328,6 +328,66 @@ func udfResolveICB(loc udfLBA, metaMap []udfMetaSpan, partStart uint32) (uint32, return udfResolveMetaBlock(loc.block, metaMap, partStart) } +// readMetaExtent reads a contiguous extent of `length` bytes starting at +// logical metadata block `startBlock`, walking sector by sector through +// the metaMap so multi-sector extents (e.g. 
a 26 KiB directory) are +// returned in full. Without this, callers that read only the first +// 2048-byte sector silently lose every entry past the first sector — the +// root cause of the "main-feature M2TS files missing from listing" bug. +func readMetaExtent(rs io.ReadSeeker, startBlock uint32, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + for b := uint32(0); remaining > 0; b++ { + ps, err := udfResolveMetaBlock(startBlock+b, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) + remaining -= take + } + return out, nil +} + +// readICBExtent is the long_ad analogue of readMetaExtent: walks blocks +// by incrementing the logical-block field inside the ICB long_ad. +func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + cur := loc + for remaining > 0 { + ps, err := udfResolveICB(cur, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) 
+ remaining -= take + cur.block++ + } + return out, nil +} + // udfReadDirEntries reads all File Identifier Descriptor records from a // File Entry at physSect. func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { @@ -360,21 +420,22 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, case 3: // inline dirData = buf[allocDescOff : allocDescOff+allocDescLen] case 0: // short_ad + // A single allocation descriptor describes an extent that can span + // many 2048-byte sectors. The previous version of this code read + // only the first sector and truncated the rest of the extent, + // silently dropping every directory entry past ~30 FIDs — which is + // why BDMV/STREAM/ on a real Blu-ray (~300 entries, ~26 KiB) lost + // every main-feature M2TS clip. We now walk the full extent. for off := 0; off+8 <= allocDescLen; off += 8 { ad := udfParseShortAD(buf[allocDescOff:], off) if ad.length == 0 { break } - ps, rerr := udfResolveMetaBlock(ad.block, metaMap, partStart) - if rerr != nil { - return nil, rerr - } - _, sector, rerr := udfReadTag(rs, ps) + data, rerr := readMetaExtent(rs, ad.block, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) } case 1: // long_ad for off := 0; off+16 <= allocDescLen; off += 16 { @@ -382,16 +443,11 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, if ad.length == 0 { break } - ps, rerr := udfResolveICB(ad.loc, metaMap, partStart) - if rerr != nil { - return nil, rerr - } - _, sector, rerr := udfReadTag(rs, ps) + data, rerr := readICBExtent(rs, ad.loc, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) 
} } diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go new file mode 100644 index 00000000..b6f599ad --- /dev/null +++ b/internal/importer/archive/iso/fs_local_test.go @@ -0,0 +1,76 @@ +package iso + +import ( + "fmt" + "os" + "sort" + "testing" +) + +// TestLocalISO_DiscoverBigFiles is a manual integration test: it walks a +// real Blu-ray ISO from local disk and dumps a size-sorted summary. Skipped +// unless ALTMOUNT_LOCAL_ISO is set, so CI stays unaffected. +// +// Set ALTMOUNT_LOCAL_ISO=/abs/path/to.iso to run, e.g.: +// +// ALTMOUNT_LOCAL_ISO=/Volumes/.../DISC_1.iso go test \ +// ./internal/importer/archive/iso/... -run TestLocalISO -v +func TestLocalISO_DiscoverBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open %s: %v", path, err) + } + defer f.Close() + + stat, _ := f.Stat() + t.Logf("ISO: %s size=%d (%.2f GiB)", path, stat.Size(), float64(stat.Size())/(1<<30)) + + entries, err := ListISOFiles(f) + if err != nil { + t.Fatalf("ListISOFiles: %v", err) + } + + var sum int64 + for _, e := range entries { + sum += int64(e.size) + } + t.Logf("listed_files=%d listed_sum=%d (%.2f GiB) coverage=%.1f%%", + len(entries), sum, float64(sum)/(1<<30), 100*float64(sum)/float64(stat.Size())) + + // Top 25 by size — should match `ls -laS BDMV/STREAM/` if walker is sane. + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + t.Logf("top 25 by size:") + for i, e := range entries { + if i >= 25 { + break + } + t.Logf(" %s size=%d (%.2f MiB) lba=%d", e.path, e.size, float64(e.size)/(1<<20), e.lba) + } + + // Sanity sentinels for the Avatar disc 1 main-feature clips. Each one + // is >1 GiB on disc, so if any are absent the walker dropped them. 
+ want := []string{"BDMV/STREAM/00016.m2ts", "BDMV/STREAM/00022.m2ts", "BDMV/STREAM/00028.m2ts"} + have := make(map[string]uint64, len(entries)) + for _, e := range entries { + have[e.path] = e.size + } + for _, w := range want { + size, ok := have[w] + if !ok { + t.Errorf("missing %s — walker dropped this file", w) + continue + } + if size < 1<<30 { + t.Errorf("%s reported size=%d (%.2f MiB), want >1 GiB", + w, size, float64(size)/(1<<20)) + } + } + + if t.Failed() { + fmt.Println(">>> walker is dropping big files; this is the bug") + } +} diff --git a/internal/importer/archive/iso/fs_test.go b/internal/importer/archive/iso/fs_test.go index c03e1c95..1b2bfe38 100644 --- a/internal/importer/archive/iso/fs_test.go +++ b/internal/importer/archive/iso/fs_test.go @@ -6,7 +6,17 @@ import ( "testing" ) -func TestUDFReadDirEntriesShortADClampsExtentLength(t *testing.T) { +// TestUDFReadDirEntriesTruncatedExtent locks in the fix for the bug where +// a directory's allocation descriptor advertised an extent spanning +// multiple sectors but the walker read only the first sector and silently +// dropped every entry past it (~ the reason the Avatar BDMV main-feature +// M2TS files were invisible). Two assertions: +// - readMetaExtent must keep reading sectors until ad.length is +// satisfied (the fix); +// - if a sector read fails because the image is shorter than ad.length, +// the walk returns partial data without an error so a malformed ISO +// can't fail the entire import. 
+func TestUDFReadDirEntriesTruncatedExtent(t *testing.T) { image := make([]byte, iso9660SectorSize*21) dirICBSector := image[10*iso9660SectorSize : 11*iso9660SectorSize] binary.LittleEndian.PutUint16(dirICBSector[0:2], 261) diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index dc490361..2220b582 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -40,23 +40,10 @@ func AnalyzeISO( return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - // [DEBUG-isobd] Compare the ISO's claimed size (from the outer archive) - // against the sum of every file ListISOFiles returned. A huge ratio - // difference means the walker is silently skipping big files — - // almost certainly the multi-GB BDMV main-feature clips whose UDF - // allocation descriptors use a type our walker doesn't decode. - var listedSum int64 - for _, e := range entries { - listedSum += int64(e.size) - } - slog.InfoContext(ctx, "[DEBUG-isobd] iso analyse", + slog.InfoContext(ctx, "ISO analysed", "filename", src.Filename, "iso_size_bytes", src.Size, - "iso_size", formatBytes(src.Size), - "listed_files", len(entries), - "listed_sum_bytes", listedSum, - "listed_sum", formatBytes(listedSum), - "coverage_pct", coveragePercent(listedSum, src.Size), + "files", len(entries), ) out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} @@ -78,23 +65,6 @@ func AnalyzeISO( return out, nil } -// coveragePercent returns (listed/total)*100, clamped to [0, 999]. Used -// only by diagnostic logging so the user can see at a glance whether -// ListISOFiles is enumerating the whole ISO or only a fraction. 
-func coveragePercent(listed, total int64) int64 { - if total <= 0 { - return -1 - } - pct := listed * 100 / total - if pct < 0 { - return 0 - } - if pct > 999 { - return 999 - } - return pct -} - // buildFileContent turns one ISO directory entry into an ISOFileContent, // slicing or referencing the source's Usenet segments according to whether // the ISO is encrypted. From 775871a3ac15c054a497997f1f25d9da8d5d6c16 Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 11:53:19 +0200 Subject: [PATCH 7/8] fix(iso): walk every extent of multi-extent UDF files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The directory-listing fix exposed a second latent bug downstream: the walker only stored ONE allocation descriptor's LBA per file even though huge Blu-ray clips are split across hundreds of extents (Avatar's 00022.m2ts: 945, 00023.m2ts: 945, 00028.m2ts: 294, 00016.m2ts: 238). For every multi-extent file, downstream reads of bytes past the first extent's length returned wrong sectors (whatever happened to live next to extent 1 on disc) instead of the file's real data — silent corruption ~50× the size of the visible bug. Changes: - isoFileEntry now carries []isoExtent instead of a single lba field. - collectFileExtents() walks every inline AD and chases Allocation Extent Descriptor (UDF tag 258) chains so files with more ADs than fit in the FE sector are fully enumerated. Caps total extent bytes at info_length so a malformed FE can't yield more data than the file claims. - ISOFileContent gains a Sources []ISONestedSource slice (one per extent) and drops the single-Segments / single-NestedSource fields. - buildFileContent emits one ISONestedSource per extent: unencrypted ISOs pre-slice outer segments to cover each extent; encrypted ISOs keep the full outer segments and seek via InnerOffset (AES-CBC IV chain still anchors at byte 0 of the outer ISO). 
- archive.isoFileContentToNestedSource → isoFileContentToNestedSources fans the slice out into one archive.NestedSource per extent. - buildMainFeatureContent and buildLargestFileContent thread the multi-source path so the final concat Content carries every extent of every clip in disc-then-playlist order. Verified against the real Avatar disc 1 ISO via fs_local_test.go: 00022.m2ts: 945 extents, sum-of-extent-lengths == 17 GiB info_length. TestLocalISO_DiscoverBigFiles asserts >=2 extents and full coverage for the sentinel big-clip set. --- internal/importer/archive/iso/bluray.go | 24 +- internal/importer/archive/iso/bluray_test.go | 49 ++-- internal/importer/archive/iso/fs.go | 194 ++++++++++++++-- .../importer/archive/iso/fs_local_test.go | 212 +++++++++++++++++- internal/importer/archive/iso/processor.go | 48 ++-- internal/importer/archive/iso/types.go | 29 ++- internal/importer/archive/iso_expansion.go | 55 +++-- .../importer/archive/iso_expansion_test.go | 99 +++++--- 8 files changed, 567 insertions(+), 143 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index a6ab8f64..18d47237 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -123,15 +123,21 @@ func isBetterPlaylist(cand, best *MainFeaturePlaylist, candItems, bestItems int) return candItems > bestItems } -// readISOFile reads the full contents of one isoFileEntry from rs. -// MPLS files are tiny (~KBs), so a one-shot read is fine. +// readISOFile reads the full contents of one isoFileEntry from rs, +// concatenating bytes across every on-disc extent. MPLS files are tiny +// (~KBs) and almost always single-extent, but multi-extent MPLS is +// legal so we iterate. 
func readISOFile(rs io.ReadSeeker, e isoFileEntry) ([]byte, error) { - if _, err := rs.Seek(int64(e.lba)*iso9660SectorSize, io.SeekStart); err != nil { - return nil, err - } - buf := make([]byte, e.size) - if _, err := io.ReadFull(rs, buf); err != nil { - return nil, err + out := make([]byte, 0, e.size) + for _, ext := range e.extents { + if _, err := rs.Seek(int64(ext.lba)*iso9660SectorSize, io.SeekStart); err != nil { + return nil, err + } + chunk := make([]byte, ext.length) + if _, err := io.ReadFull(rs, chunk); err != nil { + return nil, err + } + out = append(out, chunk...) } - return buf, nil + return out, nil } diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index 30c983b2..42b3c02c 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -7,6 +7,15 @@ import ( "testing" ) +// mkEntry builds a single-extent isoFileEntry — the common case for tests. +func mkEntry(path string, lba uint32, size uint64) isoFileEntry { + return isoFileEntry{ + path: path, + size: size, + extents: []isoExtent{{lba: lba, length: size}}, + } +} + // makeImage assembles an in-memory disc image by placing each piece of // data at the sector index given in its key. The returned reader can be // used as if it were a real ISO read-seeker. @@ -53,12 +62,12 @@ func TestResolveMainFeature(t *testing.T) { // File listing: two playlists and four M2TS clips (one extra). 
files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(short))}, - {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(long))}, - {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, - {path: "BDMV/STREAM/00002.M2TS", lba: 300, size: 2_000_000}, - {path: "BDMV/STREAM/00003.M2TS", lba: 400, size: 3_000_000}, - {path: "BDMV/STREAM/00010.M2TS", lba: 500, size: 500_000}, + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(short))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(long))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 2_000_000), + mkEntry("BDMV/STREAM/00003.M2TS", 400, 3_000_000), + mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), } got := ResolveMainFeature(context.Background(), rs, files) @@ -82,7 +91,7 @@ func TestResolveMainFeature(t *testing.T) { t.Run("non-BDMV disc returns nil", func(t *testing.T) { t.Parallel() files := []isoFileEntry{ - {path: "movie.mkv", lba: 100, size: 1_000_000}, + mkEntry("movie.mkv", 100, 1_000_000), } if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { t.Errorf("expected nil for non-BDMV disc, got %+v", got) @@ -95,8 +104,8 @@ func TestResolveMainFeature(t *testing.T) { 100: []byte("not a real mpls"), }) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: 15}, - {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, 15), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), } if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil for unparseable MPLS, got %+v", got) @@ -124,14 +133,14 @@ func TestResolveMainFeature(t *testing.T) { }) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(extras))}, - {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(mainFeature3D))}, + 
mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(extras))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(mainFeature3D))), // Only the extras live as M2TS: - {path: "BDMV/STREAM/00010.M2TS", lba: 200, size: 50_000_000}, + mkEntry("BDMV/STREAM/00010.M2TS", 200, 50_000_000), // Main feature is SSIF only: - {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 25_000_000_000}, - {path: "BDMV/STREAM/SSIF/00101.SSIF", lba: 400, size: 25_000_000_000}, - {path: "BDMV/STREAM/SSIF/00102.SSIF", lba: 500, size: 5_000_000_000}, + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00101.SSIF", 400, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00102.SSIF", 500, 5_000_000_000), } got := ResolveMainFeature(context.Background(), rs, files) @@ -168,9 +177,9 @@ func TestResolveMainFeature(t *testing.T) { rs := makeImage(t, map[uint32][]byte{100: mainFeature}) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00800.MPLS", lba: 100, size: uint64(len(mainFeature))}, - {path: "BDMV/STREAM/00100.M2TS", lba: 200, size: 20_000_000_000}, - {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 40_000_000_000}, + mkEntry("BDMV/PLAYLIST/00800.MPLS", 100, uint64(len(mainFeature))), + mkEntry("BDMV/STREAM/00100.M2TS", 200, 20_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 40_000_000_000), } got := ResolveMainFeature(context.Background(), rs, files) @@ -195,8 +204,8 @@ func TestResolveMainFeature(t *testing.T) { 100: data, }) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(data))}, - {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(data))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), } if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 
62c277f4..d63c6e23 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -10,11 +10,32 @@ import ( const iso9660SectorSize = 2048 -// isoFileEntry is one non-directory file returned by ListISOFiles. +// isoFileEntry is one non-directory file returned by ListISOFiles. The +// file's data on disc may be split across multiple contiguous extents +// — Blu-ray main-feature M2TS files routinely use hundreds of extents +// chained via Allocation Extent Descriptors. extents is in disc order; +// concatenating their bytes yields the complete file. type isoFileEntry struct { - path string // full path within ISO (e.g. "BDMV/STREAM/00001.M2TS") - lba uint32 - size uint64 + path string + size uint64 + extents []isoExtent +} + +// firstLBA returns the start LBA of the file's first extent. Callers +// that only need a starting sector (e.g. reading a small MPLS file +// known to be single-extent) can use this. +func (e isoFileEntry) firstLBA() uint32 { + if len(e.extents) == 0 { + return 0 + } + return e.extents[0].lba +} + +// isoExtent is one contiguous run of sectors on disc that contributes +// length bytes to the logical file. +type isoExtent struct { + lba uint32 + length uint64 } // ───────────────────────────────────────────────────────────────────────────── @@ -100,7 +121,13 @@ func iso9660WalkAll(rs io.ReadSeeker, dirLBA uint32, dirSize uint64, prefix stri } result = append(result, sub...) } else { - result = append(result, isoFileEntry{path: entryPath, lba: e.lba, size: e.size}) + // ISO 9660 stores file data in a single contiguous extent. + // (Interleave mode exists but is essentially never used.) 
+ result = append(result, isoFileEntry{ + path: entryPath, + size: e.size, + extents: []isoExtent{{lba: e.lba, length: e.size}}, + }) } } return result, nil @@ -605,24 +632,155 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS allocDescLen = len(feBuf) - allocDescOff } - var fileLBA uint32 - switch allocType { - case 0: - if allocDescLen >= 8 { - ad := udfParseShortAD(feBuf[allocDescOff:], 0) - fileLBA = partStart + ad.block + extents := collectFileExtents(rs, feBuf[allocDescOff:allocDescOff+allocDescLen], allocType, metaMap, partStart, infoLen, fePhys) + if len(extents) == 0 { + continue + } + result = append(result, isoFileEntry{ + path: entryPath, + size: infoLen, + extents: extents, + }) + } + return result, nil +} + +// collectFileExtents walks the allocation descriptors of a UDF File Entry +// (or Extended File Entry), following Allocation Extent Descriptor chains +// when the inline AD area is exhausted, and returns one isoExtent per +// recorded data extent in disc order. +// +// allocType is the lower 3 bits of the FE's ICBTag flags: +// +// 0 → short_ad (8 bytes each) +// 1 → long_ad (16 bytes each) +// 2 → extended ad (20 bytes; rare, parsed as a long_ad prefix here) +// 3 → file data embedded in the FE itself (small files) +// +// The high 2 bits of each AD's length field encode the AD "type": +// +// 0 → recorded & allocated extent (real data — emit) +// 1 → not recorded, allocated (sparse — skip, file should not see this on BD) +// 2 → not recorded, not allocated (hole — skip) +// 3 → next AD points at a continuation Allocation Extent Descriptor +// (tag 258) holding more ADs; chase the chain +// +// embeddedFEPhys is the FE's own physical sector. It is only relevant for +// allocType 3, where the file data sits inline in that sector; such files +// are currently skipped (no extent is emitted), so the value is unused.
+func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, metaMap []udfMetaSpan, partStart uint32, infoLen uint64, embeddedFEPhys uint32) []isoExtent { + if allocType == 3 { + // Embedded data — a single "extent" pointing at the FE sector + // itself with the inline-AD area treated as the file data. We + // can't emit a usable LBA for slicing because the data isn't + // sector-aligned. Skip for now; BD streams never use embedded. + return nil + } + var step int + switch allocType { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 // first 16 bytes are a long_ad; trailing 4 bytes are impl-use + default: + return nil + } + + var extents []isoExtent + chase := inlineADs + safety := 0 + for { + safety++ + if safety > 4096 { + break // pathological — bail to avoid runaway IO + } + var chain *udfLongAD + for off := 0; off+step <= len(chase); off += step { + lenField := binary.LittleEndian.Uint32(chase[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + // short_ad continuation: the 4 bytes after length + // are the next AED's logical block; partition is + // implicit (same as parent). + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(chase[off+4:])}} + default: + loc = udfParseLongAD(chase, off) + } + chain = &loc + break + } + if adType != 0 { + // Type 1 (allocated but not recorded) and type 2 (hole) + // don't carry real bytes. Skip — BD streams shouldn't + // have these in practice. 
+ continue } - case 1: - if allocDescLen >= 16 { - ad := udfParseLongAD(feBuf[allocDescOff:], 0) - fileLBA, _ = udfResolveICB(ad.loc, metaMap, partStart) + var lba uint32 + switch step { + case 8: + ad := udfParseShortAD(chase, off) + resolved, err := udfResolveMetaBlock(ad.block, metaMap, partStart) + if err != nil { + continue + } + lba = resolved + default: + ad := udfParseLongAD(chase, off) + resolved, err := udfResolveICB(ad.loc, metaMap, partStart) + if err != nil { + continue + } + lba = resolved } + extents = append(extents, isoExtent{lba: lba, length: uint64(adLen)}) + } + if chain == nil { + break + } + ps, err := udfResolveICB(chain.loc, metaMap, partStart) + if err != nil { + break + } + _, aedBuf, err := udfReadTag(rs, ps) + if err != nil { + break } - if fileLBA > 0 { - result = append(result, isoFileEntry{path: entryPath, lba: fileLBA, size: infoLen}) + // Allocation Extent Descriptor layout: 16-byte tag + 4-byte + // previous-AED pointer + 4-byte length-of-allocation-descriptors, + // then the ADs themselves. + if len(aedBuf) < 24 { + break } + nextLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if nextLen <= 0 || 24+nextLen > len(aedBuf) { + break + } + chase = aedBuf[24 : 24+nextLen] } - return result, nil + + // Defensive: cap the total extent bytes at the FE's info_length so a + // malformed disc with mis-sized ADs can't return more bytes than the + // file legitimately contains. 
+ var total uint64 + for i := range extents { + if total+extents[i].length > infoLen { + extents[i].length = infoLen - total + extents = extents[:i+1] + break + } + total += extents[i].length + } + _ = embeddedFEPhys + return extents } // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index b6f599ad..23693efe 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -1,6 +1,7 @@ package iso import ( + "encoding/binary" "fmt" "os" "sort" @@ -48,25 +49,41 @@ func TestLocalISO_DiscoverBigFiles(t *testing.T) { if i >= 25 { break } - t.Logf(" %s size=%d (%.2f MiB) lba=%d", e.path, e.size, float64(e.size)/(1<<20), e.lba) + t.Logf(" %s size=%d (%.2f MiB) extents=%d first_lba=%d", + e.path, e.size, float64(e.size)/(1<<20), len(e.extents), e.firstLBA()) } - // Sanity sentinels for the Avatar disc 1 main-feature clips. Each one - // is >1 GiB on disc, so if any are absent the walker dropped them. + // Sanity sentinels for the Avatar disc 1 main-feature clips. Each is + // >1 GiB and uses many on-disc extents (00022.m2ts has ~945). Assert + // the file is present, the size is right, AND the extents slice fully + // covers it — otherwise downstream concat reads wrong bytes past the + // first extent. 
want := []string{"BDMV/STREAM/00016.m2ts", "BDMV/STREAM/00022.m2ts", "BDMV/STREAM/00028.m2ts"} - have := make(map[string]uint64, len(entries)) + have := make(map[string]isoFileEntry, len(entries)) for _, e := range entries { - have[e.path] = e.size + have[e.path] = e } for _, w := range want { - size, ok := have[w] + e, ok := have[w] if !ok { t.Errorf("missing %s — walker dropped this file", w) continue } - if size < 1<<30 { + if e.size < 1<<30 { t.Errorf("%s reported size=%d (%.2f MiB), want >1 GiB", - w, size, float64(size)/(1<<20)) + w, e.size, float64(e.size)/(1<<20)) + } + if len(e.extents) < 2 { + t.Errorf("%s has only %d extents — expected multi-extent (BD main-feature clips fragment heavily)", + w, len(e.extents)) + } + var covered uint64 + for _, ext := range e.extents { + covered += ext.length + } + if covered != e.size { + t.Errorf("%s: sum of extent lengths = %d but file size = %d (delta %d)", + w, covered, e.size, int64(e.size)-int64(covered)) } } @@ -74,3 +91,182 @@ func TestLocalISO_DiscoverBigFiles(t *testing.T) { fmt.Println(">>> walker is dropping big files; this is the bug") } } + +// TestLocalISO_CountExtentsForBigFiles probes each entry's File Entry on the +// real ISO and reports how many allocation descriptors a file's data uses. +// The walker today reads only the first AD — if any of the multi-GiB main- +// feature clips reports >1 AD, downstream byte reads past the first extent +// will hit wrong sectors. Gated on ALTMOUNT_LOCAL_ISO same as the discovery +// test. +func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + partStart, metaMap, rootICB, err := udfSetup(f) + if err != nil { + t.Fatalf("udfSetup: %v", err) + } + + // Re-walk to get entries plus their ICB so we can re-read each FE and + // count its allocation descriptors. 
We can't reuse ListISOFiles output + // directly because isoFileEntry discards the ICB. + type probed struct { + path string + size uint64 + ads int // allocation descriptors observed (= number of on-disc extents) + alloc byte + } + + var probedAll []probed + var walk func(dirICB udfLongAD, prefix string) + walk = func(dirICB udfLongAD, prefix string) { + physSect, e := udfResolveICB(dirICB.loc, metaMap, partStart) + if e != nil { + return + } + entries, e := udfReadDirEntries(f, physSect, metaMap, partStart) + if e != nil { + return + } + for _, ent := range entries { + p := ent.name + if prefix != "" { + p = prefix + "/" + ent.name + } + if ent.isDir { + walk(ent.icb, p) + continue + } + fePhys, rerr := udfResolveICB(ent.icb.loc, metaMap, partStart) + if rerr != nil { + continue + } + feTag, feBuf, rerr := udfReadTag(f, fePhys) + if rerr != nil || (feTag.id != 261 && feTag.id != 266) { + continue + } + alloc := feBuf[34] & 0x07 + var adOff, adLen int + if feTag.id == 266 { + eaLen := int(binary.LittleEndian.Uint32(feBuf[208:212])) + adLen = int(binary.LittleEndian.Uint32(feBuf[212:216])) + adOff = 216 + eaLen + } else { + eaLen := int(binary.LittleEndian.Uint32(feBuf[168:172])) + adLen = int(binary.LittleEndian.Uint32(feBuf[172:176])) + adOff = 176 + eaLen + } + if adOff+adLen > len(feBuf) { + adLen = len(feBuf) - adOff + } + // Count extents using the UDF rules: high 2 bits of the + // length field encode the AD "type": + // 0 = recorded and allocated (real extent) + // 1 = not recorded, allocated (sparse / zero-fill) + // 2 = not recorded, not allocated (sparse hole) + // 3 = next AD points at a continuation AED sector, follow it + // We count types 0,1,2 as logical extents (each contributes + // length bytes to the file) and chase type 3 into AED chains. 
+ n := 0 + step := 0 + switch alloc { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 + case 3: + n = 1 // embedded + } + if step > 0 { + countADs := func(buf []byte) (extents int, chain *udfLongAD) { + for off := 0; off+step <= len(buf); off += step { + lenField := binary.LittleEndian.Uint32(buf[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(buf[off+4:])}} + case 16: + loc = udfParseLongAD(buf, off) + } + return extents, &loc + } + extents++ + } + return extents, nil + } + cnt, chain := countADs(feBuf[adOff : adOff+adLen]) + n = cnt + safety := 0 + for chain != nil && safety < 100 { + safety++ + ps, e := udfResolveICB(chain.loc, metaMap, partStart) + if e != nil { + break + } + _, aedBuf, e := udfReadTag(f, ps) + if e != nil { + break + } + // AED layout: 16-byte tag + 4-byte previous-AED pointer + // + 4-byte length-of-allocation-descriptors + ADs. + if len(aedBuf) < 24 { + break + } + aedLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if aedLen <= 0 || 24+aedLen > len(aedBuf) { + break + } + more, nextChain := countADs(aedBuf[24 : 24+aedLen]) + n += more + chain = nextChain + } + } + probedAll = append(probedAll, probed{ + path: p, + size: binary.LittleEndian.Uint64(feBuf[56:64]), + ads: n, + alloc: alloc, + }) + } + } + walk(rootICB, "") + + // Report the big files specifically + any file with >1 AD. 
+ sort.Slice(probedAll, func(i, j int) bool { return probedAll[i].size > probedAll[j].size }) + t.Logf("top 15 by size (with extent count):") + for i, p := range probedAll { + if i >= 15 { + break + } + t.Logf(" %s size=%d (%.2f MiB) alloc_type=%d extents=%d", + p.path, p.size, float64(p.size)/(1<<20), p.alloc, p.ads) + } + + multi := 0 + for _, p := range probedAll { + if p.ads > 1 { + multi++ + } + } + t.Logf("files with >1 extent: %d / %d", multi, len(probedAll)) + if multi == 0 { + t.Logf("CONCLUSION: all files are contiguous — single-LBA model is sufficient for this ISO") + } else { + t.Logf("CONCLUSION: fragmentation present — single-LBA walker yields WRONG bytes past extent 1") + } +} diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 2220b582..71076a29 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -66,29 +66,43 @@ func AnalyzeISO( } // buildFileContent turns one ISO directory entry into an ISOFileContent, -// slicing or referencing the source's Usenet segments according to whether -// the ISO is encrypted. +// emitting one ISONestedSource per on-disc extent. Concatenating the +// sources' byte ranges yields the complete file. This is the path that +// previously fed BAD bytes for multi-extent files like Avatar's 17 GiB +// 00022.m2ts (945 extents) — only the first extent's data was correct. func buildFileContent(src ISOSource, e isoFileEntry) ISOFileContent { - isoOffset := int64(e.lba) * iso9660SectorSize fc := ISOFileContent{ InternalPath: e.path, Filename: filepath.Base(e.path), Size: int64(e.size), + Sources: make([]ISONestedSource, 0, len(e.extents)), } - if len(src.AesKey) == 0 { - // Unencrypted: pre-slice segments so this content stands alone. 
- sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, int64(e.size)) - fc.Segments = sliced - } else { - // Encrypted: AES-CBC requires the full inner volume + offset so - // the cipher can chain IVs from the start of the ISO. - fc.NestedSource = &ISONestedSource{ - Segments: src.Segments, - AesKey: src.AesKey, - AesIV: src.AesIV, - InnerOffset: isoOffset, - InnerLength: int64(e.size), - InnerVolumeSize: src.Size, + for _, ext := range e.extents { + isoOffset := int64(ext.lba) * iso9660SectorSize + extLen := int64(ext.length) + if len(src.AesKey) == 0 { + // Unencrypted: pre-slice outer segments to cover this extent + // only. The downstream nested reader treats InnerOffset as + // an offset within the (already-sliced) segment chain. + sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, extLen) + fc.Sources = append(fc.Sources, ISONestedSource{ + Segments: sliced, + InnerOffset: 0, + InnerLength: extLen, + InnerVolumeSize: extLen, + }) + } else { + // Encrypted: AES-CBC needs the IV chain from byte 0 of the + // outer ISO, so every source gets the full outer segments + // and the cipher seeks via InnerOffset. + fc.Sources = append(fc.Sources, ISONestedSource{ + Segments: src.Segments, + AesKey: src.AesKey, + AesIV: src.AesIV, + InnerOffset: isoOffset, + InnerLength: extLen, + InnerVolumeSize: src.Size, + }) } } return fc diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index b755fe1e..09e0aad5 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -11,27 +11,34 @@ type ISOSource struct { Size int64 // Decrypted ISO size } -// ISOFileContent represents one file found inside the ISO. +// ISOFileContent represents one file found inside the ISO. The file's +// data may be split across multiple on-disc extents (Blu-ray main-feature +// M2TS files routinely use hundreds), so Sources is a slice of inner +// sources in disc order. 
Concatenating their byte ranges yields the +// complete file content. type ISOFileContent struct { InternalPath string // e.g. "BDMV/STREAM/00001.m2ts" Filename string // Base filename - Size int64 // File size in bytes + Size int64 // Total file size in bytes (sum of Sources.InnerLength) NzbdavID string // Carried from parent archive Content - // Unencrypted case: Segments sliced to cover exactly this file - Segments []*metapb.SegmentData - // Encrypted case: nil Segments + populated NestedSource - NestedSource *ISONestedSource + Sources []ISONestedSource } -// ISONestedSource holds everything needed to decrypt and seek into the ISO -// for a single inner file. +// ISONestedSource is one extent of an inner file. For unencrypted ISOs, +// Segments is pre-sliced to cover exactly this extent and AesKey is nil +// (InnerOffset is 0, InnerLength equals the extent length). For encrypted +// ISOs, AesKey/AesIV are populated, Segments cover the full outer ISO, +// InnerOffset is the byte offset of this extent within the decrypted +// ISO, and InnerVolumeSize is the full decrypted ISO size — the cipher +// chain needs to start at byte 0 so multi-extent encrypted reads use +// the same outer-ISO data with different inner offsets. type ISONestedSource struct { Segments []*metapb.SegmentData AesKey []byte AesIV []byte - InnerOffset int64 // lba * 2048 - InnerLength int64 // file size - InnerVolumeSize int64 // ISO total decrypted size + InnerOffset int64 + InnerLength int64 + InnerVolumeSize int64 } // AnalyzedISO is the full result of inspecting one ISO image. 
Files mirrors diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go index daa8e4fc..d9d9fbbf 100644 --- a/internal/importer/archive/iso_expansion.go +++ b/internal/importer/archive/iso_expansion.go @@ -145,12 +145,13 @@ func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedI nzbdavID = e.src.NzbdavID } for _, fc := range e.analyzed.MainFeature { - ns := isoFileContentToNestedSource(fc) - if ns.InnerLength <= 0 { - continue + for _, ns := range isoFileContentToNestedSources(fc) { + if ns.InnerLength <= 0 { + continue + } + sources = append(sources, ns) + totalSize += ns.InnerLength } - sources = append(sources, ns) - totalSize += ns.InnerLength } } if len(sources) == 0 { @@ -194,36 +195,30 @@ func buildLargestFileContent(src Content, files []iso.ISOFileContent) (Content, NzbdavID: src.NzbdavID, ISOExpansionIndex: 1, } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{isoFileContentToNestedSource(f)} - } else { - nc.Segments = f.Segments + nc.NestedSources = isoFileContentToNestedSources(f) + if len(nc.NestedSources) == 0 { + return Content{}, false } return nc, true } -// isoFileContentToNestedSource converts an ISOFileContent into a -// NestedSource. For unencrypted ISOs the segments are already sliced to -// cover exactly this file, so InnerOffset is 0 and InnerVolumeSize equals -// the file size (unused when AesKey is empty — see -// MetadataVirtualFile.createNestedSourceReader). 
-func isoFileContentToNestedSource(fc iso.ISOFileContent) NestedSource { - if fc.NestedSource != nil { - return NestedSource{ - Segments: fc.NestedSource.Segments, - AesKey: fc.NestedSource.AesKey, - AesIV: fc.NestedSource.AesIV, - InnerOffset: fc.NestedSource.InnerOffset, - InnerLength: fc.NestedSource.InnerLength, - InnerVolumeSize: fc.NestedSource.InnerVolumeSize, - } - } - return NestedSource{ - Segments: fc.Segments, - InnerOffset: 0, - InnerLength: fc.Size, - InnerVolumeSize: fc.Size, +// isoFileContentToNestedSources fans an ISOFileContent's on-disc extents +// out into one NestedSource per extent, preserving disc order. Concatenating +// the resulting sources yields the file's bytes — the multi-extent fix +// for Blu-ray main-feature M2TS files lives here. +func isoFileContentToNestedSources(fc iso.ISOFileContent) []NestedSource { + out := make([]NestedSource, 0, len(fc.Sources)) + for _, s := range fc.Sources { + out = append(out, NestedSource{ + Segments: s.Segments, + AesKey: s.AesKey, + AesIV: s.AesIV, + InnerOffset: s.InnerOffset, + InnerLength: s.InnerLength, + InnerVolumeSize: s.InnerVolumeSize, + }) } + return out } // discSuffixPattern matches volume labels like "AVATAR_FIRE_AND_ASH_DISC_1", diff --git a/internal/importer/archive/iso_expansion_test.go b/internal/importer/archive/iso_expansion_test.go index e009e615..bff8bfd1 100644 --- a/internal/importer/archive/iso_expansion_test.go +++ b/internal/importer/archive/iso_expansion_test.go @@ -63,50 +63,78 @@ func TestParseDiscNumber(t *testing.T) { } } -func TestIsoFileContentToNestedSource(t *testing.T) { +func TestIsoFileContentToNestedSources(t *testing.T) { t.Parallel() - t.Run("unencrypted uses pre-sliced segments", func(t *testing.T) { + t.Run("single unencrypted extent → one NestedSource", func(t *testing.T) { t.Parallel() - segs := []*metapb.SegmentData{ - {Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}, - } fc := iso.ISOFileContent{ Filename: "00001.m2ts", Size: 100, - Segments:
segs, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}}, + InnerOffset: 0, + InnerLength: 100, + InnerVolumeSize: 100, + }}, } - ns := isoFileContentToNestedSource(fc) - if len(ns.Segments) != 1 || ns.InnerLength != 100 || ns.InnerOffset != 0 { - t.Fatalf("unexpected NestedSource: %+v", ns) + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) } - if len(ns.AesKey) != 0 { - t.Errorf("AesKey should be empty, got %v", ns.AesKey) + if got[0].InnerLength != 100 || got[0].InnerOffset != 0 || len(got[0].AesKey) != 0 { + t.Fatalf("unexpected NestedSource: %+v", got[0]) } }) - t.Run("encrypted carries offset and key", func(t *testing.T) { + t.Run("multi-extent file → one NestedSource per extent in order", func(t *testing.T) { t.Parallel() - segs := []*metapb.SegmentData{ - {Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}, + // The bug we just fixed: a 17 GiB M2TS spans hundreds of extents. + // Each extent must become its own NestedSource so the downstream + // concat reader stitches them in disc order. 
+ fc := iso.ISOFileContent{ + Filename: "00022.m2ts", + Size: 30, + Sources: []iso.ISONestedSource{ + {Segments: []*metapb.SegmentData{{Id: "e1"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e2"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e3"}}, InnerLength: 10}, + }, } + got := isoFileContentToNestedSources(fc) + if len(got) != 3 { + t.Fatalf("want 3 sources, got %d", len(got)) + } + wantIDs := []string{"e1", "e2", "e3"} + for i, ns := range got { + if len(ns.Segments) != 1 || ns.Segments[0].Id != wantIDs[i] { + t.Errorf("source %d: want segment id %q, got %+v", i, wantIDs[i], ns.Segments) + } + } + }) + + t.Run("encrypted source carries key + IV through", func(t *testing.T) { + t.Parallel() fc := iso.ISOFileContent{ Filename: "00001.m2ts", Size: 2048, - NestedSource: &iso.ISONestedSource{ - Segments: segs, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}}, AesKey: []byte("0123456789abcdef0123456789abcdef"), AesIV: []byte("0123456789abcdef"), InnerOffset: 1024, InnerLength: 2048, InnerVolumeSize: 99999, - }, + }}, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) } - ns := isoFileContentToNestedSource(fc) - if ns.InnerOffset != 1024 || ns.InnerLength != 2048 || ns.InnerVolumeSize != 99999 { - t.Fatalf("unexpected NestedSource offsets: %+v", ns) + if got[0].InnerOffset != 1024 || got[0].InnerLength != 2048 || got[0].InnerVolumeSize != 99999 { + t.Fatalf("offsets mangled: %+v", got[0]) } - if len(ns.AesKey) == 0 { + if len(got[0].AesKey) == 0 { t.Error("AesKey should be carried through for encrypted source") } }) @@ -123,9 +151,13 @@ func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { return iso.ISOFileContent{ Filename: name, Size: size, - Segments: []*metapb.SegmentData{ - {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, - }, + Sources: 
[]iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{ + {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, + }, + InnerLength: size, + InnerVolumeSize: size, + }}, } } @@ -188,13 +220,20 @@ func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { func TestBuildLargestFileContent(t *testing.T) { t.Parallel() + mkFile := func(name string, size int64, segID string) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: segID, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}}, + InnerLength: size, + InnerVolumeSize: size, + }}, + } + } files := []iso.ISOFileContent{ - {Filename: "small.mkv", Size: 500, Segments: []*metapb.SegmentData{ - {Id: "s", StartOffset: 0, EndOffset: 499, SegmentSize: 500}, - }}, - {Filename: "big.mkv", Size: 5_000_000, Segments: []*metapb.SegmentData{ - {Id: "b", StartOffset: 0, EndOffset: 4_999_999, SegmentSize: 5_000_000}, - }}, + mkFile("small.mkv", 500, "s"), + mkFile("big.mkv", 5_000_000, "b"), } src := Content{Filename: "thing.iso", NzbdavID: "id-1"} From a7cfcc17f2c7dd6212e060e5b2c9ef3b538b2308 Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 17:29:24 +0200 Subject: [PATCH 8/8] perf(iso): coalesce physically contiguous UDF extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A BD3D SSIF often emits a dozen separate UDF allocation descriptors for what's a single contiguous run of sectors on disc. After the multi- extent fix, each AD became its own NestedSource — bloating the proto metadata, the validation-sample surface, and the per-file open-handle count for what is logically one extent. coalesceExtents merges adjacent extents whose physical sectors follow the previous extent's last sector. 
Measured against the real Avatar disc 1 ISO: - BDMV/STREAM/SSIF/00022.ssif (22 GiB): 23 extents -> 2 - BDMV/STREAM/SSIF/00028.ssif (7 GiB): 7 extents -> 1 - BDMV/STREAM/SSIF/00016.ssif (6 GiB): 6 extents -> 1 M2TS files keep their full extent list because BD authoring genuinely interleaves the M2TS clips with the SSIF dependent-view data on disc. Note: the recent import failure ("not a valid ISO 9660 or UDF image" on disc 1, segment "44c89668..." unreachable during validation) is a Usenet-side issue — disc 2 analysed cleanly in 30 seconds with the same code path; disc 1 timed out reading its first sectors for 9 minutes before giving up. The coalescing change reduces the surface where transient flakes can bite but cannot eliminate it. --- internal/importer/archive/iso/fs.go | 35 +++++++++++++ .../importer/archive/iso/fs_local_test.go | 50 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index d63c6e23..31415fbd 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -779,10 +779,45 @@ func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, meta } total += extents[i].length } + + // Coalesce physically contiguous extents — many BD3D SSIF files have + // dozens of small ADs that sit right next to each other on disc. The + // underlying bytes are one contiguous run; merging the ADs collapses + // the NestedSources count proportionally (Avatar SSIF: 23 → 2) and + // shrinks both the metadata proto and the validation surface. + extents = coalesceExtents(extents) _ = embeddedFEPhys return extents } +// coalesceExtents merges adjacent extents whose physical sectors are +// contiguous (next.lba == prev.lba + prev.length/sector). Returns the +// possibly-shorter slice in disc order. A file whose extents are +// physically scattered (typical for BD M2TS clips interleaved with SSIF +// dependent-view data) is returned unchanged. 
+func coalesceExtents(in []isoExtent) []isoExtent { + if len(in) <= 1 { + return in + } + out := make([]isoExtent, 0, len(in)) + cur := in[0] + for i := 1; i < len(in); i++ { + next := in[i] + // length must be a whole number of sectors for the contiguity + // arithmetic to apply; if it isn't (final partial sector of a + // file), fall through and start a new run after. + if cur.length%iso9660SectorSize == 0 && + next.lba == cur.lba+uint32(cur.length/iso9660SectorSize) { + cur.length += next.length + continue + } + out = append(out, cur) + cur = next + } + out = append(out, cur) + return out +} + // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory // entries. It tries UDF first (correct 64-bit sizes, authoritative for Blu-ray) // and falls back to ISO 9660 for plain discs without UDF. diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index 23693efe..1c52d65b 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -270,3 +270,53 @@ func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { t.Logf("CONCLUSION: fragmentation present — single-LBA walker yields WRONG bytes past extent 1") } } + +// TestLocalISO_CountAdjacentExtents checks whether multi-extent files have +// physically contiguous extents that could be coalesced. If yes, segment +// count downstream can be reduced dramatically — the importer hit +// total_segments_to_validate=888,903 on this NZB precisely because every +// AD became its own NestedSource even when adjacent ADs sat next to each +// other on disc. 
+func TestLocalISO_CountAdjacentExtents(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + entries, err := ListISOFiles(f) + if err != nil { + t.Fatalf("list: %v", err) + } + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + + const lookAt = 15 + for i, e := range entries { + if i >= lookAt { + break + } + if len(e.extents) <= 1 { + continue + } + // Count adjacent runs (where next.lba == this.lba + this.length/sector). + adjacent := 0 + distinctRuns := 1 + for j := 1; j < len(e.extents); j++ { + prev := e.extents[j-1] + next := e.extents[j] + expectedNextLBA := prev.lba + uint32(prev.length/iso9660SectorSize) + if next.lba == expectedNextLBA { + adjacent++ + } else { + distinctRuns++ + } + } + t.Logf(" %s: extents=%d adjacent_pairs=%d distinct_runs=%d coalesce_ratio=%.1fx", + e.path, len(e.extents), adjacent, distinctRuns, + float64(len(e.extents))/float64(distinctRuns)) + } +}