From 0bf9208ee20296a1a59a9b5343df868c5887a1ec Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 7 May 2026 13:13:49 -0400 Subject: [PATCH 1/2] parse: Consume record-size padding in ParseEvent::End MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xref https://github.com/composefs/composefs-rs#290 GNU tar and Python's tarfile both pad archives to a record boundary (default 20×512 = 10,240 bytes) after the two mandatory end-of-archive zero blocks. Previously ParseEvent::End only reported consumed: 2×512, leaving the trailing padding bytes unaccounted for. Normally this doesn't even matter, because callers will only be interested in parsing the stream and discarding the underlying bytes. But composefs wants to do a "splitstream" which preserves those bytes. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- src/parse.rs | 34 +++++-- tests/stream_parser.rs | 207 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+), 7 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index 28e9ef2..4c62b04 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -830,9 +830,15 @@ impl Parser { if !slices.is_empty() { return Err(ParseError::OrphanedMetadata); } - return Ok(ParseEvent::End { - consumed: 2 * HEADER_SIZE, - }); + // Consume the two mandatory EOA blocks plus any additional + // record-size padding (GNU tar pads to 20×512, Go's archive/tar + // pads to 4×512, etc.). + let consumed = input + .chunks_exact(HEADER_SIZE) + .take_while(|block| block.iter().all(|&b| b == 0)) + .count() + * HEADER_SIZE; + return Ok(ParseEvent::End { consumed }); } // Not end of archive — single stray zero block; skip it and // continue with the next block as a header. @@ -1697,9 +1703,16 @@ mod tests { other => panic!("Expected Entry, got {:?}", other), } - // Now parse end + // Now parse end — consumed must be >= 1024 (two EOA blocks) and + // may include additional trailing zeros from the 2048-byte buffer. let event = parser.parse(&data[512..]).unwrap(); - assert!(matches!(event, ParseEvent::End { consumed: 1024 })); + match event { + ParseEvent::End { consumed } => assert!( + consumed >= 1024, + "expected consumed >= 1024, got {consumed}" + ), + other => panic!("Expected End, got {:?}", other), + } } #[test] @@ -1743,9 +1756,16 @@ mod tests { // Content at data[512..517], padded to 512. // Caller skips past content + padding, then parses the next header. - // Parse end (zero blocks at 1024..2048) + // Parse end (zero blocks at 1024..2048) — consumed must be >= 1024 + // and may include additional trailing zeros from the 2560-byte buffer. let event = parser.parse(&data[1024..]).unwrap(); - assert!(matches!(event, ParseEvent::End { consumed: 1024 })); + match event { + ParseEvent::End { consumed } => assert!( + consumed >= 1024, + "expected consumed >= 1024, got {consumed}" + ), + other => panic!("Expected End, got {:?}", other), + } } // ========================================================================= diff --git a/tests/stream_parser.rs b/tests/stream_parser.rs index 0ed381c..048a6b2 100644 --- a/tests/stream_parser.rs +++ b/tests/stream_parser.rs @@ -1083,3 +1083,210 @@ mod proptest_tests { } } } + +// ============================================================================= +// GNU/POSIX record-size padding tests +// ============================================================================= + +/// GNU tar default record size: 20 blocks × 512 bytes = 10,240 bytes. +const GNU_RECORD_SIZE: usize = 20 * HEADER_SIZE; + +/// Pad a tar archive to the given total size by appending zero bytes. +/// +/// `target_size` must be >= `data.len()` and a multiple of `HEADER_SIZE`. +fn pad_to(data: Vec, target_size: usize) -> Vec { + assert_eq!( + target_size % HEADER_SIZE, + 0, + "target_size {target_size} is not a multiple of HEADER_SIZE {HEADER_SIZE}", + ); + assert!( + target_size >= data.len(), + "target_size {target_size} < data.len() {}", + data.len() + ); + let mut out = data; + out.resize(target_size, 0u8); + out +} + +#[test] +fn test_gnu_record_size_padding() { + // Build a minimal archive with one file then pad it to GNU_RECORD_SIZE. + let base = create_tar_with(|b| { + append_file(b, "hello.txt", b"hello"); + }); + // The base archive must be <= GNU_RECORD_SIZE to be paddable. + assert!( + base.len() <= GNU_RECORD_SIZE, + "base archive too large: {} bytes", + base.len() + ); + let data = pad_to(base, GNU_RECORD_SIZE); + assert_eq!(data.len(), GNU_RECORD_SIZE); + + // Must parse exactly 1 entry with no error under all chunk sizes. + for &(seed, mc) in TEST_READ_PARAMS { + let entries = parse_all_chunked(&data, seed, mc); + assert_eq!( + entries.len(), + 1, + "expected 1 entry (seed={seed}, max_chunk={mc})" + ); + assert_eq!( + entries[0].path, b"hello.txt", + "wrong path (seed={seed}, max_chunk={mc})" + ); + } +} + +#[test] +fn test_gnu_record_size_padding_sans_io() { + use tar_core::parse::{ParseEvent, Parser}; + + let base = create_tar_with(|b| { + append_file(b, "hello.txt", b"hello"); + }); + let data = pad_to(base, GNU_RECORD_SIZE); + + let mut parser = Parser::new(Limits::default()); + let mut offset = 0usize; + let mut entry_count = 0usize; + + loop { + match parser.parse(&data[offset..]).expect("parse should succeed") { + ParseEvent::NeedData { .. } => panic!("unexpected NeedData"), + ParseEvent::Entry { consumed, entry } => { + entry_count += 1; + let padded = entry.padded_size() as usize; + // Verify path before the entry is dropped. + assert_eq!(entry.path.as_ref(), b"hello.txt"); + offset += consumed + padded; + } + ParseEvent::SparseEntry { + consumed, entry, .. + } => { + entry_count += 1; + offset += consumed + entry.padded_size() as usize; + } + ParseEvent::GlobalExtensions { consumed, .. } => { + offset += consumed; + } + ParseEvent::End { consumed } => { + // The consumed count should cover all padding through GNU_RECORD_SIZE. + assert_eq!( + offset + consumed, + GNU_RECORD_SIZE, + "End consumed should cover the full padded archive" + ); + break; + } + } + } + + assert_eq!(entry_count, 1, "expected 1 entry from sans-IO parser"); +} + +#[test] +fn test_record_padding_various_sizes() { + // Test different total archive sizes (all zero-padded from the end-of-archive marker). + let base = create_tar_with(|b| { + append_file(b, "test.txt", b"data"); + }); + + let base_len = base.len(); + + // Round up base_len to the next multiple of HEADER_SIZE. + let min_target = base_len.next_multiple_of(HEADER_SIZE); + + // Try several target sizes that are >= base_len and multiples of HEADER_SIZE. + let target_block_counts = [ + min_target / HEADER_SIZE, // minimum: just enough to hold the base + 5, + 20, // GNU_RECORD_SIZE / HEADER_SIZE + 21, + ]; + + for &block_count in &target_block_counts { + let target = block_count * HEADER_SIZE; + if target < base_len { + // Skip targets smaller than the archive itself. + continue; + } + let data = pad_to(base.clone(), target); + assert_eq!(data.len(), target); + + for &(seed, mc) in TEST_READ_PARAMS { + let entries = parse_all_chunked(&data, seed, mc); + assert_eq!( + entries.len(), + 1, + "expected 1 entry with {block_count} blocks of padding (seed={seed}, max_chunk={mc})" + ); + } + } +} + +#[test] +fn test_real_gnu_tar_padding() { + use std::process::Command; + + // Check if `tar` is available. + let tar_available = Command::new("tar") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + + if !tar_available { + eprintln!("skipping test_real_gnu_tar_padding: tar not available"); + return; + } + + // Create a temp directory with one file, then build a real GNU tar archive. + let tmp = tempfile::TempDir::new().expect("create tempdir"); + let file_path = tmp.path().join("hello.txt"); + std::fs::write(&file_path, b"hello from real tar").expect("write test file"); + + let tar_path = tmp.path().join("test.tar"); + + // Build a PAX-format tar archive with the default record size (20 blocks). + let status = Command::new("tar") + .args([ + "--format=pax", + "-cf", + tar_path.to_str().unwrap(), + "-C", + tmp.path().to_str().unwrap(), + "hello.txt", + ]) + .status() + .expect("run tar"); + + if !status.success() { + eprintln!("skipping test_real_gnu_tar_padding: tar failed to create archive"); + return; + } + + let data = std::fs::read(&tar_path).expect("read tar archive"); + + assert!( + data.len() % HEADER_SIZE == 0, + "archive not aligned to block size: {} bytes", + data.len() + ); + + // Parse with all chunk sizes — must succeed with exactly 1 entry. + for &(seed, mc) in TEST_READ_PARAMS { + let entries = parse_all_chunked(&data, seed, mc); + assert_eq!( + entries.len(), + 1, + "expected 1 entry from real GNU tar archive (seed={seed}, max_chunk={mc})" + ); + assert_eq!( + entries[0].path, b"hello.txt", + "wrong path (seed={seed}, max_chunk={mc})" + ); + } +} From b80d67655335688b37c1af40b8f4efe55df7e63c Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 7 May 2026 13:35:06 -0400 Subject: [PATCH 2/2] ci: Fix fuzz job by pinning cargo-fuzz 0.13.1 without --locked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The published lockfile for cargo-fuzz pins rustix 0.36, which uses rustc_layout_scalar_valid_range_* attributes that nightly ≥ 1.97 (2026-05-03) now reserves for compiler-internal use, causing a compile error and breaking the fuzz CI job. Pin to 0.13.1 and drop --locked so cargo resolves fresh deps and picks up a compatible rustix version. Mirrors the same fix applied in composefs-rs (commit 7c9a927f). Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- .github/workflows/ci.yaml | 5 ++++- .github/workflows/fuzz-extended.yaml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3676d87..fbc0b42 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -87,7 +87,10 @@ jobs: - name: Install nightly toolchain uses: dtolnay/rust-toolchain@nightly - name: Install cargo-fuzz - run: cargo install cargo-fuzz --locked + # Pin to 0.13.1 without --locked so cargo resolves fresh deps; the + # published lockfile pins rustix 0.36 which fails with nightly ≥ 1.97 + # (rustc_layout_scalar_valid_range_* attributes now reserved for rustc). + run: cargo install cargo-fuzz@0.13.1 - name: Install just uses: extractions/setup-just@v3 - name: Cache Dependencies diff --git a/.github/workflows/fuzz-extended.yaml b/.github/workflows/fuzz-extended.yaml index fb72b06..c75569b 100644 --- a/.github/workflows/fuzz-extended.yaml +++ b/.github/workflows/fuzz-extended.yaml @@ -23,7 +23,10 @@ jobs: - name: Install nightly toolchain uses: dtolnay/rust-toolchain@nightly - name: Install cargo-fuzz - run: cargo install cargo-fuzz --locked + # Pin to 0.13.1 without --locked so cargo resolves fresh deps; the + # published lockfile pins rustix 0.36 which fails with nightly ≥ 1.97 + # (rustc_layout_scalar_valid_range_* attributes now reserved for rustc). + run: cargo install cargo-fuzz@0.13.1 - name: Install just uses: extractions/setup-just@v3 - name: Cache Dependencies