From 74c676fe2c0f7ca4960d99b32c5f147545a0d66a Mon Sep 17 00:00:00 2001 From: dts Date: Wed, 29 Apr 2026 18:28:27 +0200 Subject: [PATCH 1/3] fix: cap decompress size hint when caller passes None The decompress() function previously computed an implicit size hint as compressed.len() * 100 (LZ4) or * 10 (Zstd) when the caller passed expected_size = None. The unchecked multiplication can overflow usize (panic) and even when it doesn't, allocating up to 100x the compressed size on attacker-supplied or otherwise-large inputs is a clear memory-blowup hazard. atompack's own callers always pass Some(uncompressed_size) from the on-disk index entry, so this is a defensive fix for external callers of the public lib.rs::decompress_bytes re-export, not a regression in any internal path. Changes: - Use saturating_mul + a 1 GiB cap (DEFAULT_MAX_DECOMPRESSED_SIZE) when expected_size = None. - For LZ4, validate the hint fits in i32 before passing to lz4::block, returning a clear error if it doesn't (previously cast wrapped). - Added two unit tests: one that exercises the None-hint path on both codecs, and one that confirms an oversized i32 hint errors cleanly. --- atompack/src/compression.rs | 69 ++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/atompack/src/compression.rs b/atompack/src/compression.rs index 48a5811..e06b5fe 100644 --- a/atompack/src/compression.rs +++ b/atompack/src/compression.rs @@ -53,7 +53,19 @@ pub fn compress(data: &[u8], compression: CompressionType) -> Result> { } } -/// Decompress bytes that were compressed with the specified algorithm +/// Cap on the auto-derived decompressed-size hint when the caller passes +/// `expected_size = None`. atompack's own callers always pass `Some(...)` +/// from the index entry; this cap protects external callers of the public +/// `decompress`/`decompress_bytes` re-export from accidental memory blowup +/// (or `usize` overflow) on attacker-supplied or otherwise-large inputs. +const DEFAULT_MAX_DECOMPRESSED_SIZE: usize = 1 << 30; // 1 GiB + +/// Decompress bytes that were compressed with the specified algorithm. +/// +/// `expected_size` is the upper bound on the decompressed payload. When +/// `None`, a heuristic derived from the compressed size is used, capped at +/// 1 GiB so unbounded multiplication can't overflow `usize` or trigger a +/// pathological allocation. pub fn decompress( compressed: &[u8], compression: CompressionType, @@ -62,16 +74,31 @@ pub fn decompress( match compression { CompressionType::None => Ok(compressed.to_vec()), CompressionType::Lz4 => { - // LZ4 decompression - if we have expected size, use it as a hint - let max_size = expected_size.unwrap_or(compressed.len() * 100); // Conservative estimate - lz4::block::decompress(compressed, Some(max_size as i32)) + let max_size = expected_size.unwrap_or_else(|| { + compressed + .len() + .saturating_mul(100) + .min(DEFAULT_MAX_DECOMPRESSED_SIZE) + }); + let max_i32: i32 = max_size.try_into().map_err(|_| { + Error::Compression(format!( + "LZ4 decompressed-size hint {} exceeds i32::MAX", + max_size + )) + })?; + lz4::block::decompress(compressed, Some(max_i32)) .map_err(|e| Error::Compression(format!("LZ4 decompression failed: {}", e))) } - CompressionType::Zstd(_) => zstd::bulk::decompress( - compressed, - expected_size.unwrap_or(compressed.len() * 10), // Estimate if not provided - ) - .map_err(|e| Error::Compression(format!("Zstd decompression failed: {}", e))), + CompressionType::Zstd(_) => { + let capacity = expected_size.unwrap_or_else(|| { + compressed + .len() + .saturating_mul(10) + .min(DEFAULT_MAX_DECOMPRESSED_SIZE) + }); + zstd::bulk::decompress(compressed, capacity) + .map_err(|e| Error::Compression(format!("Zstd decompression failed: {}", e))) + } } } @@ -114,6 +141,30 @@ mod tests { } } + #[test] + fn test_decompress_without_expected_size_does_not_panic() { + // Compressed inputs with no caller-supplied size hint must not panic + // on the internal heuristic, regardless of compressed length. + for compression in &[CompressionType::Lz4, CompressionType::Zstd(3)] { + let original = b"small payload"; + let compressed = compress(original, *compression).unwrap(); + let out = decompress(&compressed, *compression, None).unwrap(); + assert_eq!(out.as_slice(), original.as_slice()); + } + } + + #[test] + fn test_decompress_lz4_rejects_size_hint_exceeding_i32() { + // The lz4 crate takes a signed i32 buffer-size hint; if the caller + // passes a usize larger than i32::MAX, decompress must error cleanly + // instead of silently wrapping. + let original = b"hello"; + let compressed = compress(original, CompressionType::Lz4).unwrap(); + let bad_hint = (i32::MAX as usize) + 1; + let err = decompress(&compressed, CompressionType::Lz4, Some(bad_hint)); + assert!(err.is_err()); + } + #[test] fn test_atom_compression() { let atoms = vec![ From d794e1ae7f6542f5fa5365b992571f72c700dd58 Mon Sep 17 00:00:00 2001 From: dts Date: Wed, 29 Apr 2026 19:18:38 +0200 Subject: [PATCH 2/3] test: tighten compression cap tests after independent review - Extract auto_max_size() helper so the saturating + cap math is unit- testable without round-tripping a multi-MiB payload. - New test_auto_max_size_caps_at_default_max asserts: - small inputs pass through unchanged, - usize::MAX saturates to the 1 GiB cap, - len*multiplier just above the cap clamps to the cap exactly. This actually exercises the saturate+clamp guard the PR claims to add. - test_decompress_lz4_rejects_size_hint_exceeding_i32 now asserts the specific Error::Compression variant and string-matches "exceeds i32::MAX" so a future change that breaks the try_into guard but errors elsewhere will fail loudly instead of pass. - Renamed test_decompress_without_expected_size_does_not_panic to ..._roundtrips_small_input to be honest: it's a happy-path roundtrip, not a guard test. --- atompack/src/compression.rs | 63 ++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/atompack/src/compression.rs b/atompack/src/compression.rs index e06b5fe..f768804 100644 --- a/atompack/src/compression.rs +++ b/atompack/src/compression.rs @@ -60,12 +60,24 @@ pub fn compress(data: &[u8], compression: CompressionType) -> Result> { /// (or `usize` overflow) on attacker-supplied or otherwise-large inputs. const DEFAULT_MAX_DECOMPRESSED_SIZE: usize = 1 << 30; // 1 GiB +/// Compute the auto-derived decompressed-size hint. +/// +/// Uses `saturating_mul` so the multiplication can't overflow `usize`, then +/// clamps to `DEFAULT_MAX_DECOMPRESSED_SIZE` so a pathological compressed +/// input can't drive an arbitrary allocation. +fn auto_max_size(compressed_len: usize, multiplier: usize) -> usize { + compressed_len + .saturating_mul(multiplier) + .min(DEFAULT_MAX_DECOMPRESSED_SIZE) +} + /// Decompress bytes that were compressed with the specified algorithm. /// /// `expected_size` is the upper bound on the decompressed payload. When /// `None`, a heuristic derived from the compressed size is used, capped at /// 1 GiB so unbounded multiplication can't overflow `usize` or trigger a -/// pathological allocation. +/// pathological allocation. Callers that legitimately need >1 GiB outputs +/// must pass an explicit `Some(n)`. pub fn decompress( compressed: &[u8], compression: CompressionType, @@ -74,12 +86,7 @@ pub fn decompress( match compression { CompressionType::None => Ok(compressed.to_vec()), CompressionType::Lz4 => { - let max_size = expected_size.unwrap_or_else(|| { - compressed - .len() - .saturating_mul(100) - .min(DEFAULT_MAX_DECOMPRESSED_SIZE) - }); + let max_size = expected_size.unwrap_or_else(|| auto_max_size(compressed.len(), 100)); let max_i32: i32 = max_size.try_into().map_err(|_| { Error::Compression(format!( "LZ4 decompressed-size hint {} exceeds i32::MAX", @@ -90,12 +97,7 @@ pub fn decompress( .map_err(|e| Error::Compression(format!("LZ4 decompression failed: {}", e))) } CompressionType::Zstd(_) => { - let capacity = expected_size.unwrap_or_else(|| { - compressed - .len() - .saturating_mul(10) - .min(DEFAULT_MAX_DECOMPRESSED_SIZE) - }); + let capacity = expected_size.unwrap_or_else(|| auto_max_size(compressed.len(), 10)); zstd::bulk::decompress(compressed, capacity) .map_err(|e| Error::Compression(format!("Zstd decompression failed: {}", e))) } @@ -142,9 +144,8 @@ mod tests { } #[test] - fn test_decompress_without_expected_size_does_not_panic() { - // Compressed inputs with no caller-supplied size hint must not panic - // on the internal heuristic, regardless of compressed length. + fn test_decompress_without_expected_size_roundtrips_small_input() { + // Sanity: small payloads with no caller-supplied size hint round-trip. for compression in &[CompressionType::Lz4, CompressionType::Zstd(3)] { let original = b"small payload"; let compressed = compress(original, *compression).unwrap(); @@ -153,16 +154,36 @@ mod tests { } } + #[test] + fn test_auto_max_size_caps_at_default_max() { + // The heuristic must saturate (no overflow panic) and clamp to the + // 1 GiB cap. Without the cap, len*multiplier is unbounded by usize::MAX. + assert_eq!(auto_max_size(1024, 100), 102_400); + assert_eq!(auto_max_size(usize::MAX, 100), DEFAULT_MAX_DECOMPRESSED_SIZE); + // Boundary: 11 MiB compressed × 100 = 1.1 GiB > cap. + let just_above = (DEFAULT_MAX_DECOMPRESSED_SIZE / 100) + 1; + assert_eq!(auto_max_size(just_above, 100), DEFAULT_MAX_DECOMPRESSED_SIZE); + } + #[test] fn test_decompress_lz4_rejects_size_hint_exceeding_i32() { - // The lz4 crate takes a signed i32 buffer-size hint; if the caller - // passes a usize larger than i32::MAX, decompress must error cleanly - // instead of silently wrapping. + // The lz4 crate takes a signed i32 buffer-size hint. A usize hint + // larger than i32::MAX must surface a specific Compression error + // ("exceeds i32::MAX") instead of silently wrapping. let original = b"hello"; let compressed = compress(original, CompressionType::Lz4).unwrap(); let bad_hint = (i32::MAX as usize) + 1; - let err = decompress(&compressed, CompressionType::Lz4, Some(bad_hint)); - assert!(err.is_err()); + let result = decompress(&compressed, CompressionType::Lz4, Some(bad_hint)); + match result { + Err(Error::Compression(msg)) => { + assert!( + msg.contains("exceeds i32::MAX"), + "unexpected error message: {}", + msg + ); + } + other => panic!("expected Compression error, got {:?}", other), + } } #[test] From 55a1cddb841c90ecdc0faa92f15048b6830b06d5 Mon Sep 17 00:00:00 2001 From: dts Date: Thu, 30 Apr 2026 21:33:27 +0200 Subject: [PATCH 3/3] style: cargo fmt for long assert_eq! lines in cap test The new test_auto_max_size_caps_at_default_max test had two assert_eq! calls that exceeded max_width=100 once DEFAULT_MAX_DECOMPRESSED_SIZE was inlined. rustfmt's canonical form splits them across multiple lines. Caught proactively by mirroring the cargo fmt --all --check pass that PR #12 needed for its CI. --- atompack/src/compression.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/atompack/src/compression.rs b/atompack/src/compression.rs index f768804..3293e31 100644 --- a/atompack/src/compression.rs +++ b/atompack/src/compression.rs @@ -159,10 +159,16 @@ mod tests { // The heuristic must saturate (no overflow panic) and clamp to the // 1 GiB cap. Without the cap, len*multiplier is unbounded by usize::MAX. assert_eq!(auto_max_size(1024, 100), 102_400); - assert_eq!(auto_max_size(usize::MAX, 100), DEFAULT_MAX_DECOMPRESSED_SIZE); + assert_eq!( + auto_max_size(usize::MAX, 100), + DEFAULT_MAX_DECOMPRESSED_SIZE + ); // Boundary: 11 MiB compressed × 100 = 1.1 GiB > cap. let just_above = (DEFAULT_MAX_DECOMPRESSED_SIZE / 100) + 1; - assert_eq!(auto_max_size(just_above, 100), DEFAULT_MAX_DECOMPRESSED_SIZE); + assert_eq!( + auto_max_size(just_above, 100), + DEFAULT_MAX_DECOMPRESSED_SIZE + ); } #[test]