From 200284c29e069b87cfce3774d0716fcf903beae4 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Tue, 21 Apr 2026 22:44:42 +0800 Subject: [PATCH 01/47] feat: simple compute delta --- gix-pack/src/data/delta.rs | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index f5b7dfb05e..cad24ccd59 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -85,3 +85,53 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( Ok(()) } + +enum Instruction { + Copy { offset: usize, size: usize }, + Add { data: Vec }, +} + +fn compute_delta(source: &[u8], target: &[u8]) -> Vec { + let mut common_prefix_len = 0; + for (s, t) in source.iter().zip(target) { + if s == t { + common_prefix_len += 1; + } else { + break; + } + } + vec![ + Instruction::Copy { + offset: 0, + size: common_prefix_len, + }, + Instruction::Add { + data: target[common_prefix_len..].into(), + }, + ] +} + +#[cfg(test)] +mod tests { + use super::*; + + fn apply_delta(source: &[u8], delta: Vec) -> Vec { + let mut buf = Vec::new(); + for inst in delta { + match inst { + Instruction::Add { data } => buf.extend_from_slice(&data), + Instruction::Copy { offset, size } => buf.extend_from_slice(&source[offset..offset + size]), + } + } + buf + } + + #[test] + fn make_it_right() { + let source = "hello, world".as_bytes(); + let target = "hello, gitoxide".as_bytes(); + let delta = compute_delta(source, target); + let restored = apply_delta(source, delta); + assert_eq!(target, restored); + } +} From 503ed418326a070585df96f37b773578b32e0456 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Tue, 21 Apr 2026 23:06:12 +0800 Subject: [PATCH 02/47] feat: encode_size --- gix-pack/src/data/delta.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index cad24ccd59..4bbf97069f 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -30,6 +30,22 @@ pub(crate) fn decode_header_size(d: &[u8]) -> (u64, usize) { (size, consumed) } +fn encode_size(mut n: u64) -> Vec { + let mut buf = Vec::with_capacity(8); + loop { + let mut byte = (n & 0x7F) as u8; + n >>= 7; + if n != 0 { + byte |= 0x80; + buf.push(byte); + } else { + buf.push(byte); + break; + } + } + buf +} + pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), apply::Error> { let mut i = 0; while let Some(cmd) = data.get(i) { @@ -91,6 +107,16 @@ enum Instruction { Add { data: Vec }, } +impl Instruction { + pub fn encode(&self) -> Vec { + match self { + Self::Copy { offset, size } => todo!(), + Self::Add { data } => todo!(), + } + todo!() + } +} + fn compute_delta(source: &[u8], target: &[u8]) -> Vec { let mut common_prefix_len = 0; for (s, t) in source.iter().zip(target) { @@ -115,6 +141,16 @@ fn compute_delta(source: &[u8], target: &[u8]) -> Vec { mod tests { use super::*; + #[test] + fn encode_size_works() { + let cases: Vec = vec![0x00, 0x01, 0x7f, 0xff, 0x7777, 1795265022, 3_825_123_056_546_413_051]; + for n in cases { + let encoded = encode_size(n); + let (restored_n, _) = decode_header_size(&encoded); + assert_eq!(n, restored_n); + } + } + fn apply_delta(source: &[u8], delta: Vec) -> Vec { let mut buf = Vec::new(); for inst in delta { From 3839de374c4203447fd39535918fbc500a04cf41 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 00:08:38 +0800 Subject: [PATCH 03/47] feat: encode instruction --- gix-pack/src/data/delta.rs | 98 +++++++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 12 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 4bbf97069f..83464f3993 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -1,3 +1,5 @@ +use std::io::Write; + /// pub mod apply { /// Returned when failing to apply deltas. @@ -13,6 +15,21 @@ pub mod apply { } } +/// +pub mod encode { + /// Returned when failing to encode deltas. + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("Failed to write bytes")] + IOError, + #[error("Too large size in Copy instruction, should <= 0x00ffffff")] + TooLargeSize, + #[error("Too large data in Add instruction, length should <= 127")] + TooLargeData, + } +} + /// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size) /// Equivalent to [this canonical git function](https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/delta.h#L89) pub(crate) fn decode_header_size(d: &[u8]) -> (u64, usize) { @@ -49,8 +66,10 @@ fn encode_size(mut n: u64) -> Vec { pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), apply::Error> { let mut i = 0; while let Some(cmd) = data.get(i) { + eprintln!("index: {i}, cmd: {cmd}"); i += 1; match cmd { + // Copy cmd if cmd & 0b1000_0000 != 0 => { let (mut ofs, mut size): (u32, u32) = (0, 0); if cmd & 0b0000_0001 != 0 { @@ -85,12 +104,14 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( size = 0x10000; // 65536 } let ofs = ofs as usize; - std::io::Write::write(&mut target, &base[ofs..ofs + size as usize]) + Write::write(&mut target, &base[ofs..ofs + size as usize]) .map_err(|_e| apply::Error::DeltaCopyBaseSliceMismatch)?; } + // Reserved 0 => return Err(apply::Error::UnsupportedCommandCode), + // Add size => { - std::io::Write::write(&mut target, &data[i..i + *size as usize]) + Write::write(&mut target, &data[i..i + *size as usize]) .map_err(|_e| apply::Error::DeltaCopyDataSliceMismatch)?; i += *size as usize; } @@ -102,23 +123,63 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( Ok(()) } +#[derive(Debug)] enum Instruction { - Copy { offset: usize, size: usize }, + Copy { offset: u32, size: u32 }, Add { data: Vec }, } impl Instruction { - pub fn encode(&self) -> Vec { + pub fn encode(self, mut writer: impl Write) -> Result<(), encode::Error> { match self { - Self::Copy { offset, size } => todo!(), - Self::Add { data } => todo!(), + Self::Copy { offset, mut size } => { + let mut header = 0x80u8; + let mut buf = [0u8; 7]; + let mut n = 0; + + if size == 0x10000 { + size = 0; + } else if size > 0x00ffffff { + return Err(encode::Error::TooLargeSize); + } + + for i in 0..4 { + let byte = (offset >> (i * 8)) as u8; + if byte != 0 { + header |= 1 << i; + buf[n] = byte; + n += 1; + } + } + for i in 0..3 { + let byte = (size >> (i * 8)) as u8; + if byte != 0 { + header |= 1 << (4 + i); + buf[n] = byte; + n += 1; + } + } + + writer.write_all(&[header]).map_err(|_| encode::Error::IOError)?; + writer.write_all(&buf[..n]).map_err(|_| encode::Error::IOError)?; + Ok(()) + } + Self::Add { data } => { + if data.len() > 127 { + return Err(encode::Error::TooLargeData); + } + + let header = data.len() as u8; + writer.write(&[header]).map_err(|_| encode::Error::IOError)?; + writer.write(data.as_slice()).map_err(|_| encode::Error::IOError)?; + Ok(()) + } } - todo!() } } fn compute_delta(source: &[u8], target: &[u8]) -> Vec { - let mut common_prefix_len = 0; + let mut common_prefix_len: usize = 0; for (s, t) in source.iter().zip(target) { if s == t { common_prefix_len += 1; @@ -129,7 +190,7 @@ fn compute_delta(source: &[u8], target: &[u8]) -> Vec { vec![ Instruction::Copy { offset: 0, - size: common_prefix_len, + size: common_prefix_len as u32, }, Instruction::Add { data: target[common_prefix_len..].into(), @@ -151,12 +212,14 @@ mod tests { } } - fn apply_delta(source: &[u8], delta: Vec) -> Vec { + fn apply_delta(source: &[u8], delta: &Vec) -> Vec { let mut buf = Vec::new(); for inst in delta { match inst { Instruction::Add { data } => buf.extend_from_slice(&data), - Instruction::Copy { offset, size } => buf.extend_from_slice(&source[offset..offset + size]), + Instruction::Copy { offset, size } => { + buf.extend_from_slice(&source[(*offset as usize)..(*offset as usize + *size as usize)]) + } } } buf @@ -167,7 +230,18 @@ mod tests { let source = "hello, world".as_bytes(); let target = "hello, gitoxide".as_bytes(); let delta = compute_delta(source, target); - let restored = apply_delta(source, delta); + let restored = apply_delta(source, &delta); assert_eq!(target, restored); + + let mut delta_data = Vec::new(); + for inst in delta { + eprintln!("inst: {inst:?}"); + inst.encode(&mut delta_data).unwrap(); + } + + let mut restored_target = vec![0u8; target.len()]; + eprintln!("delta_data: {delta_data:?}"); + apply(source, &mut restored_target, &delta_data).unwrap(); + assert_eq!(target, restored_target); } } From c3c963e022fe42951cc0ebefc5c5f71cb1a6e4f8 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 06:13:15 +0800 Subject: [PATCH 04/47] refactor: iter_from_counts --- .../src/data/output/entry/iter_from_counts.rs | 248 +++++++++--------- 1 file changed, 125 insertions(+), 123 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 5760bbb22f..2c9b97e7cf 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -97,119 +97,131 @@ pub(crate) mod function { .expect("infallible - we ignore none-existing objects"); progress.lock().show_throughput(start); } - let counts_range_by_pack_id = match mode { + match mode { Mode::PackCopyAndBaseObjects => { - let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); - progress.init(Some(counts.len()), gix_features::progress::count("counts")); - let start = std::time::Instant::now(); + let counts_range_by_pack_id = { + let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); + progress.init(Some(counts.len()), gix_features::progress::count("counts")); + let start = std::time::Instant::now(); - use crate::data::output::count::PackLocation::*; - counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { - (LookedUp(None), LookedUp(None)) => Ordering::Equal, - (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, - (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, - (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs - .pack_id - .cmp(&rhs.pack_id) - .then(lhs.pack_offset.cmp(&rhs.pack_offset)), - (_, _) => unreachable!("counts were resolved beforehand"), - }); - - let mut index: Vec<(u32, std::ops::Range)> = Vec::new(); - let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); - let mut slice = &counts[chunks_pack_start..]; - while !slice.is_empty() { - let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; - let pack_end = slice.partition_point(|e| { - e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id + use crate::data::output::count::PackLocation::*; + counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { + (LookedUp(None), LookedUp(None)) => Ordering::Equal, + (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, + (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, + (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs + .pack_id + .cmp(&rhs.pack_id) + .then(lhs.pack_offset.cmp(&rhs.pack_offset)), + (_, _) => unreachable!("counts were resolved beforehand"), }); - index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); - slice = &slice[pack_end..]; - chunks_pack_start += pack_end; - } - progress.set(counts.len()); - progress.show_throughput(start); + let mut index: Vec<(u32, std::ops::Range)> = Vec::new(); + let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); + let mut slice = &counts[chunks_pack_start..]; + while !slice.is_empty() { + let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; + let pack_end = slice.partition_point(|e| { + e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id + }); + index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); + slice = &slice[pack_end..]; + chunks_pack_start += pack_end; + } - index - } - }; + progress.set(counts.len()); + progress.show_throughput(start); - let counts = Arc::new(counts); - let progress = Arc::new(parking_lot::Mutex::new(progress)); - let chunks = util::ChunkRanges::new(chunk_size, counts.len()); + index + }; + let counts = Arc::new(counts); + let progress = Arc::new(parking_lot::Mutex::new(progress)); + let chunks = util::ChunkRanges::new(chunk_size, counts.len()); - parallel::reduce::Stepwise::new( - chunks.enumerate(), - thread_limit, - { - let progress = Arc::clone(&progress); - move |n| { - ( - Vec::new(), // object data buffer - progress - .lock() - .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), - ) - } - }, - { - let counts = Arc::clone(&counts); - move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), (buf, progress)| { - let mut out = Vec::new(); - let chunk = &counts[chunk_range]; - let mut stats = Outcome::default(); - let mut pack_offsets_to_id = None; - progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + parallel::reduce::Stepwise::new( + chunks.enumerate(), + thread_limit, + { + let progress = Arc::clone(&progress); + move |n| { + ( + Vec::new(), // object data buffer + progress + .lock() + .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), + ) + } + }, + { + let counts = Arc::clone(&counts); + move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), (buf, progress)| { + let mut out = Vec::new(); + let chunk = &counts[chunk_range]; + let mut stats = Outcome::default(); + let mut pack_offsets_to_id = None; + progress.init(Some(chunk.len()), gix_features::progress::count("objects")); - for count in chunk.iter() { - out.push(match count - .entry_pack_location - .as_ref() - .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe))) - { - Some((location, pack_entry)) => { - if let Some((cached_pack_id, _)) = &pack_offsets_to_id { - if *cached_pack_id != location.pack_id { - pack_offsets_to_id = None; - } - } - let pack_range = counts_range_by_pack_id[counts_range_by_pack_id - .binary_search_by_key(&location.pack_id, |e| e.0) - .expect("pack-id always present")] - .1 - .clone(); - let base_index_offset = pack_range.start; - let counts_in_pack = &counts[pack_range]; - let entry = output::Entry::from_pack_entry( - pack_entry, - count, - counts_in_pack, - base_index_offset, - allow_thin_pack.then_some({ - |pack_id, base_offset| { - let (cached_pack_id, cache) = pack_offsets_to_id.get_or_insert_with(|| { - db.pack_offsets_and_oid(pack_id) - .map(|mut v| { - v.sort_by_key(|e| e.0); - (pack_id, v) - }) - .expect("pack used for counts is still available") - }); - debug_assert_eq!(*cached_pack_id, pack_id); - stats.ref_delta_objects += 1; - cache - .binary_search_by_key(&base_offset, |e| e.0) - .ok() - .map(|idx| cache[idx].1) + for count in chunk.iter() { + out.push(match count + .entry_pack_location + .as_ref() + .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe))) + { + Some((location, pack_entry)) => { + if let Some((cached_pack_id, _)) = &pack_offsets_to_id { + if *cached_pack_id != location.pack_id { + pack_offsets_to_id = None; + } + } + let pack_range = counts_range_by_pack_id[counts_range_by_pack_id + .binary_search_by_key(&location.pack_id, |e| e.0) + .expect("pack-id always present")] + .1 + .clone(); + let base_index_offset = pack_range.start; + let counts_in_pack = &counts[pack_range]; + let entry = output::Entry::from_pack_entry( + pack_entry, + count, + counts_in_pack, + base_index_offset, + allow_thin_pack.then_some({ + |pack_id, base_offset| { + let (cached_pack_id, cache) = pack_offsets_to_id + .get_or_insert_with(|| { + db.pack_offsets_and_oid(pack_id) + .map(|mut v| { + v.sort_by_key(|e| e.0); + (pack_id, v) + }) + .expect("pack used for counts is still available") + }); + debug_assert_eq!(*cached_pack_id, pack_id); + stats.ref_delta_objects += 1; + cache + .binary_search_by_key(&base_offset, |e| e.0) + .ok() + .map(|idx| cache[idx].1) + } + }), + version, + ); + match entry { + Some(entry) => { + stats.objects_copied_from_pack += 1; + entry + } + None => match db.try_find(&count.id, buf).map_err(Error::Find)? { + Some((obj, _location)) => { + stats.decoded_and_recompressed_objects += 1; + output::Entry::from_data(count, &obj) + } + None => { + stats.missing_objects += 1; + Ok(output::Entry::invalid()) + } + }, } - }), - version, - ); - match entry { - Some(entry) => { - stats.objects_copied_from_pack += 1; - entry } None => match db.try_find(&count.id, buf).map_err(Error::Find)? { Some((obj, _location)) => { @@ -221,26 +233,16 @@ pub(crate) mod function { Ok(output::Entry::invalid()) } }, - } + }?); + progress.inc(); } - None => match db.try_find(&count.id, buf).map_err(Error::Find)? { - Some((obj, _location)) => { - stats.decoded_and_recompressed_objects += 1; - output::Entry::from_data(count, &obj) - } - None => { - stats.missing_objects += 1; - Ok(output::Entry::invalid()) - } - }, - }?); - progress.inc(); - } - Ok((chunk_id, out, stats)) - } - }, - reduce::Statistics::default(), - ) + Ok((chunk_id, out, stats)) + } + }, + reduce::Statistics::default(), + ) + } + } } } From 8e498627e8aa3c220f5643d03dad17dea87846e8 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 06:21:02 +0800 Subject: [PATCH 05/47] fix: remove dead derive for Options and Mode --- gix-pack/src/data/output/entry/iter_from_counts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 2c9b97e7cf..f9a32d1f4a 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -357,7 +357,7 @@ mod types { } /// The way the iterator operates. - #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] + #[derive(Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum Mode { /// Copy base objects and deltas from packs, while non-packed objects will be treated as base objects @@ -368,7 +368,7 @@ mod types { } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. - #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] + #[derive(Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Options { /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used. From 88e611d34feaa3859419c5e060706518adbc733f Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 06:34:28 +0800 Subject: [PATCH 06/47] feat: Mode::CustomedDeltaTopo --- Cargo.lock | 1 + gix-hashtable/Cargo.toml | 1 + gix-hashtable/src/lib.rs | 66 +++++++++++++++++++ .../src/data/output/entry/iter_from_counts.rs | 8 +++ 4 files changed, 76 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 749068e376..48d309ffc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1900,6 +1900,7 @@ dependencies = [ "gix-hash", "hashbrown 0.16.1", "parking_lot", + "serde", ] [[package]] diff --git a/gix-hashtable/Cargo.toml b/gix-hashtable/Cargo.toml index f8ba8df217..fc910f5758 100644 --- a/gix-hashtable/Cargo.toml +++ b/gix-hashtable/Cargo.toml @@ -22,6 +22,7 @@ sha1 = ["gix-hash/sha1"] parking_lot = "0.12.4" hashbrown = { version = "0.16.0", default-features = false, features = ["inline-more"] } gix-hash = { version = "^0.23.0", path = "../gix-hash" } +serde = "1.0.228" [dev-dependencies] gix-hash = { path = "../gix-hash", features = ["sha1"] } diff --git a/gix-hashtable/src/lib.rs b/gix-hashtable/src/lib.rs index 592729ea68..ee801743d7 100644 --- a/gix-hashtable/src/lib.rs +++ b/gix-hashtable/src/lib.rs @@ -26,6 +26,7 @@ pub use hashbrown::{hash_map, hash_set, hash_table, Equivalent}; /// thread-safe types pub mod sync { /// A map for associating data with object ids in a thread-safe fashion. It should scale well up to 256 threads. + #[derive(Debug)] pub struct ObjectIdMap { /// Sharing is done by the first byte of the incoming object id. shards: [parking_lot::Mutex>; 256], @@ -47,6 +48,71 @@ pub mod sync { self.shards[key.as_slice()[0] as usize].lock().insert(key, value) } } + + impl serde::Serialize for ObjectIdMap + where + V: serde::Serialize, + { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeMap as _; + + let total_len: usize = self.shards.iter().map(|shard| shard.lock().len()).sum(); + + let mut map = serializer.serialize_map(Some(total_len))?; + for shard_mutex in &self.shards { + let shard = shard_mutex.lock(); + for (key, value) in shard.iter() { + map.serialize_entry(key, value)?; + } + } + map.end() + } + } + + impl<'de, V> serde::Deserialize<'de> for ObjectIdMap + where + V: serde::Deserialize<'de>, + { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Deserialize, MapAccess, Visitor}; + use std::fmt; + use std::marker::PhantomData; + + struct ObjectIdMapVisitor { + _marker: PhantomData V>, + } + + impl<'de, V> Visitor<'de> for ObjectIdMapVisitor + where + V: Deserialize<'de>, + { + type Value = ObjectIdMap; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("a map of ObjectId to V") + } + + fn visit_map(self, mut access: M) -> Result + where + M: MapAccess<'de>, + { + let map = ObjectIdMap::default(); + while let Some((key, value)) = access.next_entry::()? { + map.insert(key, value); + } + Ok(map) + } + } + + deserializer.deserialize_map(ObjectIdMapVisitor { _marker: PhantomData }) + } + } } /// diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index f9a32d1f4a..8c5cf0ce29 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -242,6 +242,7 @@ pub(crate) mod function { reduce::Statistics::default(), ) } + Mode::CustomedDeltaTopo(topo) => todo!(), } } } @@ -322,6 +323,9 @@ mod reduce { } mod types { + use gix_hash::ObjectId; + use gix_hashtable::sync::ObjectIdMap; + use crate::data::output::entry; /// Information gathered during the run of [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. @@ -365,6 +369,10 @@ mod types { /// from existing pack compression and spending the smallest possible time on compressing unpacked objects at /// the cost of bandwidth. PackCopyAndBaseObjects, + /// Determine whether an object is a base or a delta based on topological relationships. + /// `Option::is_none` signifies a base object, while `Option::is_some(src)` signifies a delta. + /// If the required delta does not exist, it will be computed. + CustomedDeltaTopo(ObjectIdMap>), } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. From 4f9416591930b59a8b4415d1ea2e67de8586bbfa Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 06:37:31 +0800 Subject: [PATCH 07/47] fix typo --- gix-pack/src/data/output/entry/iter_from_counts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 8c5cf0ce29..04b71e59ae 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -242,7 +242,7 @@ pub(crate) mod function { reduce::Statistics::default(), ) } - Mode::CustomedDeltaTopo(topo) => todo!(), + Mode::CustomizedDeltaTopo(topo) => todo!(), } } } @@ -372,7 +372,7 @@ mod types { /// Determine whether an object is a base or a delta based on topological relationships. /// `Option::is_none` signifies a base object, while `Option::is_some(src)` signifies a delta. /// If the required delta does not exist, it will be computed. - CustomedDeltaTopo(ObjectIdMap>), + CustomizedDeltaTopo(ObjectIdMap>), } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. From 63f21c86e85cb1661d32aa7f2cc089c7cac2f80d Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 07:24:37 +0800 Subject: [PATCH 08/47] refactor: add help for iter_from_counts --- .../src/data/output/entry/iter_from_counts.rs | 149 ++++++++++-------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 04b71e59ae..b71ea9bd4a 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -99,44 +99,10 @@ pub(crate) mod function { } match mode { Mode::PackCopyAndBaseObjects => { - let counts_range_by_pack_id = { - let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); - progress.init(Some(counts.len()), gix_features::progress::count("counts")); - let start = std::time::Instant::now(); - - use crate::data::output::count::PackLocation::*; - counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { - (LookedUp(None), LookedUp(None)) => Ordering::Equal, - (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, - (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, - (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs - .pack_id - .cmp(&rhs.pack_id) - .then(lhs.pack_offset.cmp(&rhs.pack_offset)), - (_, _) => unreachable!("counts were resolved beforehand"), - }); - - let mut index: Vec<(u32, std::ops::Range)> = Vec::new(); - let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); - let mut slice = &counts[chunks_pack_start..]; - while !slice.is_empty() { - let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; - let pack_end = slice.partition_point(|e| { - e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id - }); - index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); - slice = &slice[pack_end..]; - chunks_pack_start += pack_end; - } - - progress.set(counts.len()); - progress.show_throughput(start); - - index - }; - let counts = Arc::new(counts); + let counts_range_by_pack_id = rearrange_counts_by_pack_id(&mut counts, &mut progress); + let sorted_counts = Arc::new(counts); let progress = Arc::new(parking_lot::Mutex::new(progress)); - let chunks = util::ChunkRanges::new(chunk_size, counts.len()); + let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); parallel::reduce::Stepwise::new( chunks.enumerate(), @@ -153,12 +119,12 @@ pub(crate) mod function { } }, { - let counts = Arc::clone(&counts); + let sorted_counts = Arc::clone(&sorted_counts); move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), (buf, progress)| { let mut out = Vec::new(); - let chunk = &counts[chunk_range]; + let chunk = &sorted_counts[chunk_range]; let mut stats = Outcome::default(); - let mut pack_offsets_to_id = None; + let mut latest_pack_mapping = None; progress.init(Some(chunk.len()), gix_features::progress::count("objects")); for count in chunk.iter() { @@ -167,27 +133,33 @@ pub(crate) mod function { .as_ref() .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe))) { + // Existing in a pack Some((location, pack_entry)) => { - if let Some((cached_pack_id, _)) = &pack_offsets_to_id { + // Unset latest_pack_offsets_to_id if outside the pack range + if let Some((cached_pack_id, _)) = &latest_pack_mapping { if *cached_pack_id != location.pack_id { - pack_offsets_to_id = None; + latest_pack_mapping = None; } } - let pack_range = counts_range_by_pack_id[counts_range_by_pack_id - .binary_search_by_key(&location.pack_id, |e| e.0) - .expect("pack-id always present")] - .1 - .clone(); - let base_index_offset = pack_range.start; - let counts_in_pack = &counts[pack_range]; - let entry = output::Entry::from_pack_entry( + + // Params for pack finding + let (base_index_offset, counts_in_pack) = { + let index = counts_range_by_pack_id + .binary_search_by_key(&location.pack_id, |e| e.0) + .expect("pack-id always present"); + let pack_range = counts_range_by_pack_id[index].1.clone(); + (pack_range.start, &sorted_counts[pack_range]) + }; + + // First try to find existing entry in existing packs + if let Some(entry) = output::Entry::from_pack_entry( pack_entry, count, counts_in_pack, base_index_offset, allow_thin_pack.then_some({ |pack_id, base_offset| { - let (cached_pack_id, cache) = pack_offsets_to_id + let (cached_pack_id, offsets_oid_mapping) = latest_pack_mapping .get_or_insert_with(|| { db.pack_offsets_and_oid(pack_id) .map(|mut v| { @@ -197,32 +169,33 @@ pub(crate) mod function { .expect("pack used for counts is still available") }); debug_assert_eq!(*cached_pack_id, pack_id); + stats.ref_delta_objects += 1; - cache + offsets_oid_mapping .binary_search_by_key(&base_offset, |e| e.0) .ok() - .map(|idx| cache[idx].1) + .map(|idx| offsets_oid_mapping[idx].1) } }), version, - ); - match entry { - Some(entry) => { - stats.objects_copied_from_pack += 1; - entry - } - None => match db.try_find(&count.id, buf).map_err(Error::Find)? { - Some((obj, _location)) => { - stats.decoded_and_recompressed_objects += 1; - output::Entry::from_data(count, &obj) - } - None => { - stats.missing_objects += 1; - Ok(output::Entry::invalid()) - } - }, + ) { + stats.objects_copied_from_pack += 1; + entry + } + // Fallback to find in Object Database + else if let Some((obj, _location)) = + db.try_find(&count.id, buf).map_err(Error::Find)? + { + stats.decoded_and_recompressed_objects += 1; + output::Entry::from_data(count, &obj) + } + // If both missing, return Entry::invalid + else { + stats.missing_objects += 1; + Ok(output::Entry::invalid()) } } + // Existing as a loose object None => match db.try_find(&count.id, buf).map_err(Error::Find)? { Some((obj, _location)) => { stats.decoded_and_recompressed_objects += 1; @@ -245,6 +218,44 @@ pub(crate) mod function { Mode::CustomizedDeltaTopo(topo) => todo!(), } } + + fn rearrange_counts_by_pack_id( + counts: &mut Vec, + progress: &mut Box, + ) -> Vec<(u32, std::ops::Range)> { + let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); + progress.init(Some(counts.len()), gix_features::progress::count("counts")); + let start = std::time::Instant::now(); + + use crate::data::output::count::PackLocation::*; + counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { + (LookedUp(None), LookedUp(None)) => Ordering::Equal, + (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, + (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, + (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs + .pack_id + .cmp(&rhs.pack_id) + .then(lhs.pack_offset.cmp(&rhs.pack_offset)), + (_, _) => unreachable!("counts were resolved beforehand"), + }); + + let mut index: Vec<(u32, std::ops::Range)> = Vec::new(); + let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); + let mut slice = &counts[chunks_pack_start..]; + while !slice.is_empty() { + let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; + let pack_end = slice + .partition_point(|e| e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id); + index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); + slice = &slice[pack_end..]; + chunks_pack_start += pack_end; + } + + progress.set(counts.len()); + progress.show_throughput(start); + + index + } } mod util { From 9e5a6bb8260ddc5f202585b79ca44d05abf6bd50 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 08:57:52 +0800 Subject: [PATCH 09/47] wip: feat: CustomizedDeltaTopo --- .../src/data/output/entry/iter_from_counts.rs | 170 +++++++++++++++++- 1 file changed, 167 insertions(+), 3 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index b71ea9bd4a..4a3ab14330 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -9,6 +9,7 @@ pub(crate) mod function { Progress, }, }; + use gix_hash::ObjectId; use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; use crate::data::output; @@ -183,6 +184,7 @@ pub(crate) mod function { entry } // Fallback to find in Object Database + // TODO: useless decompress then compress here else if let Some((obj, _location)) = db.try_find(&count.id, buf).map_err(Error::Find)? { @@ -215,7 +217,168 @@ pub(crate) mod function { reduce::Statistics::default(), ) } - Mode::CustomizedDeltaTopo(topo) => todo!(), + Mode::CustomizedDeltaTopo(topo) => { + let counts_range_by_pack_id = rearrange_counts_by_pack_id(&mut counts, &mut progress); + let sorted_counts = Arc::new(counts); + let progress = Arc::new(parking_lot::Mutex::new(progress)); + let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); + + parallel::reduce::Stepwise::new( + chunks.enumerate(), + thread_limit, + { + let progress = Arc::clone(&progress); + move |n| { + ( + Vec::new(), // object data buffer + progress + .lock() + .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), + ) + } + }, + { + let sorted_counts = Arc::clone(&sorted_counts); + move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), (buf, progress)| { + enum ToDo { + /// For packed objects + Dedelta { target: ObjectId, source: ObjectId }, + /// For loose objects + Decompress(ObjectId), + /// For reuse delta + ReuseDelta(Entry), + } + let mut out = Vec::new(); + let mut to_delta_buf = Vec::new(); + let chunk = &sorted_counts[chunk_range]; + let mut stats = Outcome::default(); + let mut latest_pack_mapping = None; + progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + + for count in chunk.iter() { + let delta_target = count.id; + let delta_source = topo.get(&delta_target); + + out.push(match count + .entry_pack_location + .as_ref() + .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe))) + { + // Existing in a pack + Some((location, pack_entry)) => { + // Unset latest_pack_offsets_to_id if outside the pack range + if let Some((cached_pack_id, _)) = &latest_pack_mapping { + if *cached_pack_id != location.pack_id { + latest_pack_mapping = None; + } + } + + // Params for pack finding + let (base_index_offset, counts_in_pack) = { + let index = counts_range_by_pack_id + .binary_search_by_key(&location.pack_id, |e| e.0) + .expect("pack-id always present"); + let pack_range = counts_range_by_pack_id[index].1.clone(); + (pack_range.start, &sorted_counts[pack_range]) + }; + + // Try to reuse delta + if let Some(entry) = output::Entry::from_pack_entry( + pack_entry, + count, + counts_in_pack, + base_index_offset, + allow_thin_pack.then_some({ + |pack_id, base_offset| { + let (cached_pack_id, offsets_oid_mapping) = latest_pack_mapping + .get_or_insert_with(|| { + db.pack_offsets_and_oid(pack_id) + .map(|mut v| { + v.sort_by_key(|e| e.0); + (pack_id, v) + }) + .expect("pack used for counts is still available") + }); + debug_assert_eq!(*cached_pack_id, pack_id); + + stats.ref_delta_objects += 1; + offsets_oid_mapping + .binary_search_by_key(&base_offset, |e| e.0) + .ok() + .map(|idx| offsets_oid_mapping[idx].1) + } + }), + version, + ) { + stats.objects_copied_from_pack += 1; + entry.map(|entry| { + use super::super::Kind; + + match entry.kind { + Kind::DeltaRef { object_index } => { + let current_delta_source = { + let pack_location = + count.entry_pack_location.as_ref().expect("packed"); + let (_, offsets_oid_mapping) = latest_pack_mapping + .get_or_insert_with(|| { + db.pack_offsets_and_oid(pack_location.pack_id) + .map(|mut v| { + v.sort_by_key(|e| e.0); + (pack_location.pack_id, v) + }) + .expect( + "pack used for counts is still available", + ) + }); + offsets_oid_mapping + .binary_search_by_key(&pack_location.pack_offset, |e| { + e.0 + }) + .ok() + .map(|idx| offsets_oid_mapping[idx].1) + .expect("pack offset is valid") + }; + if let Some(delta_source) = delta_source { + // Reuse delta + if *delta_source == current_delta_source { + stats.objects_copied_from_pack += 1; + ToDo::ReuseDelta(entry) + } else { + ToDo::Dedelta { + target: entry.id, + source: current_delta_source, + } + } + } else { + ToDo::Dedelta { + target: entry.id, + source: current_delta_source, + } + } + } + Kind::Base(kind) => ToDo::Decompress(entry.id), + Kind::DeltaOid { id } => ToDo::Dedelta { + target: entry.id, + source: id, + }, + } + }) + } else { + Ok(ToDo::Decompress(count.id)) + } + } + // Existing as a loose object + None => Ok(ToDo::Decompress(count.id)), + }?); + progress.inc(); + } + let out = todo!(); + Ok((chunk_id, out, stats)) + } + }, + reduce::Statistics::default(), + ) + } } } @@ -381,9 +544,10 @@ mod types { /// the cost of bandwidth. PackCopyAndBaseObjects, /// Determine whether an object is a base or a delta based on topological relationships. - /// `Option::is_none` signifies a base object, while `Option::is_some(src)` signifies a delta. + /// Key object refers to delta target, value object refers to delta source. + /// Treat objects missing in keys as base objects. /// If the required delta does not exist, it will be computed. - CustomizedDeltaTopo(ObjectIdMap>), + CustomizedDeltaTopo(std::collections::HashMap), } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. From f1db7907d80ff68ace7bc9c42d3723e233c03dcc Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Wed, 22 Apr 2026 23:56:01 +0800 Subject: [PATCH 10/47] fix: remove Serialize & Deserialize for ObjectIdMap --- gix-hashtable/src/lib.rs | 65 ---------------------------------------- 1 file changed, 65 deletions(-) diff --git a/gix-hashtable/src/lib.rs b/gix-hashtable/src/lib.rs index ee801743d7..f24e686fd8 100644 --- a/gix-hashtable/src/lib.rs +++ b/gix-hashtable/src/lib.rs @@ -48,71 +48,6 @@ pub mod sync { self.shards[key.as_slice()[0] as usize].lock().insert(key, value) } } - - impl serde::Serialize for ObjectIdMap - where - V: serde::Serialize, - { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeMap as _; - - let total_len: usize = self.shards.iter().map(|shard| shard.lock().len()).sum(); - - let mut map = serializer.serialize_map(Some(total_len))?; - for shard_mutex in &self.shards { - let shard = shard_mutex.lock(); - for (key, value) in shard.iter() { - map.serialize_entry(key, value)?; - } - } - map.end() - } - } - - impl<'de, V> serde::Deserialize<'de> for ObjectIdMap - where - V: serde::Deserialize<'de>, - { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - use serde::de::{Deserialize, MapAccess, Visitor}; - use std::fmt; - use std::marker::PhantomData; - - struct ObjectIdMapVisitor { - _marker: PhantomData V>, - } - - impl<'de, V> Visitor<'de> for ObjectIdMapVisitor - where - V: Deserialize<'de>, - { - type Value = ObjectIdMap; - - fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter.write_str("a map of ObjectId to V") - } - - fn visit_map(self, mut access: M) -> Result - where - M: MapAccess<'de>, - { - let map = ObjectIdMap::default(); - while let Some((key, value)) = access.next_entry::()? { - map.insert(key, value); - } - Ok(map) - } - } - - deserializer.deserialize_map(ObjectIdMapVisitor { _marker: PhantomData }) - } - } } /// From 503d57d358e4561863bbde26743d6afbf3797348 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 09:12:15 +0800 Subject: [PATCH 11/47] feat: CustomizedDeltaTopo poc --- gix-pack/src/data/delta.rs | 23 +- .../src/data/output/entry/iter_from_counts.rs | 217 +++++------------- gix-pack/src/data/output/entry/mod.rs | 31 ++- 3 files changed, 101 insertions(+), 170 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 83464f3993..8a01c3ed96 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -123,13 +123,25 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( Ok(()) } +/// Delta instruction #[derive(Debug)] -enum Instruction { - Copy { offset: u32, size: u32 }, - Add { data: Vec }, +pub enum Instruction { + /// Copy data from source + Copy { + /// Start position to copy + offset: u32, + /// Data length in bytes + size: u32, + }, + /// Add data embedded in instruction + Add { + /// Data to add + data: Vec, // TODO: use borrow here + }, } impl Instruction { + /// Encode instruction to bytes. pub fn encode(self, mut writer: impl Write) -> Result<(), encode::Error> { match self { Self::Copy { offset, mut size } => { @@ -178,7 +190,10 @@ impl Instruction { } } -fn compute_delta(source: &[u8], target: &[u8]) -> Vec { +/// Calcuate delta instructions from `source` to `target`. +pub fn compute_delta(source: &[u8], target: &[u8]) -> Vec { + // TODO: more efficient + // TODO: more configurable let mut common_prefix_len: usize = 0; for (s, t) in source.iter().zip(target) { if s == t { diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 4a3ab14330..954bf62fae 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -12,7 +12,7 @@ pub(crate) mod function { use gix_hash::ObjectId; use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; - use crate::data::output; + use crate::{data::output, FindExt}; /// Given a known list of object `counts`, calculate entries ready to be put into a data pack. /// @@ -189,7 +189,7 @@ pub(crate) mod function { db.try_find(&count.id, buf).map_err(Error::Find)? { stats.decoded_and_recompressed_objects += 1; - output::Entry::from_data(count, &obj) + output::Entry::from_base(count, &obj) } // If both missing, return Entry::invalid else { @@ -201,7 +201,7 @@ pub(crate) mod function { None => match db.try_find(&count.id, buf).map_err(Error::Find)? { Some((obj, _location)) => { stats.decoded_and_recompressed_objects += 1; - output::Entry::from_data(count, &obj) + output::Entry::from_base(count, &obj) } None => { stats.missing_objects += 1; @@ -217,167 +217,52 @@ pub(crate) mod function { reduce::Statistics::default(), ) } - Mode::CustomizedDeltaTopo(topo) => { - let counts_range_by_pack_id = rearrange_counts_by_pack_id(&mut counts, &mut progress); - let sorted_counts = Arc::new(counts); - let progress = Arc::new(parking_lot::Mutex::new(progress)); - let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); - - parallel::reduce::Stepwise::new( - chunks.enumerate(), - thread_limit, - { - let progress = Arc::clone(&progress); - move |n| { - ( - Vec::new(), // object data buffer - progress - .lock() - .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), - ) - } - }, - { - let sorted_counts = Arc::clone(&sorted_counts); - move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), (buf, progress)| { - enum ToDo { - /// For packed objects - Dedelta { target: ObjectId, source: ObjectId }, - /// For loose objects - Decompress(ObjectId), - /// For reuse delta - ReuseDelta(Entry), + Mode::CustomizedDeltaTopo { topo, cache_capacity } => { + // TODO: parallel, progress + // TODO: reuse delta + if allow_thin_pack { + todo!("support allow_thin_pack"); + } + let mut buffer_a = Vec::new(); + let mut buffer_b = Vec::new(); + let mut buffer_c = Vec::new(); + let mut obj_cache = crate::cache::lru::MemoryCappedHashmap::new(cache_capacity); + let oid_index_mapping = counts + .iter() + .enumerate() + .map(|(index, count)| (count.id, index)) + .collect::>(); + let out = counts + .iter() + .map(|count| { + let oid = count.id; + if let Some(soruce_oid) = topo.get(&oid) { + let (mut target, _) = db + .try_find_cached(&oid, &mut buffer_a, &mut obj_cache) + .unwrap() + .unwrap(); // TODO: replace with map_err + let (source, _) = db + .try_find_cached(&soruce_oid, &mut buffer_b, &mut obj_cache) + .unwrap() + .unwrap(); // TODO: replace with map_err + let delta_insts = crate::data::delta::compute_delta(source.data, target.data); + buffer_c.clear(); + for inst in delta_insts { + inst.encode(&mut buffer_c).unwrap(); // TODO: replace with map_err } - let mut out = Vec::new(); - let mut to_delta_buf = Vec::new(); - let chunk = &sorted_counts[chunk_range]; - let mut stats = Outcome::default(); - let mut latest_pack_mapping = None; - progress.init(Some(chunk.len()), gix_features::progress::count("objects")); - - for count in chunk.iter() { - let delta_target = count.id; - let delta_source = topo.get(&delta_target); - - out.push(match count - .entry_pack_location - .as_ref() - .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe))) - { - // Existing in a pack - Some((location, pack_entry)) => { - // Unset latest_pack_offsets_to_id if outside the pack range - if let Some((cached_pack_id, _)) = &latest_pack_mapping { - if *cached_pack_id != location.pack_id { - latest_pack_mapping = None; - } - } - - // Params for pack finding - let (base_index_offset, counts_in_pack) = { - let index = counts_range_by_pack_id - .binary_search_by_key(&location.pack_id, |e| e.0) - .expect("pack-id always present"); - let pack_range = counts_range_by_pack_id[index].1.clone(); - (pack_range.start, &sorted_counts[pack_range]) - }; - - // Try to reuse delta - if let Some(entry) = output::Entry::from_pack_entry( - pack_entry, - count, - counts_in_pack, - base_index_offset, - allow_thin_pack.then_some({ - |pack_id, base_offset| { - let (cached_pack_id, offsets_oid_mapping) = latest_pack_mapping - .get_or_insert_with(|| { - db.pack_offsets_and_oid(pack_id) - .map(|mut v| { - v.sort_by_key(|e| e.0); - (pack_id, v) - }) - .expect("pack used for counts is still available") - }); - debug_assert_eq!(*cached_pack_id, pack_id); - - stats.ref_delta_objects += 1; - offsets_oid_mapping - .binary_search_by_key(&base_offset, |e| e.0) - .ok() - .map(|idx| offsets_oid_mapping[idx].1) - } - }), - version, - ) { - stats.objects_copied_from_pack += 1; - entry.map(|entry| { - use super::super::Kind; - - match entry.kind { - Kind::DeltaRef { object_index } => { - let current_delta_source = { - let pack_location = - count.entry_pack_location.as_ref().expect("packed"); - let (_, offsets_oid_mapping) = latest_pack_mapping - .get_or_insert_with(|| { - db.pack_offsets_and_oid(pack_location.pack_id) - .map(|mut v| { - v.sort_by_key(|e| e.0); - (pack_location.pack_id, v) - }) - .expect( - "pack used for counts is still available", - ) - }); - offsets_oid_mapping - .binary_search_by_key(&pack_location.pack_offset, |e| { - e.0 - }) - .ok() - .map(|idx| offsets_oid_mapping[idx].1) - .expect("pack offset is valid") - }; - if let Some(delta_source) = delta_source { - // Reuse delta - if *delta_source == current_delta_source { - stats.objects_copied_from_pack += 1; - ToDo::ReuseDelta(entry) - } else { - ToDo::Dedelta { - target: entry.id, - source: current_delta_source, - } - } - } else { - ToDo::Dedelta { - target: entry.id, - source: current_delta_source, - } - } - } - Kind::Base(kind) => ToDo::Decompress(entry.id), - Kind::DeltaOid { id } => ToDo::Dedelta { - target: entry.id, - source: id, - }, - } - }) - } else { - Ok(ToDo::Decompress(count.id)) - } - } - // Existing as a loose object - None => Ok(ToDo::Decompress(count.id)), - }?); - progress.inc(); - } - let out = todo!(); - Ok((chunk_id, out, stats)) + target.data = buffer_c.as_slice(); + // TODO: replace with map_err + output::Entry::from_delta_ref(count, &target, *oid_index_mapping.get(&oid).unwrap()) + } else { + let (data, _) = db + .try_find_cached(&oid, &mut buffer_a, &mut obj_cache) + .unwrap() + .unwrap(); // TODO: replace with map_err + output::Entry::from_base(count, &data) } - }, - reduce::Statistics::default(), - ) + }) + .collect::>(); + todo!() } } } @@ -498,7 +383,6 @@ mod reduce { mod types { use gix_hash::ObjectId; - use gix_hashtable::sync::ObjectIdMap; use crate::data::output::entry; @@ -547,7 +431,12 @@ mod types { /// Key object refers to delta target, value object refers to delta source. /// Treat objects missing in keys as base objects. /// If the required delta does not exist, it will be computed. - CustomizedDeltaTopo(std::collections::HashMap), + CustomizedDeltaTopo { + /// A mapping from a delta target's Object ID to its corresponding delta source (base) ID. + topo: std::collections::HashMap, + /// The maximum cache capacity to store object data while find object. Count in bytes. + cache_capacity: usize, + }, } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. diff --git a/gix-pack/src/data/output/entry/mod.rs b/gix-pack/src/data/output/entry/mod.rs index 18d20dd2c0..39cdcbec3e 100644 --- a/gix-pack/src/data/output/entry/mod.rs +++ b/gix-pack/src/data/output/entry/mod.rs @@ -131,8 +131,8 @@ impl output::Entry { }) } - /// Create a new instance from the given `oid` and its corresponding git object data `obj`. - pub fn from_data(count: &output::Count, obj: &gix_object::Data<'_>) -> Result { + /// Create a new instance with type Base from the given `oid` and its corresponding git object data `obj`. + pub fn from_base(count: &output::Count, obj: &gix_object::Data<'_>) -> Result { Ok(output::Entry { id: count.id.to_owned(), kind: Kind::Base(obj.kind), @@ -151,6 +151,33 @@ impl output::Entry { }) } + /// Like [`from_base()`], but with type OfsDelta. + /// `object_index` is the absolute index to the object. + pub fn from_delta_ref( + count: &output::Count, + obj: &gix_object::Data<'_>, + object_index: usize, + ) -> Result { + Ok(output::Entry { + id: count.id.to_owned(), + kind: Kind::DeltaRef { + object_index: object_index, + }, + decompressed_size: obj.data.len(), + compressed_data: { + let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new()); + if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) { + match err.kind() { + std::io::ErrorKind::Other => return Err(Error::ZlibDeflate(err)), + err => unreachable!("Should never see other errors than zlib, but got {:?}", err), + } + } + out.flush()?; + out.into_inner() + }, + }) + } + /// Transform ourselves into pack entry header of `version` which can be written into a pack. /// /// `index_to_pack(object_index) -> pack_offset` is a function to convert the base object's index into From 609143a1f74fb98f13d4503fe21baffad60e70f7 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 12:24:30 +0800 Subject: [PATCH 12/47] feat: basic parallel --- gix-pack/src/data/delta.rs | 26 ---- .../src/data/output/entry/iter_from_counts.rs | 120 ++++++++++++------ 2 files changed, 80 insertions(+), 66 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 8a01c3ed96..719adccdc7 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -47,22 +47,6 @@ pub(crate) fn decode_header_size(d: &[u8]) -> (u64, usize) { (size, consumed) } -fn encode_size(mut n: u64) -> Vec { - let mut buf = Vec::with_capacity(8); - loop { - let mut byte = (n & 0x7F) as u8; - n >>= 7; - if n != 0 { - byte |= 0x80; - buf.push(byte); - } else { - buf.push(byte); - break; - } - } - buf -} - pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), apply::Error> { let mut i = 0; while let Some(cmd) = data.get(i) { @@ -217,16 +201,6 @@ pub fn compute_delta(source: &[u8], target: &[u8]) -> Vec { mod tests { use super::*; - #[test] - fn encode_size_works() { - let cases: Vec = vec![0x00, 0x01, 0x7f, 0xff, 0x7777, 1795265022, 3_825_123_056_546_413_051]; - for n in cases { - let encoded = encode_size(n); - let (restored_n, _) = decode_header_size(&encoded); - assert_eq!(n, restored_n); - } - } - fn apply_delta(source: &[u8], delta: &Vec) -> Vec { let mut buf = Vec::new(); for inst in delta { diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 954bf62fae..ea5e4b5f8e 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -9,10 +9,9 @@ pub(crate) mod function { Progress, }, }; - use gix_hash::ObjectId; use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; - use crate::{data::output, FindExt}; + use crate::data::output; /// Given a known list of object `counts`, calculate entries ready to be put into a data pack. /// @@ -218,51 +217,92 @@ pub(crate) mod function { ) } Mode::CustomizedDeltaTopo { topo, cache_capacity } => { - // TODO: parallel, progress + let sorted_counts = Arc::new(counts); + let progress = Arc::new(parking_lot::Mutex::new(progress)); + let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); + // TODO: reuse delta if allow_thin_pack { todo!("support allow_thin_pack"); } - let mut buffer_a = Vec::new(); - let mut buffer_b = Vec::new(); - let mut buffer_c = Vec::new(); - let mut obj_cache = crate::cache::lru::MemoryCappedHashmap::new(cache_capacity); - let oid_index_mapping = counts - .iter() - .enumerate() - .map(|(index, count)| (count.id, index)) - .collect::>(); - let out = counts - .iter() - .map(|count| { - let oid = count.id; - if let Some(soruce_oid) = topo.get(&oid) { - let (mut target, _) = db - .try_find_cached(&oid, &mut buffer_a, &mut obj_cache) - .unwrap() - .unwrap(); // TODO: replace with map_err - let (source, _) = db - .try_find_cached(&soruce_oid, &mut buffer_b, &mut obj_cache) - .unwrap() + + let cache = Arc::new(std::sync::Mutex::new(crate::cache::lru::MemoryCappedHashmap::new( + cache_capacity, + ))); // TODO: use parking_lot::Mutex + let oid_index_mapping = Arc::new( + sorted_counts + .iter() + .enumerate() + .map(|(index, count)| (count.id, index)) + .collect::>(), + ); // TODO: rearrange delta solving order or lru to avoid cache peak + parallel::reduce::Stepwise::new( + chunks.enumerate(), + thread_limit, + { + let progress = Arc::clone(&progress); + move |n| { + ( + Vec::new(), // buffer object data for target + Vec::new(), // buffer object data for source + progress + .lock() + .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), + ) + } + }, + { + let sorted_counts = Arc::clone(&sorted_counts); + let oid_index_mapping = Arc::clone(&oid_index_mapping); + let cache = Arc::clone(&cache); + move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), + (buf_t, buf_s, progress)| { + let mut out = Vec::new(); + let chunk = &sorted_counts[chunk_range]; + let stats = Outcome::default(); + progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + + for count in chunk.iter() { + let oid = count.id; + let entry = if let Some(soruce_oid) = topo.get(&oid) { + let (mut target, _) = db + .try_find_cached(&oid, buf_t, &mut *cache.lock().unwrap()) // TODO: replace with map_err + .unwrap() + .unwrap(); // TODO: replace with map_err + let (source, _) = db + .try_find_cached(&soruce_oid, buf_s, &mut *cache.lock().unwrap()) + .unwrap() + .unwrap(); // TODO: replace with map_err + let delta_insts = crate::data::delta::compute_delta(source.data, target.data); + let mut delta_data_buf = Vec::new(); + for inst in delta_insts { + inst.encode(&mut delta_data_buf).unwrap(); + // TODO: replace with map_err + } + target.data = delta_data_buf.as_slice(); + let entry = output::Entry::from_delta_ref( + count, + &target, + *oid_index_mapping.get(&oid).unwrap(), + ); // TODO: replace with map_err + // target is dropped here, releasing the borrow on delta_data + entry + } else { + let (data, _) = db + .try_find_cached(&oid, buf_t, &mut *cache.lock().unwrap()) + .unwrap() + .unwrap(); // TODO: replace with map_err + output::Entry::from_base(count, &data) + } .unwrap(); // TODO: replace with map_err - let delta_insts = crate::data::delta::compute_delta(source.data, target.data); - buffer_c.clear(); - for inst in delta_insts { - inst.encode(&mut buffer_c).unwrap(); // TODO: replace with map_err + out.push(entry); + progress.inc(); } - target.data = buffer_c.as_slice(); - // TODO: replace with map_err - output::Entry::from_delta_ref(count, &target, *oid_index_mapping.get(&oid).unwrap()) - } else { - let (data, _) = db - .try_find_cached(&oid, &mut buffer_a, &mut obj_cache) - .unwrap() - .unwrap(); // TODO: replace with map_err - output::Entry::from_base(count, &data) + Ok((chunk_id, out, stats)) } - }) - .collect::>(); - todo!() + }, + reduce::Statistics::default(), + ) } } } From 79cf84eedc6181c394aeea36306895020afec37c Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 15:48:31 +0800 Subject: [PATCH 13/47] fix: use map_err on Results --- .../src/data/output/entry/iter_from_counts.rs | 64 +++++++++++-------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index ea5e4b5f8e..02cb9627d8 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -264,37 +264,45 @@ pub(crate) mod function { for count in chunk.iter() { let oid = count.id; + let db_find_cached = |oid, buf| { + db.try_find_cached( + oid, + buf, + &mut *cache.lock().expect("other thread should not panic on cache"), + ) + .map_err(Error::Find) + }; let entry = if let Some(soruce_oid) = topo.get(&oid) { - let (mut target, _) = db - .try_find_cached(&oid, buf_t, &mut *cache.lock().unwrap()) // TODO: replace with map_err - .unwrap() - .unwrap(); // TODO: replace with map_err - let (source, _) = db - .try_find_cached(&soruce_oid, buf_s, &mut *cache.lock().unwrap()) - .unwrap() - .unwrap(); // TODO: replace with map_err - let delta_insts = crate::data::delta::compute_delta(source.data, target.data); - let mut delta_data_buf = Vec::new(); - for inst in delta_insts { - inst.encode(&mut delta_data_buf).unwrap(); - // TODO: replace with map_err + if let Some((mut target, _)) = db_find_cached(&oid, buf_t)? { + if let Some((source, _)) = db_find_cached(&soruce_oid, buf_s)? { + let delta_insts = + crate::data::delta::compute_delta(source.data, target.data); + let mut delta_data_buf = Vec::new(); + for inst in delta_insts { + inst.encode(&mut delta_data_buf) + .expect("delta instruction should valid"); + } + target.data = delta_data_buf.as_slice(); + let entry = output::Entry::from_delta_ref( + count, + &target, + *oid_index_mapping.get(&oid).unwrap(), // FIXIT: incorrect index + ); + // target is dropped here, releasing the borrow on delta_data + entry + } else { + Ok(output::Entry::invalid()) + } + } else { + Ok(output::Entry::invalid()) } - target.data = delta_data_buf.as_slice(); - let entry = output::Entry::from_delta_ref( - count, - &target, - *oid_index_mapping.get(&oid).unwrap(), - ); // TODO: replace with map_err - // target is dropped here, releasing the borrow on delta_data - entry } else { - let (data, _) = db - .try_find_cached(&oid, buf_t, &mut *cache.lock().unwrap()) - .unwrap() - .unwrap(); // TODO: replace with map_err - output::Entry::from_base(count, &data) - } - .unwrap(); // TODO: replace with map_err + if let Some((data, _)) = db_find_cached(&oid, buf_t)? { + output::Entry::from_base(count, &data) + } else { + Ok(output::Entry::invalid()) + } + }?; out.push(entry); progress.inc(); } From 9eaf5ecd38a0f74c896d3d36dc12ed23f703a2db Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 15:50:58 +0800 Subject: [PATCH 14/47] refactor errors --- gix-pack/src/cache/delta/traverse/mod.rs | 2 +- gix-pack/src/data/delta.rs | 70 +++++++++++------------- gix-pack/src/data/file/decode/mod.rs | 2 +- 3 files changed, 34 insertions(+), 40 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/mod.rs b/gix-pack/src/cache/delta/traverse/mod.rs index 1e0026af78..ac898ce333 100644 --- a/gix-pack/src/cache/delta/traverse/mod.rs +++ b/gix-pack/src/cache/delta/traverse/mod.rs @@ -42,7 +42,7 @@ pub enum Error { #[error("Failed to spawn thread when switching to work-stealing mode")] SpawnThread(#[from] std::io::Error), #[error(transparent)] - Delta(#[from] crate::data::delta::apply::Error), + Delta(#[from] crate::data::delta::ApplyError), } /// Additional context passed to the `inspect_object(…)` function of the [`Tree::traverse()`] method. diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 719adccdc7..3af513a223 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -1,33 +1,27 @@ use std::io::Write; -/// -pub mod apply { - /// Returned when failing to apply deltas. - #[derive(thiserror::Error, Debug)] - #[allow(missing_docs)] - pub enum Error { - #[error("Encountered unsupported command code: 0")] - UnsupportedCommandCode, - #[error("Delta copy from base: byte slices must match")] - DeltaCopyBaseSliceMismatch, - #[error("Delta copy data: byte slices must match")] - DeltaCopyDataSliceMismatch, - } +/// Returned when failing to apply deltas. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum ApplyError { + #[error("Encountered unsupported command code: 0")] + UnsupportedCommandCode, + #[error("Delta copy from base: byte slices must match")] + DeltaCopyBaseSliceMismatch, + #[error("Delta copy data: byte slices must match")] + DeltaCopyDataSliceMismatch, } -/// -pub mod encode { - /// Returned when failing to encode deltas. - #[derive(thiserror::Error, Debug)] - #[allow(missing_docs)] - pub enum Error { - #[error("Failed to write bytes")] - IOError, - #[error("Too large size in Copy instruction, should <= 0x00ffffff")] - TooLargeSize, - #[error("Too large data in Add instruction, length should <= 127")] - TooLargeData, - } +/// Returned when failing to encode deltas. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum EncodeError { + #[error("Failed to write bytes")] + IOError, + #[error("Too large size in Copy instruction, should <= 0x00ffffff")] + TooLargeSize, + #[error("Too large data in Add instruction, length should <= 127")] + TooLargeData, } /// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size) @@ -47,7 +41,7 @@ pub(crate) fn decode_header_size(d: &[u8]) -> (u64, usize) { (size, consumed) } -pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), apply::Error> { +pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), ApplyError> { let mut i = 0; while let Some(cmd) = data.get(i) { eprintln!("index: {i}, cmd: {cmd}"); @@ -89,14 +83,14 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( } let ofs = ofs as usize; Write::write(&mut target, &base[ofs..ofs + size as usize]) - .map_err(|_e| apply::Error::DeltaCopyBaseSliceMismatch)?; + .map_err(|_e| ApplyError::DeltaCopyBaseSliceMismatch)?; } // Reserved - 0 => return Err(apply::Error::UnsupportedCommandCode), + 0 => return Err(ApplyError::UnsupportedCommandCode), // Add size => { Write::write(&mut target, &data[i..i + *size as usize]) - .map_err(|_e| apply::Error::DeltaCopyDataSliceMismatch)?; + .map_err(|_e| ApplyError::DeltaCopyDataSliceMismatch)?; i += *size as usize; } } @@ -117,7 +111,7 @@ pub enum Instruction { /// Data length in bytes size: u32, }, - /// Add data embedded in instruction + /// Insert bytes embedded in instruction Add { /// Data to add data: Vec, // TODO: use borrow here @@ -126,7 +120,7 @@ pub enum Instruction { impl Instruction { /// Encode instruction to bytes. - pub fn encode(self, mut writer: impl Write) -> Result<(), encode::Error> { + pub fn encode(self, mut writer: impl Write) -> Result<(), EncodeError> { match self { Self::Copy { offset, mut size } => { let mut header = 0x80u8; @@ -136,7 +130,7 @@ impl Instruction { if size == 0x10000 { size = 0; } else if size > 0x00ffffff { - return Err(encode::Error::TooLargeSize); + return Err(EncodeError::TooLargeSize); } for i in 0..4 { @@ -156,18 +150,18 @@ impl Instruction { } } - writer.write_all(&[header]).map_err(|_| encode::Error::IOError)?; - writer.write_all(&buf[..n]).map_err(|_| encode::Error::IOError)?; + writer.write_all(&[header]).map_err(|_| EncodeError::IOError)?; + writer.write_all(&buf[..n]).map_err(|_| EncodeError::IOError)?; Ok(()) } Self::Add { data } => { if data.len() > 127 { - return Err(encode::Error::TooLargeData); + return Err(EncodeError::TooLargeData); } let header = data.len() as u8; - writer.write(&[header]).map_err(|_| encode::Error::IOError)?; - writer.write(data.as_slice()).map_err(|_| encode::Error::IOError)?; + writer.write(&[header]).map_err(|_| EncodeError::IOError)?; + writer.write(data.as_slice()).map_err(|_| EncodeError::IOError)?; Ok(()) } } diff --git a/gix-pack/src/data/file/decode/mod.rs b/gix-pack/src/data/file/decode/mod.rs index 71bbf1595c..6867547523 100644 --- a/gix-pack/src/data/file/decode/mod.rs +++ b/gix-pack/src/data/file/decode/mod.rs @@ -20,7 +20,7 @@ pub enum Error { #[error("Entry too large to fit in memory")] OutOfMemory, #[error(transparent)] - Delta(#[from] crate::data::delta::apply::Error), + Delta(#[from] crate::data::delta::ApplyError), } impl From for Error { From 3aeada3c35234df49a9ef020420091a7faebd8d9 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 19:55:09 +0800 Subject: [PATCH 15/47] fix dynamic method dispatch --- gitoxide-core/src/pack/create.rs | 6 ++-- .../src/data/output/entry/iter_from_counts.rs | 29 +++++++++++++++---- .../pack/data/output/count_and_entries.rs | 7 ++--- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/gitoxide-core/src/pack/create.rs b/gitoxide-core/src/pack/create.rs index 7e9ebcb285..b631e0b421 100644 --- a/gitoxide-core/src/pack/create.rs +++ b/gitoxide-core/src/pack/create.rs @@ -2,8 +2,8 @@ use std::{ffi::OsStr, io, path::Path, str::FromStr, time::Instant}; use anyhow::anyhow; use gix::{ - hash, hash::ObjectId, interrupt, objs::bstr::ByteVec, odb::pack, parallel::InOrderIter, prelude::Finalize, - progress, traverse, Count, NestedProgress, Progress, + hash, hash::ObjectId, interrupt, objs::bstr::ByteVec, odb::pack, parallel::InOrderIter, progress, traverse, Count, + NestedProgress, Progress, }; use crate::OutputFormat; @@ -284,7 +284,7 @@ where } else { writeln!(out, "{pack_name}")?; } - stats.entries = in_order_entries.inner.finalize()?; + stats.entries = in_order_entries.inner.finalize_boxed()?; write_progress.show_throughput(start); entries_progress.show_throughput(start); diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 02cb9627d8..c4d434f927 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -13,6 +13,24 @@ pub(crate) mod function { use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; use crate::data::output; + type Item = Result<(SequenceId, Vec), Error>; + type Stats = reduce::Statistics; + type StatsOutput = ::Output; + type StatsError = ::Error; + + pub trait DynFinalizeIterator: Iterator { + fn finalize_boxed(self: Box) -> Result; + } + + impl DynFinalizeIterator for T + where + T: Iterator + parallel::reduce::Finalize, + { + fn finalize_boxed(self: Box) -> Result { + self.finalize() + } + } + /// Given a known list of object `counts`, calculate entries ready to be put into a data pack. /// /// This allows objects to be written quite soon without having to wait for the entire pack to be built in memory. @@ -53,8 +71,7 @@ pub(crate) mod function { thread_limit, chunk_size, }: Options, - ) -> impl Iterator), Error>> - + parallel::reduce::Finalize> + ) -> Box where Find: crate::Find + Send + Clone + 'static, { @@ -104,7 +121,7 @@ pub(crate) mod function { let progress = Arc::new(parking_lot::Mutex::new(progress)); let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); - parallel::reduce::Stepwise::new( + Box::new(parallel::reduce::Stepwise::new( chunks.enumerate(), thread_limit, { @@ -214,7 +231,7 @@ pub(crate) mod function { } }, reduce::Statistics::default(), - ) + )) } Mode::CustomizedDeltaTopo { topo, cache_capacity } => { let sorted_counts = Arc::new(counts); @@ -236,7 +253,7 @@ pub(crate) mod function { .map(|(index, count)| (count.id, index)) .collect::>(), ); // TODO: rearrange delta solving order or lru to avoid cache peak - parallel::reduce::Stepwise::new( + Box::new(parallel::reduce::Stepwise::new( chunks.enumerate(), thread_limit, { @@ -310,7 +327,7 @@ pub(crate) mod function { } }, reduce::Statistics::default(), - ) + )) } } } diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index af27e468e9..b0f2ff3c90 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -1,9 +1,6 @@ use std::sync::atomic::AtomicBool; -use gix_features::{ - parallel::{reduce::Finalize, InOrderIter}, - progress, -}; +use gix_features::{parallel::InOrderIter, progress}; use gix_odb::{pack, pack::FindExt}; use gix_pack::data::{ output, @@ -304,7 +301,7 @@ fn traversals() -> crate::Result { }); assert_eq!(actual_count, expected_count); assert_eq!(counts_len, expected_count.total()); - let stats = entries_iter.finalize()?; + let stats = entries_iter.finalize_boxed()?; assert_eq!(stats, expected_entries_outcome); assert_eq!( From 53ad1c5aff59098765c1fc1bc9584b165f34627f Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 20:33:45 +0800 Subject: [PATCH 16/47] add test --- gix-pack/src/data/delta.rs | 19 +-- .../src/data/output/entry/iter_from_counts.rs | 8 +- .../pack/data/output/count_and_entries.rs | 139 ++++++++++++++++++ 3 files changed, 152 insertions(+), 14 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 3af513a223..122b985bab 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -180,15 +180,16 @@ pub fn compute_delta(source: &[u8], target: &[u8]) -> Vec { break; } } - vec![ - Instruction::Copy { - offset: 0, - size: common_prefix_len as u32, - }, - Instruction::Add { - data: target[common_prefix_len..].into(), - }, - ] + + let mut insts = Vec::new(); + insts.push(Instruction::Copy { + offset: 0, + size: common_prefix_len as u32, + }); + for chunk in target[common_prefix_len..].chunks(127) { + insts.push(Instruction::Add { data: chunk.to_vec() }); + } + insts } #[cfg(test)] diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index c4d434f927..6c63202c49 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -300,13 +300,11 @@ pub(crate) mod function { .expect("delta instruction should valid"); } target.data = delta_data_buf.as_slice(); - let entry = output::Entry::from_delta_ref( + output::Entry::from_delta_ref( count, &target, - *oid_index_mapping.get(&oid).unwrap(), // FIXIT: incorrect index - ); - // target is dropped here, releasing the borrow on delta_data - entry + *oid_index_mapping.get(&oid).unwrap(), + ) } else { Ok(output::Entry::invalid()) } diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index b0f2ff3c90..818327a57b 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -419,3 +419,142 @@ fn write_and_verify( Ok(()) } + +#[test] +fn customized_delta_topo() -> crate::Result { + use gix_pack::data::output::entry::iter_from_counts::{Mode, Options}; + + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] + struct Count { + trees: usize, + commits: usize, + blobs: usize, + tags: usize, + delta_ref: usize, + delta_oid: usize, + } + impl Count { + fn add(&mut self, kind: output::entry::Kind) { + use gix_object::Kind::*; + use output::entry::Kind::*; + match kind { + Base(Tree) => self.trees += 1, + Base(Commit) => self.commits += 1, + Base(Blob) => self.blobs += 1, + Base(Tag) => self.tags += 1, + DeltaRef { .. } => self.delta_ref += 1, + DeltaOid { .. } => self.delta_oid += 1, + } + } + } + + for db_kind in [ + DbKind::DeterministicGeneratedContent, + DbKind::DeterministicGeneratedContentMultiIndex, + ] { + let db = db(db_kind)?; + + // Get commits for testing + let head = hex_to_id("dfcb5e39ac6eb30179808bbab721e8a28ce1b52e"); + let commits: Vec<_> = gix_traverse::commit::Simple::new(Some(head), db.clone()) + .map(Result::unwrap) + .map(|c| c.id) + .take(3) + .collect(); + + // Count objects + let (counts, _) = output::count::objects( + db.clone(), + Box::new(commits.clone().into_iter().map(Ok)), + &progress::Discard, + &AtomicBool::new(false), + count::objects::Options { + input_object_expansion: count::objects::ObjectExpansion::AsIs, + thread_limit: Some(1), + ..Default::default() + }, + )?; + + // Create a simple topo: map each commit to its parent (if any) + // This is a simplified example - in real use, you'd compute delta relationships + let topo = std::collections::HashMap::new(); + // For demo purposes, use empty topo (all base objects) + // In practice, you'd compute which objects should be deltas + + let mut entries_iter = output::entry::iter_from_counts( + counts, + db.clone(), + Box::new(progress::Discard), + Options { + mode: Mode::CustomizedDeltaTopo { + topo, + cache_capacity: 1024 * 1024, // 1MB cache + }, + ..Default::default() + }, + ); + + let entries: Vec<_> = InOrderIter::from(entries_iter.by_ref()) + .collect::, _>>()? + .into_iter() + .flatten() + .collect(); + + let actual_count = entries.iter().fold(Count::default(), |mut c, e| { + c.add(e.kind); + c + }); + + // All should be base objects since topo is empty + assert!(actual_count.delta_ref == 0 || actual_count.delta_oid == 0); + + // Test with non-empty topo + let topo_with_deltas = if commits.len() >= 2 { + let mut m = std::collections::HashMap::new(); + // Map commit[1] -> commit[0] as delta + m.insert(commits[1], commits[0]); + m + } else { + std::collections::HashMap::new() + }; + + let (counts2, _) = output::count::objects( + db.clone(), + Box::new(commits.into_iter().map(Ok)), + &progress::Discard, + &AtomicBool::new(false), + count::objects::Options { + input_object_expansion: count::objects::ObjectExpansion::AsIs, + thread_limit: Some(1), + ..Default::default() + }, + )?; + + let mut entries_iter2 = output::entry::iter_from_counts( + counts2, + db.clone(), + Box::new(progress::Discard), + Options { + mode: Mode::CustomizedDeltaTopo { + topo: topo_with_deltas, + cache_capacity: 1024 * 1024, + }, + ..Default::default() + }, + ); + + let entries2: Vec<_> = InOrderIter::from(entries_iter2.by_ref()) + .collect::, _>>()? + .into_iter() + .flatten() + .collect(); + + // Should have at least some base objects + assert!(!entries2.is_empty()); + + // Verify finalize works + let _stats = entries_iter2.finalize_boxed()?; + } + + Ok(()) +} From 1a681d6bd61334e3d58e1a215afbdbe66c555d91 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 20:44:41 +0800 Subject: [PATCH 17/47] feat: use slice in Instruction::Add --- gix-pack/src/data/delta.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 122b985bab..92e8f8d238 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -103,7 +103,7 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( /// Delta instruction #[derive(Debug)] -pub enum Instruction { +pub enum Instruction<'a> { /// Copy data from source Copy { /// Start position to copy @@ -114,11 +114,11 @@ pub enum Instruction { /// Insert bytes embedded in instruction Add { /// Data to add - data: Vec, // TODO: use borrow here + data: &'a [u8], // TODO: use borrow here }, } -impl Instruction { +impl<'a> Instruction<'a> { /// Encode instruction to bytes. pub fn encode(self, mut writer: impl Write) -> Result<(), EncodeError> { match self { @@ -161,7 +161,7 @@ impl Instruction { let header = data.len() as u8; writer.write(&[header]).map_err(|_| EncodeError::IOError)?; - writer.write(data.as_slice()).map_err(|_| EncodeError::IOError)?; + writer.write(data).map_err(|_| EncodeError::IOError)?; Ok(()) } } @@ -169,7 +169,10 @@ impl Instruction { } /// Calcuate delta instructions from `source` to `target`. -pub fn compute_delta(source: &[u8], target: &[u8]) -> Vec { +pub fn compute_delta<'a, 'b>(source: &'a [u8], target: &'b [u8]) -> Vec> +where + 'b: 'a, +{ // TODO: more efficient // TODO: more configurable let mut common_prefix_len: usize = 0; @@ -187,7 +190,7 @@ pub fn compute_delta(source: &[u8], target: &[u8]) -> Vec { size: common_prefix_len as u32, }); for chunk in target[common_prefix_len..].chunks(127) { - insts.push(Instruction::Add { data: chunk.to_vec() }); + insts.push(Instruction::Add { data: chunk }); } insts } @@ -196,7 +199,7 @@ pub fn compute_delta(source: &[u8], target: &[u8]) -> Vec { mod tests { use super::*; - fn apply_delta(source: &[u8], delta: &Vec) -> Vec { + fn apply_delta<'a>(source: &'a [u8], delta: &Vec>) -> Vec { let mut buf = Vec::new(); for inst in delta { match inst { From dedcdcfd82c16dc55f607814e89a346e76747954 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 20:44:57 +0800 Subject: [PATCH 18/47] refactor test --- .../pack/data/output/count_and_entries.rs | 137 +++++++++--------- 1 file changed, 69 insertions(+), 68 deletions(-) diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index 818327a57b..183707977e 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -475,85 +475,86 @@ fn customized_delta_topo() -> crate::Result { }, )?; - // Create a simple topo: map each commit to its parent (if any) - // This is a simplified example - in real use, you'd compute delta relationships - let topo = std::collections::HashMap::new(); - // For demo purposes, use empty topo (all base objects) - // In practice, you'd compute which objects should be deltas - - let mut entries_iter = output::entry::iter_from_counts( - counts, - db.clone(), - Box::new(progress::Discard), - Options { - mode: Mode::CustomizedDeltaTopo { - topo, - cache_capacity: 1024 * 1024, // 1MB cache + { + // Empty topo: every object is base + let topo = std::collections::HashMap::new(); + + let mut entries_iter = output::entry::iter_from_counts( + counts, + db.clone(), + Box::new(progress::Discard), + Options { + mode: Mode::CustomizedDeltaTopo { + topo, + cache_capacity: 1024 * 1024, // 1MB cache + }, + ..Default::default() }, - ..Default::default() - }, - ); + ); - let entries: Vec<_> = InOrderIter::from(entries_iter.by_ref()) - .collect::, _>>()? - .into_iter() - .flatten() - .collect(); + let entries: Vec<_> = InOrderIter::from(entries_iter.by_ref()) + .collect::, _>>()? + .into_iter() + .flatten() + .collect(); - let actual_count = entries.iter().fold(Count::default(), |mut c, e| { - c.add(e.kind); - c - }); + let actual_count = entries.iter().fold(Count::default(), |mut c, e| { + c.add(e.kind); + c + }); - // All should be base objects since topo is empty - assert!(actual_count.delta_ref == 0 || actual_count.delta_oid == 0); + // All should be base objects since topo is empty + assert!(actual_count.delta_ref == 0 || actual_count.delta_oid == 0); + } // Test with non-empty topo - let topo_with_deltas = if commits.len() >= 2 { - let mut m = std::collections::HashMap::new(); - // Map commit[1] -> commit[0] as delta - m.insert(commits[1], commits[0]); - m - } else { - std::collections::HashMap::new() - }; - - let (counts2, _) = output::count::objects( - db.clone(), - Box::new(commits.into_iter().map(Ok)), - &progress::Discard, - &AtomicBool::new(false), - count::objects::Options { - input_object_expansion: count::objects::ObjectExpansion::AsIs, - thread_limit: Some(1), - ..Default::default() - }, - )?; + { + let topo_with_deltas = if commits.len() >= 2 { + let mut m = std::collections::HashMap::new(); + // Map commit[1] -> commit[0] as delta + m.insert(commits[1], commits[0]); + m + } else { + std::collections::HashMap::new() + }; + + let (counts2, _) = output::count::objects( + db.clone(), + Box::new(commits.into_iter().map(Ok)), + &progress::Discard, + &AtomicBool::new(false), + count::objects::Options { + input_object_expansion: count::objects::ObjectExpansion::AsIs, + thread_limit: Some(1), + ..Default::default() + }, + )?; - let mut entries_iter2 = output::entry::iter_from_counts( - counts2, - db.clone(), - Box::new(progress::Discard), - Options { - mode: Mode::CustomizedDeltaTopo { - topo: topo_with_deltas, - cache_capacity: 1024 * 1024, + let mut entries_iter2 = output::entry::iter_from_counts( + counts2, + db.clone(), + Box::new(progress::Discard), + Options { + mode: Mode::CustomizedDeltaTopo { + topo: topo_with_deltas, + cache_capacity: 1024 * 1024, + }, + ..Default::default() }, - ..Default::default() - }, - ); + ); - let entries2: Vec<_> = InOrderIter::from(entries_iter2.by_ref()) - .collect::, _>>()? - .into_iter() - .flatten() - .collect(); + let entries2: Vec<_> = InOrderIter::from(entries_iter2.by_ref()) + .collect::, _>>()? + .into_iter() + .flatten() + .collect(); - // Should have at least some base objects - assert!(!entries2.is_empty()); + // Should have at least some base objects + assert!(!entries2.is_empty()); - // Verify finalize works - let _stats = entries_iter2.finalize_boxed()?; + // Verify finalize works + let _stats = entries_iter2.finalize_boxed()?; + } } Ok(()) From 5d8d96740c1a09610f31765fc15c2439815c13cd Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 20:51:23 +0800 Subject: [PATCH 19/47] fix ai review --- Cargo.lock | 1 - gix-hashtable/Cargo.toml | 1 - gix-pack/src/data/delta.rs | 2 -- gix-pack/src/data/output/entry/iter_from_counts.rs | 1 + 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48d309ffc2..749068e376 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1900,7 +1900,6 @@ dependencies = [ "gix-hash", "hashbrown 0.16.1", "parking_lot", - "serde", ] [[package]] diff --git a/gix-hashtable/Cargo.toml b/gix-hashtable/Cargo.toml index fc910f5758..f8ba8df217 100644 --- a/gix-hashtable/Cargo.toml +++ b/gix-hashtable/Cargo.toml @@ -22,7 +22,6 @@ sha1 = ["gix-hash/sha1"] parking_lot = "0.12.4" hashbrown = { version = "0.16.0", default-features = false, features = ["inline-more"] } gix-hash = { version = "^0.23.0", path = "../gix-hash" } -serde = "1.0.228" [dev-dependencies] gix-hash = { path = "../gix-hash", features = ["sha1"] } diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 92e8f8d238..2ca0337e73 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -222,12 +222,10 @@ mod tests { let mut delta_data = Vec::new(); for inst in delta { - eprintln!("inst: {inst:?}"); inst.encode(&mut delta_data).unwrap(); } let mut restored_target = vec![0u8; target.len()]; - eprintln!("delta_data: {delta_data:?}"); apply(source, &mut restored_target, &delta_data).unwrap(); assert_eq!(target, restored_target); } diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 6c63202c49..cd2fcfbe2c 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -494,6 +494,7 @@ mod types { /// Key object refers to delta target, value object refers to delta source. /// Treat objects missing in keys as base objects. /// If the required delta does not exist, it will be computed. + #[cfg_attr(feature = "serde", serde(skip))] CustomizedDeltaTopo { /// A mapping from a delta target's Object ID to its corresponding delta source (base) ID. topo: std::collections::HashMap, From 6fabf3c6320bca3261a5046347bbac7dbfe76386 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 21:36:50 +0800 Subject: [PATCH 20/47] fix ci --- gix-pack/src/data/delta.rs | 3 +-- gix-pack/src/data/output/entry/iter_from_counts.rs | 8 +++++--- gix-pack/src/data/output/entry/mod.rs | 4 +--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 2ca0337e73..4f7e2b19cc 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -44,7 +44,6 @@ pub(crate) fn decode_header_size(d: &[u8]) -> (u64, usize) { pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), ApplyError> { let mut i = 0; while let Some(cmd) = data.get(i) { - eprintln!("index: {i}, cmd: {cmd}"); i += 1; match cmd { // Copy @@ -118,7 +117,7 @@ pub enum Instruction<'a> { }, } -impl<'a> Instruction<'a> { +impl Instruction<'_> { /// Encode instruction to bytes. pub fn encode(self, mut writer: impl Write) -> Result<(), EncodeError> { match self { diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index cd2fcfbe2c..c9313f6995 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -233,6 +233,7 @@ pub(crate) mod function { reduce::Statistics::default(), )) } + #[cfg(feature = "pack-cache-lru-dynamic")] Mode::CustomizedDeltaTopo { topo, cache_capacity } => { let sorted_counts = Arc::new(counts); let progress = Arc::new(parking_lot::Mutex::new(progress)); @@ -289,9 +290,9 @@ pub(crate) mod function { ) .map_err(Error::Find) }; - let entry = if let Some(soruce_oid) = topo.get(&oid) { + let entry = if let Some(source_oid) = topo.get(&oid) { if let Some((mut target, _)) = db_find_cached(&oid, buf_t)? { - if let Some((source, _)) = db_find_cached(&soruce_oid, buf_s)? { + if let Some((source, _)) = db_find_cached(source_oid, buf_s)? { let delta_insts = crate::data::delta::compute_delta(source.data, target.data); let mut delta_data_buf = Vec::new(); @@ -331,7 +332,7 @@ pub(crate) mod function { } fn rearrange_counts_by_pack_id( - counts: &mut Vec, + counts: &mut [output::Count], progress: &mut Box, ) -> Vec<(u32, std::ops::Range)> { let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); @@ -495,6 +496,7 @@ mod types { /// Treat objects missing in keys as base objects. /// If the required delta does not exist, it will be computed. #[cfg_attr(feature = "serde", serde(skip))] + #[cfg(feature = "pack-cache-lru-dynamic")] CustomizedDeltaTopo { /// A mapping from a delta target's Object ID to its corresponding delta source (base) ID. topo: std::collections::HashMap, diff --git a/gix-pack/src/data/output/entry/mod.rs b/gix-pack/src/data/output/entry/mod.rs index 39cdcbec3e..5a1624693f 100644 --- a/gix-pack/src/data/output/entry/mod.rs +++ b/gix-pack/src/data/output/entry/mod.rs @@ -160,9 +160,7 @@ impl output::Entry { ) -> Result { Ok(output::Entry { id: count.id.to_owned(), - kind: Kind::DeltaRef { - object_index: object_index, - }, + kind: Kind::DeltaRef { object_index }, decompressed_size: obj.data.len(), compressed_data: { let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new()); From 698dda196b19e9f343e777116395a4fecc785ab7 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 22:03:48 +0800 Subject: [PATCH 21/47] fix lint --- gix-pack/src/data/delta.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 4f7e2b19cc..e22766f270 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -106,9 +106,9 @@ pub enum Instruction<'a> { /// Copy data from source Copy { /// Start position to copy - offset: u32, + offset: usize, /// Data length in bytes - size: u32, + size: usize, }, /// Insert bytes embedded in instruction Add { @@ -186,7 +186,7 @@ where let mut insts = Vec::new(); insts.push(Instruction::Copy { offset: 0, - size: common_prefix_len as u32, + size: common_prefix_len, }); for chunk in target[common_prefix_len..].chunks(127) { insts.push(Instruction::Add { data: chunk }); @@ -202,10 +202,8 @@ mod tests { let mut buf = Vec::new(); for inst in delta { match inst { - Instruction::Add { data } => buf.extend_from_slice(&data), - Instruction::Copy { offset, size } => { - buf.extend_from_slice(&source[(*offset as usize)..(*offset as usize + *size as usize)]) - } + Instruction::Add { data } => buf.extend_from_slice(data), + Instruction::Copy { offset, size } => buf.extend_from_slice(&source[*offset..*offset + *size]), } } buf From 5de3cc7737130227e415472b777a67aec6d5b3cd Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 22:04:58 +0800 Subject: [PATCH 22/47] fix doc ci --- gix-pack/src/data/output/entry/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/output/entry/mod.rs b/gix-pack/src/data/output/entry/mod.rs index 5a1624693f..b8f234e055 100644 --- a/gix-pack/src/data/output/entry/mod.rs +++ b/gix-pack/src/data/output/entry/mod.rs @@ -31,7 +31,7 @@ pub enum Kind { }, } -/// The error returned by [`output::Entry::from_data()`]. +/// The error returned by [`output::Entry::from_base()`]. #[allow(missing_docs)] #[derive(Debug, thiserror::Error)] pub enum Error { @@ -151,7 +151,7 @@ impl output::Entry { }) } - /// Like [`from_base()`], but with type OfsDelta. + /// Like [`output::Entry::from_base()`], but with type OfsDelta. /// `object_index` is the absolute index to the object. pub fn from_delta_ref( count: &output::Count, From 3e320f9984bea139923bcefb4bd2697bc914df9c Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 22:12:51 +0800 Subject: [PATCH 23/47] remove confusing comment --- gix-pack/src/data/delta.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index e22766f270..716d1c1587 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -113,7 +113,7 @@ pub enum Instruction<'a> { /// Insert bytes embedded in instruction Add { /// Data to add - data: &'a [u8], // TODO: use borrow here + data: &'a [u8], }, } From 351b70b389dc963a28fdd02981937a119a78865f Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Thu, 23 Apr 2026 22:58:56 +0800 Subject: [PATCH 24/47] fix nextest ci --- gix-pack/src/data/output/entry/iter_from_counts.rs | 4 +--- gix-pack/tests/pack/data/output/count_and_entries.rs | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index c9313f6995..fb01f67d0f 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -446,8 +446,6 @@ mod reduce { } mod types { - use gix_hash::ObjectId; - use crate::data::output::entry; /// Information gathered during the run of [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. @@ -499,7 +497,7 @@ mod types { #[cfg(feature = "pack-cache-lru-dynamic")] CustomizedDeltaTopo { /// A mapping from a delta target's Object ID to its corresponding delta source (base) ID. - topo: std::collections::HashMap, + topo: std::collections::HashMap, /// The maximum cache capacity to store object data while find object. Count in bytes. cache_capacity: usize, }, diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index 183707977e..b1fe6cb275 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -420,6 +420,7 @@ fn write_and_verify( Ok(()) } +#[cfg(feature = "all-features")] #[test] fn customized_delta_topo() -> crate::Result { use gix_pack::data::output::entry::iter_from_counts::{Mode, Options}; From aa02be3d23e509bf754b5c2e0eb50d50022743dd Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Fri, 24 Apr 2026 11:06:43 +0800 Subject: [PATCH 25/47] fix: wrong delta ref index --- gix-pack/src/data/output/entry/iter_from_counts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index fb01f67d0f..2eca5986ae 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -304,7 +304,7 @@ pub(crate) mod function { output::Entry::from_delta_ref( count, &target, - *oid_index_mapping.get(&oid).unwrap(), + *oid_index_mapping.get(source_oid).unwrap(), // TODO: test this ) } else { Ok(output::Entry::invalid()) From dd4daf1ae63c043596689809589a926dfa556898 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Fri, 24 Apr 2026 12:49:33 +0800 Subject: [PATCH 26/47] fix: write_all when encode instruction --- gix-pack/src/data/delta.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 716d1c1587..4a8d104157 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -159,8 +159,8 @@ impl Instruction<'_> { } let header = data.len() as u8; - writer.write(&[header]).map_err(|_| EncodeError::IOError)?; - writer.write(data).map_err(|_| EncodeError::IOError)?; + writer.write_all(&[header]).map_err(|_| EncodeError::IOError)?; + writer.write_all(data).map_err(|_| EncodeError::IOError)?; Ok(()) } } From e7010a10446916592ce89c8c255f6e5851444186 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Fri, 24 Apr 2026 12:56:19 +0800 Subject: [PATCH 27/47] fix: zero size Instruction::Add --- gix-pack/src/data/delta.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 4a8d104157..4ac4a1799e 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -184,10 +184,12 @@ where } let mut insts = Vec::new(); - insts.push(Instruction::Copy { - offset: 0, - size: common_prefix_len, - }); + if common_prefix_len > 0 { + insts.push(Instruction::Copy { + offset: 0, + size: common_prefix_len, + }); + } for chunk in target[common_prefix_len..].chunks(127) { insts.push(Instruction::Add { data: chunk }); } From 19ca9db585b7b80cfc40cae959ccf16eebc9dd4f Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Fri, 24 Apr 2026 15:59:51 +0800 Subject: [PATCH 28/47] add comments --- gix-pack/src/data/output/entry/iter_from_counts.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 2eca5986ae..5eb7fa4617 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -297,9 +297,11 @@ pub(crate) mod function { crate::data::delta::compute_delta(source.data, target.data); let mut delta_data_buf = Vec::new(); for inst in delta_insts { + // Panic here because delta algorithm is incorrect, should fast fail inst.encode(&mut delta_data_buf) .expect("delta instruction should valid"); } + // Header will be encoded by `output::Entry::to_entry_header` target.data = delta_data_buf.as_slice(); output::Entry::from_delta_ref( count, From 0e5c48f107411a6f024e4fde429e2014ba6d02eb Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Fri, 24 Apr 2026 18:19:48 +0800 Subject: [PATCH 29/47] test CustomizedDeltaTopo --- .../src/data/output/entry/iter_from_counts.rs | 2 +- .../pack/data/output/count_and_entries.rs | 45 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 5eb7fa4617..a8778ca0b3 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -306,7 +306,7 @@ pub(crate) mod function { output::Entry::from_delta_ref( count, &target, - *oid_index_mapping.get(source_oid).unwrap(), // TODO: test this + *oid_index_mapping.get(source_oid).unwrap(), ) } else { Ok(output::Entry::invalid()) diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index b1fe6cb275..aa0c538e9e 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -510,13 +510,19 @@ fn customized_delta_topo() -> crate::Result { // Test with non-empty topo { - let topo_with_deltas = if commits.len() >= 2 { + // Skip if not enough commits for delta test + if commits.len() < 2 { + continue; + } + + // Save source_oid before commits is moved + let source_oid = commits[0]; + + let topo_with_deltas = { let mut m = std::collections::HashMap::new(); // Map commit[1] -> commit[0] as delta - m.insert(commits[1], commits[0]); + m.insert(commits[1], source_oid); m - } else { - std::collections::HashMap::new() }; let (counts2, _) = output::count::objects( @@ -532,7 +538,7 @@ fn customized_delta_topo() -> crate::Result { )?; let mut entries_iter2 = output::entry::iter_from_counts( - counts2, + counts2.clone(), db.clone(), Box::new(progress::Discard), Options { @@ -553,6 +559,35 @@ fn customized_delta_topo() -> crate::Result { // Should have at least some base objects assert!(!entries2.is_empty()); + // Find the index of source object in counts2 for verification + let source_index = counts2 + .iter() + .position(|c| c.id == source_oid) + .expect("source commit should be in counts"); + + // Verify object_index in DeltaRef entries is valid and correct + let count_len = counts2.len(); + let mut found_delta_ref = false; + for entry in &entries2 { + if let gix_pack::data::output::entry::Kind::DeltaRef { object_index } = entry.kind { + found_delta_ref = true; + assert!( + object_index < count_len, + "DeltaRef object_index {} should be < count_len {}", + object_index, + count_len + ); + // Verify it points to the expected source + assert_eq!( + object_index, source_index, + "DeltaRef should point to source commit, expected {} but got {}", + source_index, object_index + ); + } + } + // We created topo with commit[1] -> source, so there should be DeltaRef entries + assert!(found_delta_ref, "Should have DeltaRef entries from customized topo"); + // Verify finalize works let _stats = entries_iter2.finalize_boxed()?; } From 44f594b7a26d7a4d23793aea0a1e18b61d726e46 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 10:30:00 +0800 Subject: [PATCH 30/47] fix codex review --- gix-pack/src/data/output/entry/iter_from_counts.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index a8778ca0b3..847cd80d30 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -301,12 +301,14 @@ pub(crate) mod function { inst.encode(&mut delta_data_buf) .expect("delta instruction should valid"); } - // Header will be encoded by `output::Entry::to_entry_header` + // Header with encoded size and will be encoded by `output::Entry::to_entry_header` target.data = delta_data_buf.as_slice(); output::Entry::from_delta_ref( count, &target, - *oid_index_mapping.get(source_oid).unwrap(), + *oid_index_mapping + .get(source_oid) + .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack ) } else { Ok(output::Entry::invalid()) From fc2d8a49f0a069e3cc1f08f9ba801f4be6fe306e Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 10:39:12 +0800 Subject: [PATCH 31/47] fix test --- gix-pack/tests/pack/data/output/count_and_entries.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index aa0c538e9e..5d43c5582e 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -458,10 +458,9 @@ fn customized_delta_topo() -> crate::Result { // Get commits for testing let head = hex_to_id("dfcb5e39ac6eb30179808bbab721e8a28ce1b52e"); let commits: Vec<_> = gix_traverse::commit::Simple::new(Some(head), db.clone()) - .map(Result::unwrap) - .map(|c| c.id) .take(3) - .collect(); + .map(|commit| commit.map(|c| c.id)) + .collect::>()?; // Count objects let (counts, _) = output::count::objects( @@ -505,7 +504,7 @@ fn customized_delta_topo() -> crate::Result { }); // All should be base objects since topo is empty - assert!(actual_count.delta_ref == 0 || actual_count.delta_oid == 0); + assert!(actual_count.delta_ref == 0 && actual_count.delta_oid == 0); } // Test with non-empty topo From 893fef233bc1916ad8e867b3914b3882ed9e2755 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 10:39:38 +0800 Subject: [PATCH 32/47] fix visibility of DynFinalizeIterator --- .../src/data/output/entry/iter_from_counts.rs | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 847cd80d30..f5d5006c91 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -13,24 +13,6 @@ pub(crate) mod function { use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; use crate::data::output; - type Item = Result<(SequenceId, Vec), Error>; - type Stats = reduce::Statistics; - type StatsOutput = ::Output; - type StatsError = ::Error; - - pub trait DynFinalizeIterator: Iterator { - fn finalize_boxed(self: Box) -> Result; - } - - impl DynFinalizeIterator for T - where - T: Iterator + parallel::reduce::Finalize, - { - fn finalize_boxed(self: Box) -> Result { - self.finalize() - } - } - /// Given a known list of object `counts`, calculate entries ready to be put into a data pack. /// /// This allows objects to be written quite soon without having to wait for the entire pack to be built in memory. @@ -71,7 +53,7 @@ pub(crate) mod function { thread_limit, chunk_size, }: Options, - ) -> Box + ) -> Box where Find: crate::Find + Send + Clone + 'static, { @@ -570,5 +552,23 @@ mod types { } } } + + type Item = Result<(gix_features::parallel::SequenceId, Vec), Error>; + type Stats = super::reduce::Statistics; + type StatsOutput = ::Output; + type StatsError = ::Error; + + pub trait DynFinalizeIterator: Iterator { + fn finalize_boxed(self: Box) -> Result; + } + + impl DynFinalizeIterator for T + where + T: Iterator + gix_features::parallel::reduce::Finalize, + { + fn finalize_boxed(self: Box) -> Result { + self.finalize() + } + } } pub use types::{Error, Mode, Options, Outcome, ProgressId}; From 1c623b8e64cb5b91c0effddb625ab3a25cb1d223 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 11:24:46 +0800 Subject: [PATCH 33/47] fix: use write_all in apply --- gix-pack/src/data/delta.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 2d8dd157ee..1668ac41ec 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -94,7 +94,7 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( let end = ofs.checked_add(size as usize).ok_or(ApplyError::Corrupt { message: "delta copy range overflows", })?; - std::io::Write::write( + std::io::Write::write_all( &mut target, base.get(ofs..end).ok_or(ApplyError::Corrupt { message: "delta copy range exceeds base object size", @@ -111,7 +111,7 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<( let end = i.checked_add(*size as usize).ok_or(ApplyError::Corrupt { message: "delta insert range overflows", })?; - std::io::Write::write( + std::io::Write::write_all( &mut target, data.get(i..end).ok_or(ApplyError::Corrupt { message: "delta insert data is truncated", From 02e8695da2c84f3807128e51273e7566a0d76f82 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 11:30:14 +0800 Subject: [PATCH 34/47] fix: Instruction encode error --- gix-pack/src/data/delta.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 1668ac41ec..2d2abf323e 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -18,12 +18,14 @@ pub enum ApplyError { #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum EncodeError { - #[error("Failed to write bytes")] - IOError, - #[error("Too large size in Copy instruction, should <= 0x00ffffff")] - TooLargeSize, - #[error("Too large data in Add instruction, length should <= 127")] - TooLargeData, + #[error("Failed to write bytes: {0}")] + IOError(std::io::Error), + #[error("Too large offset in Copy instruction, should <= 0xffffffff, got {0}")] + TooLargeOffset(usize), + #[error("Too large size in Copy instruction, should <= 0x00ffffff, got {0}")] + TooLargeSize(usize), + #[error("Too large data in Add instruction, length should <= 127, got {0}")] + TooLargeData(usize), } /// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size) @@ -165,7 +167,10 @@ impl Instruction<'_> { if size == 0x10000 { size = 0; } else if size > 0x00ffffff { - return Err(EncodeError::TooLargeSize); + return Err(EncodeError::TooLargeSize(size)); + } + if offset > 0xffffffff { + return Err(EncodeError::TooLargeOffset(offset)); } for i in 0..4 { @@ -185,18 +190,18 @@ impl Instruction<'_> { } } - writer.write_all(&[header]).map_err(|_| EncodeError::IOError)?; - writer.write_all(&buf[..n]).map_err(|_| EncodeError::IOError)?; + writer.write_all(&[header]).map_err(EncodeError::IOError)?; + writer.write_all(&buf[..n]).map_err(EncodeError::IOError)?; Ok(()) } Self::Add { data } => { if data.len() > 127 { - return Err(EncodeError::TooLargeData); + return Err(EncodeError::TooLargeData(data.len())); } let header = data.len() as u8; - writer.write_all(&[header]).map_err(|_| EncodeError::IOError)?; - writer.write_all(data).map_err(|_| EncodeError::IOError)?; + writer.write_all(&[header]).map_err(EncodeError::IOError)?; + writer.write_all(data).map_err(EncodeError::IOError)?; Ok(()) } } From b45c2bc33a8aaf77f23a54d18a4e2462db4fbe0c Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 11:32:42 +0800 Subject: [PATCH 35/47] fix typo --- gix-pack/src/data/delta.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 2d2abf323e..12738e596d 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -208,7 +208,7 @@ impl Instruction<'_> { } } -/// Calcuate delta instructions from `source` to `target`. +/// Calculate delta instructions from `source` to `target`. pub fn compute_delta<'a, 'b>(source: &'a [u8], target: &'b [u8]) -> Vec> where 'b: 'a, From 8c990417efb8fa07330e8a2aed3e4791ecc44705 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 14:24:29 +0800 Subject: [PATCH 36/47] fix delta lifetime --- gix-pack/src/data/delta.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gix-pack/src/data/delta.rs b/gix-pack/src/data/delta.rs index 12738e596d..e56568a31e 100644 --- a/gix-pack/src/data/delta.rs +++ b/gix-pack/src/data/delta.rs @@ -209,10 +209,7 @@ impl Instruction<'_> { } /// Calculate delta instructions from `source` to `target`. -pub fn compute_delta<'a, 'b>(source: &'a [u8], target: &'b [u8]) -> Vec> -where - 'b: 'a, -{ +pub fn compute_delta<'a>(source: &[u8], target: &'a [u8]) -> Vec> { // TODO: more efficient // TODO: more configurable let mut common_prefix_len: usize = 0; From e2005d22384a242e443c2bf8bee51155ad8bc680 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 15:01:12 +0800 Subject: [PATCH 37/47] test on objects order --- gix-pack/src/data/output/bytes.rs | 2 +- .../pack/data/output/count_and_entries.rs | 78 ++++++++++--------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/gix-pack/src/data/output/bytes.rs b/gix-pack/src/data/output/bytes.rs index c6ee8e5f1e..c89ec4cfb1 100644 --- a/gix-pack/src/data/output/bytes.rs +++ b/gix-pack/src/data/output/bytes.rs @@ -109,7 +109,7 @@ where } self.pack_offsets_and_validity.push((self.written, true)); let header = entry.to_entry_header(self.entry_version, |index| { - let (base_offset, is_valid_object) = self.pack_offsets_and_validity[index]; + let (base_offset, is_valid_object) = self.pack_offsets_and_validity.get(index).expect("objects in pack should be sorted"); if !is_valid_object { unreachable!("if you see this the object database is correct as a delta refers to a non-existing object") } diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index 5d43c5582e..68da9efe6a 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -509,24 +509,23 @@ fn customized_delta_topo() -> crate::Result { // Test with non-empty topo { + let commits = commits.to_owned(); + // Skip if not enough commits for delta test if commits.len() < 2 { continue; } - // Save source_oid before commits is moved - let source_oid = commits[0]; - let topo_with_deltas = { let mut m = std::collections::HashMap::new(); - // Map commit[1] -> commit[0] as delta - m.insert(commits[1], source_oid); + m.insert(commits[0], commits[1]); + m.insert(commits[2], commits[1]); m }; let (counts2, _) = output::count::objects( db.clone(), - Box::new(commits.into_iter().map(Ok)), + Box::new(commits.to_owned().into_iter().map(Ok)), &progress::Discard, &AtomicBool::new(false), count::objects::Options { @@ -555,40 +554,45 @@ fn customized_delta_topo() -> crate::Result { .flatten() .collect(); - // Should have at least some base objects - assert!(!entries2.is_empty()); - - // Find the index of source object in counts2 for verification - let source_index = counts2 - .iter() - .position(|c| c.id == source_oid) - .expect("source commit should be in counts"); - - // Verify object_index in DeltaRef entries is valid and correct - let count_len = counts2.len(); - let mut found_delta_ref = false; - for entry in &entries2 { - if let gix_pack::data::output::entry::Kind::DeltaRef { object_index } = entry.kind { - found_delta_ref = true; - assert!( - object_index < count_len, - "DeltaRef object_index {} should be < count_len {}", - object_index, - count_len - ); - // Verify it points to the expected source - assert_eq!( - object_index, source_index, - "DeltaRef should point to source commit, expected {} but got {}", - source_index, object_index - ); + assert_eq!(entries2.len(), counts2.len(), "length of input and output should equal"); + for entry in entries2.iter() { + if entry.id == commits[0] || entry.id == commits[2] { + assert!(matches!(entry.kind, entry::Kind::DeltaRef { .. })); + } else if entry.id == commits[1] { + assert!(matches!(entry.kind, entry::Kind::Base(..))); } } - // We created topo with commit[1] -> source, so there should be DeltaRef entries - assert!(found_delta_ref, "Should have DeltaRef entries from customized topo"); - // Verify finalize works - let _stats = entries_iter2.finalize_boxed()?; + // Directly write to a pack file + let tmp_dir = gix_testtools::tempfile::TempDir::new()?; + let pack_file_path = tmp_dir.path().join("new.pack"); + let mut pack_file = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&pack_file_path)?; + let (_num_written_bytes, _pack_hash) = { + let num_entries = entries2.len(); + let mut pack_writer = output::bytes::FromEntriesIter::new( + std::iter::once(Ok::<_, entry::iter_from_counts::Error>(entries2)), + &mut pack_file, + num_entries as u32, + pack::data::Version::V2, + gix_hash::Kind::Sha1, + ); + let mut n = pack_writer.next().expect("one entries bundle was written")?; + n += pack_writer.next().expect("the trailer was written")?; + assert!( + pack_writer.next().is_none(), + "there is nothing more to iterate this time" + ); + // verify we can still get the original parts back + let hash = pack_writer.digest().expect("digest is available when iterator is done"); + let _ = pack_writer.input; + let _ = pack_writer.into_write(); + (n, hash) + }; + + // TODO: parse pack file } } From 681b1319891f919e9df9b0025b7ebef242f7e9a7 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 19:42:27 +0800 Subject: [PATCH 38/47] fix objects order --- .../src/data/output/entry/iter_from_counts.rs | 88 ++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index f5d5006c91..00fcd747bb 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -9,6 +9,7 @@ pub(crate) mod function { Progress, }, }; + use gix_hash::ObjectId; use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; use crate::data::output; @@ -217,7 +218,10 @@ pub(crate) mod function { } #[cfg(feature = "pack-cache-lru-dynamic")] Mode::CustomizedDeltaTopo { topo, cache_capacity } => { - let sorted_counts = Arc::new(counts); + let sorted_counts = { + topo_sort(counts.as_mut_slice(), &topo).expect("no loop in delta topo"); + Arc::new(counts) + }; let progress = Arc::new(parking_lot::Mutex::new(progress)); let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); @@ -354,6 +358,70 @@ pub(crate) mod function { index } + + /// Topological sort `counts` in place, parents first. + /// If there is a loop, returns Err(usize), meaning how many ObjectID are in loops indicated in the `to_parent`. + fn topo_sort( + counts: &mut [output::Count], + to_parent: &std::collections::HashMap, + ) -> Result<(), usize> { + // firstly sort `vertexes` as children first, then reverse `vertexex` + use std::collections::HashMap; + + type CountIndex = usize; + + let n = counts.len(); + if n == 0 { + return Ok(()); + } + + let oid_to_idx: HashMap = counts + .iter() + .enumerate() + .map(|(idx, c)| (c.id.to_owned(), idx)) + .collect(); + + let mut idx_to_child_count: HashMap = (0..n).into_iter().map(|c| (c, 0)).collect(); + for (child, parent) in to_parent { + let child = oid_to_idx.get(child).unwrap(); + let parent = oid_to_idx.get(parent).unwrap(); + if idx_to_child_count.contains_key(child) { + if let Some(count) = idx_to_child_count.get_mut(parent) { + *count += 1; + } + } + } + + // leaf vertex collection + let mut stack: Vec = idx_to_child_count + .iter() + .filter_map(|(&c, count)| (*count == 0).then_some(c)) + .collect(); + + let mut sorted = Vec::with_capacity(n); + while let Some(curr) = stack.pop() { + if let Some(parent) = to_parent.get(&counts[curr].id) { + let parent = oid_to_idx.get(parent).unwrap(); + if let Some(count) = idx_to_child_count.get_mut(parent) { + *count -= 1; + if *count == 0 { + stack.push(*parent); + } + } + } + sorted.push(curr); + } + + if sorted.len() < n { + Err(n - sorted.len()) + } else if sorted.len() == n { + sorted.reverse(); + super::util::apply_permutation(counts, &sorted); + Ok(()) + } else { + unreachable!("sorted counts") + } + } } mod util { @@ -388,6 +456,24 @@ mod util { } } } + + pub fn apply_permutation(data: &mut [T], indices: &[usize]) { + let n = data.len(); + + // inverse transformation: indices[i] = j => indices[j] = i + let mut inv = vec![0; n]; + for (i, &j) in indices.iter().enumerate() { + inv[j] = i; + } + + for i in 0..n { + while inv[i] != i { + let target = inv[i]; + data.swap(i, target); + inv.swap(i, target); + } + } + } } mod reduce { From fa29be0904c83b2d0bdf1566b9feae4530cbe785 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sat, 25 Apr 2026 21:47:17 +0800 Subject: [PATCH 39/47] fix lint --- .../src/data/output/entry/iter_from_counts.rs | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 00fcd747bb..080080d926 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -381,7 +381,7 @@ pub(crate) mod function { .map(|(idx, c)| (c.id.to_owned(), idx)) .collect(); - let mut idx_to_child_count: HashMap = (0..n).into_iter().map(|c| (c, 0)).collect(); + let mut idx_to_child_count: HashMap = (0..n).map(|c| (c, 0)).collect(); for (child, parent) in to_parent { let child = oid_to_idx.get(child).unwrap(); let parent = oid_to_idx.get(parent).unwrap(); @@ -412,14 +412,16 @@ pub(crate) mod function { sorted.push(curr); } - if sorted.len() < n { - Err(n - sorted.len()) - } else if sorted.len() == n { - sorted.reverse(); - super::util::apply_permutation(counts, &sorted); - Ok(()) - } else { - unreachable!("sorted counts") + match sorted.len().cmp(&n) { + Ordering::Less => Err(n - sorted.len()), + Ordering::Equal => { + sorted.reverse(); + super::util::apply_permutation(counts, &sorted); + Ok(()) + } + Ordering::Greater => { + unreachable!("sorted counts should less or equal than all counts") + } } } } From abf3452ee8d8e6d0cabf01f2de29a3061da30121 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 09:59:16 +0800 Subject: [PATCH 40/47] test: use objects on a delta chain --- .../pack/data/output/count_and_entries.rs | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index 68da9efe6a..e203e76118 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -455,17 +455,18 @@ fn customized_delta_topo() -> crate::Result { ] { let db = db(db_kind)?; - // Get commits for testing - let head = hex_to_id("dfcb5e39ac6eb30179808bbab721e8a28ce1b52e"); - let commits: Vec<_> = gix_traverse::commit::Simple::new(Some(head), db.clone()) - .take(3) - .map(|commit| commit.map(|c| c.id)) - .collect::>()?; + // Get objects for testing + let objects: Vec<_> = vec![ + hex_to_id("dc805c143bc9f4fcf6d333a7676f95a7f67651d8"), // base + hex_to_id("29484d17f163832a63fcc6c81f86d87bf7e56d40"), // delta @ 1 + hex_to_id("10d63474c0a8c66d24ad44b9673fe7e2d5bc2189"), // delta @ 2 + hex_to_id("c707160576c571775f9963c4efc97ba1b3ded920"), // delta @ 3 + ]; // Count objects let (counts, _) = output::count::objects( db.clone(), - Box::new(commits.clone().into_iter().map(Ok)), + Box::new(objects.clone().into_iter().map(Ok)), &progress::Discard, &AtomicBool::new(false), count::objects::Options { @@ -475,8 +476,8 @@ fn customized_delta_topo() -> crate::Result { }, )?; + // Empty topo: every object is base { - // Empty topo: every object is base let topo = std::collections::HashMap::new(); let mut entries_iter = output::entry::iter_from_counts( @@ -509,23 +510,17 @@ fn customized_delta_topo() -> crate::Result { // Test with non-empty topo { - let commits = commits.to_owned(); - - // Skip if not enough commits for delta test - if commits.len() < 2 { - continue; - } - + let objects = objects.to_owned(); let topo_with_deltas = { let mut m = std::collections::HashMap::new(); - m.insert(commits[0], commits[1]); - m.insert(commits[2], commits[1]); + m.insert(objects[3], objects[2]); + m.insert(objects[1], objects[2]); m }; let (counts2, _) = output::count::objects( db.clone(), - Box::new(commits.to_owned().into_iter().map(Ok)), + Box::new(objects.to_owned().into_iter().map(Ok)), &progress::Discard, &AtomicBool::new(false), count::objects::Options { @@ -555,10 +550,11 @@ fn customized_delta_topo() -> crate::Result { .collect(); assert_eq!(entries2.len(), counts2.len(), "length of input and output should equal"); + let delta_oids = std::collections::HashSet::from([objects[1], objects[3]]); for entry in entries2.iter() { - if entry.id == commits[0] || entry.id == commits[2] { + if delta_oids.contains(&entry.id) { assert!(matches!(entry.kind, entry::Kind::DeltaRef { .. })); - } else if entry.id == commits[1] { + } else { assert!(matches!(entry.kind, entry::Kind::Base(..))); } } From 6eee1eeeab845e3b4a7c7d2b19f1eff9bbcb79a0 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 11:36:00 +0800 Subject: [PATCH 41/47] feat: reuse delta --- .../src/data/output/entry/iter_from_counts.rs | 105 +++++++++++++++--- gix-pack/src/data/output/entry/mod.rs | 12 +- .../pack/data/output/count_and_entries.rs | 27 ++++- 3 files changed, 118 insertions(+), 26 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 080080d926..8181135456 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -218,6 +218,10 @@ pub(crate) mod function { } #[cfg(feature = "pack-cache-lru-dynamic")] Mode::CustomizedDeltaTopo { topo, cache_capacity } => { + if allow_thin_pack { + todo!("support allow_thin_pack"); + } + let sorted_counts = { topo_sort(counts.as_mut_slice(), &topo).expect("no loop in delta topo"); Arc::new(counts) @@ -225,12 +229,8 @@ pub(crate) mod function { let progress = Arc::new(parking_lot::Mutex::new(progress)); let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); - // TODO: reuse delta - if allow_thin_pack { - todo!("support allow_thin_pack"); - } - - let cache = Arc::new(std::sync::Mutex::new(crate::cache::lru::MemoryCappedHashmap::new( + // Cache decompressed data for Find::try_find_cached + let object_cache = Arc::new(std::sync::Mutex::new(crate::cache::lru::MemoryCappedHashmap::new( cache_capacity, ))); // TODO: use parking_lot::Mutex let oid_index_mapping = Arc::new( @@ -247,8 +247,12 @@ pub(crate) mod function { let progress = Arc::clone(&progress); move |n| { ( - Vec::new(), // buffer object data for target - Vec::new(), // buffer object data for source + // Cache entries object ID and offset for packs + std::collections::HashMap::>::new(), + // buffer object data for target + Vec::new(), + // buffer object data for source + Vec::new(), progress .lock() .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), @@ -258,14 +262,15 @@ pub(crate) mod function { { let sorted_counts = Arc::clone(&sorted_counts); let oid_index_mapping = Arc::clone(&oid_index_mapping); - let cache = Arc::clone(&cache); + let cache = Arc::clone(&object_cache); move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), - (buf_t, buf_s, progress)| { + (pack_index_cache, buf_t, buf_s, progress)| { let mut out = Vec::new(); let chunk = &sorted_counts[chunk_range]; - let stats = Outcome::default(); + let mut stats = Outcome::default(); progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + // TODO: refactor needed for count in chunk.iter() { let oid = count.id; let db_find_cached = |oid, buf| { @@ -277,7 +282,47 @@ pub(crate) mod function { .map_err(Error::Find) }; let entry = if let Some(source_oid) = topo.get(&oid) { - if let Some((mut target, _)) = db_find_cached(&oid, buf_t)? { + let mut find_existing_delta = || -> Option<_> { + let (_location, pack_entry) = count + .entry_pack_location + .as_ref() + .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe)))?; + let delta = find_delta( + count, + &pack_entry, + source_oid, + |pack_id, base_offset| { + let offsets_oid_mapping = + pack_index_cache.entry(pack_id).or_insert_with(|| { + db.pack_offsets_and_oid(pack_id) + .map(|mut v| { + v.sort_by_key(|e| e.0); + v + }) + .expect("pack used for counts is still available") + }); + offsets_oid_mapping + .binary_search_by_key(&base_offset, |e| e.0) + .ok() + .map(|idx| offsets_oid_mapping[idx].1) + }, + version, + )?; + Some(output::Entry::from_delta_ref( + count, + &delta, + *oid_index_mapping + .get(source_oid) + .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack + )) + }; + // Find existing delta + if let Some(entry) = find_existing_delta() { + stats.objects_copied_from_pack += 1; + entry + } + // Build delta + else if let Some((target, _)) = db_find_cached(&oid, buf_t)? { if let Some((source, _)) = db_find_cached(source_oid, buf_s)? { let delta_insts = crate::data::delta::compute_delta(source.data, target.data); @@ -287,11 +332,9 @@ pub(crate) mod function { inst.encode(&mut delta_data_buf) .expect("delta instruction should valid"); } - // Header with encoded size and will be encoded by `output::Entry::to_entry_header` - target.data = delta_data_buf.as_slice(); output::Entry::from_delta_ref( count, - &target, + &delta_data_buf.as_slice(), *oid_index_mapping .get(source_oid) .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack @@ -424,6 +467,38 @@ pub(crate) mod function { } } } + + fn find_delta<'a>( + count: &output::Count, + entry: &'a crate::find::Entry, + source_oid: &ObjectId, + mut pack_offset_to_oid: impl FnMut(u32, u64) -> Option, + target_version: crate::data::Version, + ) -> Option<&'a [u8]> { + if entry.version != target_version { + return None; + } + + let pack_offset_must_be_zero = 0; + let pack_entry = + crate::data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len()).ok()?; + + use crate::data::entry::Header::*; + match pack_entry.header { + OfsDelta { base_distance } => { + let pack_location = count.entry_pack_location.as_ref().expect("packed"); + let base_offset = pack_location + .pack_offset + .checked_sub(base_distance) + .expect("pack-offset - distance is firmly within the pack"); + pack_offset_to_oid(pack_location.pack_id, base_offset) + } + RefDelta { base_id } => Some(base_id), + _ => None, + } + .filter(|id| id == source_oid) + .map(|_| &entry.data[pack_entry.data_offset as usize..]) + } } mod util { diff --git a/gix-pack/src/data/output/entry/mod.rs b/gix-pack/src/data/output/entry/mod.rs index b8f234e055..fb066ad1c2 100644 --- a/gix-pack/src/data/output/entry/mod.rs +++ b/gix-pack/src/data/output/entry/mod.rs @@ -61,6 +61,7 @@ impl output::Entry { } /// Create an Entry from a previously counted object which is located in a pack. It's `entry` is provided here. + /// `potential_bases` should be sorted by `Count.entry_pack_location.pack_offset`. /// The `version` specifies what kind of target `Entry` version the caller desires. pub fn from_pack_entry( mut entry: find::Entry, @@ -152,19 +153,16 @@ impl output::Entry { } /// Like [`output::Entry::from_base()`], but with type OfsDelta. + /// `delta_data` is encoded instructions. Header with encoded size and will be encoded by `output::Entry::to_entry_header` /// `object_index` is the absolute index to the object. - pub fn from_delta_ref( - count: &output::Count, - obj: &gix_object::Data<'_>, - object_index: usize, - ) -> Result { + pub fn from_delta_ref(count: &output::Count, delta_data: &[u8], object_index: usize) -> Result { Ok(output::Entry { id: count.id.to_owned(), kind: Kind::DeltaRef { object_index }, - decompressed_size: obj.data.len(), + decompressed_size: delta_data.len(), compressed_data: { let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new()); - if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) { + if let Err(err) = std::io::copy(&mut &*delta_data, &mut out) { match err.kind() { std::io::ErrorKind::Other => return Err(Error::ZlibDeflate(err)), err => unreachable!("Should never see other errors than zlib, but got {:?}", err), diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index e203e76118..66346f7534 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -456,11 +456,12 @@ fn customized_delta_topo() -> crate::Result { let db = db(db_kind)?; // Get objects for testing + // TODO: delta chain may not stable let objects: Vec<_> = vec![ - hex_to_id("dc805c143bc9f4fcf6d333a7676f95a7f67651d8"), // base - hex_to_id("29484d17f163832a63fcc6c81f86d87bf7e56d40"), // delta @ 1 - hex_to_id("10d63474c0a8c66d24ad44b9673fe7e2d5bc2189"), // delta @ 2 - hex_to_id("c707160576c571775f9963c4efc97ba1b3ded920"), // delta @ 3 + hex_to_id("a63e479f22985d08b5debd6567e15999123d25a4"), // base + hex_to_id("d1ff3f36411c6eead64400062a7c8e30886b94ff"), // delta @ 1 + hex_to_id("37fbc9660088c6afad4b48169e80fe59670190d1"), // delta @ 2 + hex_to_id("dc2da8bbf4d82a654b35a2a43c0d714d4d7afbf9"), // delta @ 3 ]; // Count objects @@ -530,6 +531,24 @@ fn customized_delta_topo() -> crate::Result { }, )?; + // Test reuse delta + { + let entries_iter2 = output::entry::iter_from_counts( + counts2.clone(), + db.clone(), + Box::new(progress::Discard), + Options { + mode: Mode::CustomizedDeltaTopo { + topo: topo_with_deltas.to_owned(), + cache_capacity: 1024 * 1024, + }, + ..Default::default() + }, + ); + let stat = entries_iter2.finalize_boxed().unwrap(); + assert_eq!(stat.objects_copied_from_pack, 1); + } + let mut entries_iter2 = output::entry::iter_from_counts( counts2.clone(), db.clone(), From af5314f21d6d8397bb29399280f5f5b62e290bab Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 11:37:25 +0800 Subject: [PATCH 42/47] fix clippy --- gix-pack/src/data/output/entry/iter_from_counts.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 8181135456..6402d89236 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -310,7 +310,7 @@ pub(crate) mod function { )?; Some(output::Entry::from_delta_ref( count, - &delta, + delta, *oid_index_mapping .get(source_oid) .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack @@ -334,7 +334,7 @@ pub(crate) mod function { } output::Entry::from_delta_ref( count, - &delta_data_buf.as_slice(), + delta_data_buf.as_slice(), *oid_index_mapping .get(source_oid) .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack @@ -345,12 +345,10 @@ pub(crate) mod function { } else { Ok(output::Entry::invalid()) } + } else if let Some((data, _)) = db_find_cached(&oid, buf_t)? { + output::Entry::from_base(count, &data) } else { - if let Some((data, _)) = db_find_cached(&oid, buf_t)? { - output::Entry::from_base(count, &data) - } else { - Ok(output::Entry::invalid()) - } + Ok(output::Entry::invalid()) }?; out.push(entry); progress.inc(); From ad5405f421d4acf60b19bc7ec5a14b80056151ca Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 14:18:16 +0800 Subject: [PATCH 43/47] refactor: resolve_counts --- .../src/data/output/entry/iter_from_counts.rs | 76 +++++++++++-------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 6402d89236..d995618c53 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -64,39 +64,7 @@ pub(crate) mod function { ); let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(chunk_size, Some(counts.len()), thread_limit, None); - { - let progress = Arc::new(parking_lot::Mutex::new( - progress.add_child_with_id("resolving".into(), ProgressId::ResolveCounts.into()), - )); - progress.lock().init(None, gix_features::progress::count("counts")); - let enough_counts_present = counts.len() > 4_000; - let start = std::time::Instant::now(); - parallel::in_parallel_if( - || enough_counts_present, - counts.chunks_mut(chunk_size), - thread_limit, - |_n| Vec::::new(), - { - let progress = Arc::clone(&progress); - let db = db.clone(); - move |chunk, buf| { - let chunk_size = chunk.len(); - for count in chunk { - use crate::data::output::count::PackLocation::*; - match count.entry_pack_location { - LookedUp(_) => continue, - NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(&count.id, buf)), - } - } - progress.lock().inc_by(chunk_size); - Ok::<_, ()>(()) - } - }, - parallel::reduce::IdentityWithResult::<(), ()>::default(), - ) - .expect("infallible - we ignore none-existing objects"); - progress.lock().show_throughput(start); - } + resolve_counts(counts.as_mut_slice(), &db, &mut progress, thread_limit, chunk_size); match mode { Mode::PackCopyAndBaseObjects => { let counts_range_by_pack_id = rearrange_counts_by_pack_id(&mut counts, &mut progress); @@ -362,6 +330,48 @@ pub(crate) mod function { } } + fn resolve_counts( + counts: &mut [output::Count], + db: &Find, + progress: &mut Box, + thread_limit: Option, + chunk_size: usize, + ) where + Find: crate::Find + Send + Clone + 'static, + { + let progress = Arc::new(parking_lot::Mutex::new( + progress.add_child_with_id("resolving".into(), ProgressId::ResolveCounts.into()), + )); + progress.lock().init(None, gix_features::progress::count("counts")); + let enough_counts_present = counts.len() > 4_000; + let start = std::time::Instant::now(); + parallel::in_parallel_if( + || enough_counts_present, + counts.chunks_mut(chunk_size), + thread_limit, + |_n| Vec::::new(), + { + let progress = Arc::clone(&progress); + let db = db.clone(); + move |chunk, buf| { + let chunk_size = chunk.len(); + for count in chunk { + use crate::data::output::count::PackLocation::*; + match count.entry_pack_location { + LookedUp(_) => continue, + NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(&count.id, buf)), + } + } + progress.lock().inc_by(chunk_size); + Ok::<_, ()>(()) + } + }, + parallel::reduce::IdentityWithResult::<(), ()>::default(), + ) + .expect("infallible - we ignore none-existing objects"); + progress.lock().show_throughput(start); + } + fn rearrange_counts_by_pack_id( counts: &mut [output::Count], progress: &mut Box, From d25aa13440621bf033e0f7b559eade20e022b967 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 14:40:55 +0800 Subject: [PATCH 44/47] refactor: customized --- .../src/data/output/entry/iter_from_counts.rs | 511 ++++++++++-------- 1 file changed, 282 insertions(+), 229 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index d995618c53..20c8a65c70 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -1,5 +1,5 @@ pub(crate) mod function { - use std::{cmp::Ordering, sync::Arc}; + use std::sync::Arc; use gix_features::{ parallel, @@ -9,9 +9,8 @@ pub(crate) mod function { Progress, }, }; - use gix_hash::ObjectId; - use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; + use super::{reduce, util, Error, Mode, Options, Outcome}; use crate::data::output; /// Given a known list of object `counts`, calculate entries ready to be put into a data pack. @@ -64,10 +63,10 @@ pub(crate) mod function { ); let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(chunk_size, Some(counts.len()), thread_limit, None); - resolve_counts(counts.as_mut_slice(), &db, &mut progress, thread_limit, chunk_size); + util::resolve_counts(counts.as_mut_slice(), &db, &mut progress, thread_limit, chunk_size); match mode { Mode::PackCopyAndBaseObjects => { - let counts_range_by_pack_id = rearrange_counts_by_pack_id(&mut counts, &mut progress); + let counts_range_by_pack_id = util::rearrange_counts_by_pack_id(&mut counts, &mut progress); let sorted_counts = Arc::new(counts); let progress = Arc::new(parking_lot::Mutex::new(progress)); let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); @@ -184,230 +183,199 @@ pub(crate) mod function { reduce::Statistics::default(), )) } - #[cfg(feature = "pack-cache-lru-dynamic")] - Mode::CustomizedDeltaTopo { topo, cache_capacity } => { - if allow_thin_pack { - todo!("support allow_thin_pack"); - } + Mode::Customized => unimplemented!("should handle customized mode in other function"), + } + } +} - let sorted_counts = { - topo_sort(counts.as_mut_slice(), &topo).expect("no loop in delta topo"); - Arc::new(counts) - }; - let progress = Arc::new(parking_lot::Mutex::new(progress)); - let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); +/// Customized handler for counts. +pub mod customized { + use std::{cmp::Ordering, sync::Arc}; - // Cache decompressed data for Find::try_find_cached - let object_cache = Arc::new(std::sync::Mutex::new(crate::cache::lru::MemoryCappedHashmap::new( - cache_capacity, - ))); // TODO: use parking_lot::Mutex - let oid_index_mapping = Arc::new( - sorted_counts - .iter() - .enumerate() - .map(|(index, count)| (count.id, index)) - .collect::>(), - ); // TODO: rearrange delta solving order or lru to avoid cache peak - Box::new(parallel::reduce::Stepwise::new( - chunks.enumerate(), - thread_limit, - { - let progress = Arc::clone(&progress); - move |n| { - ( - // Cache entries object ID and offset for packs - std::collections::HashMap::>::new(), - // buffer object data for target - Vec::new(), - // buffer object data for source - Vec::new(), - progress - .lock() - .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), - ) - } - }, - { - let sorted_counts = Arc::clone(&sorted_counts); - let oid_index_mapping = Arc::clone(&oid_index_mapping); - let cache = Arc::clone(&object_cache); - move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), - (pack_index_cache, buf_t, buf_s, progress)| { - let mut out = Vec::new(); - let chunk = &sorted_counts[chunk_range]; - let mut stats = Outcome::default(); - progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + use gix_features::{ + parallel, + parallel::SequenceId, + progress::{ + prodash::{Count, DynNestedProgress}, + Progress, + }, + }; + use gix_hash::ObjectId; - // TODO: refactor needed - for count in chunk.iter() { - let oid = count.id; - let db_find_cached = |oid, buf| { - db.try_find_cached( - oid, - buf, - &mut *cache.lock().expect("other thread should not panic on cache"), - ) - .map_err(Error::Find) - }; - let entry = if let Some(source_oid) = topo.get(&oid) { - let mut find_existing_delta = || -> Option<_> { - let (_location, pack_entry) = count - .entry_pack_location - .as_ref() - .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe)))?; - let delta = find_delta( - count, - &pack_entry, - source_oid, - |pack_id, base_offset| { - let offsets_oid_mapping = - pack_index_cache.entry(pack_id).or_insert_with(|| { - db.pack_offsets_and_oid(pack_id) - .map(|mut v| { - v.sort_by_key(|e| e.0); - v - }) - .expect("pack used for counts is still available") - }); - offsets_oid_mapping - .binary_search_by_key(&base_offset, |e| e.0) - .ok() - .map(|idx| offsets_oid_mapping[idx].1) - }, - version, - )?; - Some(output::Entry::from_delta_ref( - count, - delta, - *oid_index_mapping - .get(source_oid) - .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack - )) - }; - // Find existing delta - if let Some(entry) = find_existing_delta() { - stats.objects_copied_from_pack += 1; - entry - } - // Build delta - else if let Some((target, _)) = db_find_cached(&oid, buf_t)? { - if let Some((source, _)) = db_find_cached(source_oid, buf_s)? { - let delta_insts = - crate::data::delta::compute_delta(source.data, target.data); - let mut delta_data_buf = Vec::new(); - for inst in delta_insts { - // Panic here because delta algorithm is incorrect, should fast fail - inst.encode(&mut delta_data_buf) - .expect("delta instruction should valid"); - } - output::Entry::from_delta_ref( - count, - delta_data_buf.as_slice(), - *oid_index_mapping - .get(source_oid) - .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack - ) - } else { - Ok(output::Entry::invalid()) - } - } else { - Ok(output::Entry::invalid()) - } - } else if let Some((data, _)) = db_find_cached(&oid, buf_t)? { - output::Entry::from_base(count, &data) - } else { - Ok(output::Entry::invalid()) - }?; - out.push(entry); - progress.inc(); - } - Ok((chunk_id, out, stats)) - } - }, - reduce::Statistics::default(), - )) - } - } - } + use super::{reduce, util, Error, Options, Outcome}; + use crate::data::output; - fn resolve_counts( - counts: &mut [output::Count], - db: &Find, - progress: &mut Box, - thread_limit: Option, - chunk_size: usize, - ) where + type Topo = std::collections::HashMap; + + /// Like [`function::iter_from_counts`], but can determine + /// whether an object is a base or a delta based on topological relationships. + /// + /// Key object refers to delta target, value object refers to delta source. + /// Treat objects missing in keys as base objects. + /// + /// If the required delta does not exist, it will be computed. + #[cfg(feature = "pack-cache-lru-dynamic")] + pub fn iter_from_counts_with_topo( + mut counts: Vec, + db: Find, + progress: Box, + topo: Topo, + cache_capacity: usize, + Options { + version, + mode, + allow_thin_pack, + thread_limit, + chunk_size, + }: Options, + ) -> Box + where Find: crate::Find + Send + Clone + 'static, { - let progress = Arc::new(parking_lot::Mutex::new( - progress.add_child_with_id("resolving".into(), ProgressId::ResolveCounts.into()), - )); - progress.lock().init(None, gix_features::progress::count("counts")); - let enough_counts_present = counts.len() > 4_000; - let start = std::time::Instant::now(); - parallel::in_parallel_if( - || enough_counts_present, - counts.chunks_mut(chunk_size), + if allow_thin_pack { + todo!("support allow_thin_pack"); + } + + assert!( + matches!(mode, super::types::Mode::Customized), + "mode except Customized should be handled by other function" + ); + + let sorted_counts = { + topo_sort(counts.as_mut_slice(), &topo).expect("no loop in delta topo"); + Arc::new(counts) + }; + let progress = Arc::new(parking_lot::Mutex::new(progress)); + let chunks = util::ChunkRanges::new(chunk_size, sorted_counts.len()); + + // Cache decompressed data for Find::try_find_cached + let object_cache = Arc::new(std::sync::Mutex::new(crate::cache::lru::MemoryCappedHashmap::new( + cache_capacity, + ))); // TODO: use parking_lot::Mutex + let oid_index_mapping = Arc::new( + sorted_counts + .iter() + .enumerate() + .map(|(index, count)| (count.id, index)) + .collect::>(), + ); // TODO: rearrange delta solving order or lru to avoid cache peak + Box::new(parallel::reduce::Stepwise::new( + chunks.enumerate(), thread_limit, - |_n| Vec::::new(), { let progress = Arc::clone(&progress); - let db = db.clone(); - move |chunk, buf| { - let chunk_size = chunk.len(); - for count in chunk { - use crate::data::output::count::PackLocation::*; - match count.entry_pack_location { - LookedUp(_) => continue, - NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(&count.id, buf)), - } + move |n| { + ( + // Cache entries object ID and offset for packs + std::collections::HashMap::>::new(), + // buffer object data for target + Vec::new(), + // buffer object data for source + Vec::new(), + progress + .lock() + .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), + ) + } + }, + { + let sorted_counts = Arc::clone(&sorted_counts); + let oid_index_mapping = Arc::clone(&oid_index_mapping); + let cache = Arc::clone(&object_cache); + move |(chunk_id, chunk_range): (SequenceId, std::ops::Range), + (pack_index_cache, buf_t, buf_s, progress)| { + let mut out = Vec::new(); + let chunk = &sorted_counts[chunk_range]; + let mut stats = Outcome::default(); + progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + + // TODO: refactor needed + for count in chunk.iter() { + let oid = count.id; + let db_find_cached = |oid, buf| { + db.try_find_cached( + oid, + buf, + &mut *cache.lock().expect("other thread should not panic on cache"), + ) + .map_err(Error::Find) + }; + let entry = if let Some(source_oid) = topo.get(&oid) { + let mut find_existing_delta = || -> Option<_> { + let (_location, pack_entry) = count + .entry_pack_location + .as_ref() + .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe)))?; + let delta = find_delta( + count, + &pack_entry, + source_oid, + |pack_id, base_offset| { + let offsets_oid_mapping = + pack_index_cache.entry(pack_id).or_insert_with(|| { + db.pack_offsets_and_oid(pack_id) + .map(|mut v| { + v.sort_by_key(|e| e.0); + v + }) + .expect("pack used for counts is still available") + }); + offsets_oid_mapping + .binary_search_by_key(&base_offset, |e| e.0) + .ok() + .map(|idx| offsets_oid_mapping[idx].1) + }, + version, + )?; + Some(output::Entry::from_delta_ref( + count, + delta, + *oid_index_mapping + .get(source_oid) + .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack + )) + }; + // Find existing delta + if let Some(entry) = find_existing_delta() { + stats.objects_copied_from_pack += 1; + entry + } + // Build delta + else if let Some((target, _)) = db_find_cached(&oid, buf_t)? { + if let Some((source, _)) = db_find_cached(source_oid, buf_s)? { + let delta_insts = crate::data::delta::compute_delta(source.data, target.data); + let mut delta_data_buf = Vec::new(); + for inst in delta_insts { + // Panic here because delta algorithm is incorrect, should fast fail + inst.encode(&mut delta_data_buf) + .expect("delta instruction should valid"); + } + output::Entry::from_delta_ref( + count, + delta_data_buf.as_slice(), + *oid_index_mapping + .get(source_oid) + .expect("all target and source objects should in ONE pack"), // TODO: allow ref delta in thin pack + ) + } else { + Ok(output::Entry::invalid()) + } + } else { + Ok(output::Entry::invalid()) + } + } else if let Some((data, _)) = db_find_cached(&oid, buf_t)? { + output::Entry::from_base(count, &data) + } else { + Ok(output::Entry::invalid()) + }?; + out.push(entry); + progress.inc(); } - progress.lock().inc_by(chunk_size); - Ok::<_, ()>(()) + Ok((chunk_id, out, stats)) } }, - parallel::reduce::IdentityWithResult::<(), ()>::default(), - ) - .expect("infallible - we ignore none-existing objects"); - progress.lock().show_throughput(start); - } - - fn rearrange_counts_by_pack_id( - counts: &mut [output::Count], - progress: &mut Box, - ) -> Vec<(u32, std::ops::Range)> { - let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); - progress.init(Some(counts.len()), gix_features::progress::count("counts")); - let start = std::time::Instant::now(); - - use crate::data::output::count::PackLocation::*; - counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { - (LookedUp(None), LookedUp(None)) => Ordering::Equal, - (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, - (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, - (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs - .pack_id - .cmp(&rhs.pack_id) - .then(lhs.pack_offset.cmp(&rhs.pack_offset)), - (_, _) => unreachable!("counts were resolved beforehand"), - }); - - let mut index: Vec<(u32, std::ops::Range)> = Vec::new(); - let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); - let mut slice = &counts[chunks_pack_start..]; - while !slice.is_empty() { - let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; - let pack_end = slice - .partition_point(|e| e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id); - index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); - slice = &slice[pack_end..]; - chunks_pack_start += pack_end; - } - - progress.set(counts.len()); - progress.show_throughput(start); - - index + reduce::Statistics::default(), + )) } /// Topological sort `counts` in place, parents first. @@ -559,6 +527,101 @@ mod util { } } } + + pub fn resolve_counts( + counts: &mut [crate::data::output::Count], + db: &Find, + progress: &mut Box, + thread_limit: Option, + chunk_size: usize, + ) where + Find: crate::Find + Send + Clone + 'static, + { + use std::sync::Arc; + + use gix_features::{ + parallel, + progress::{Count, Progress}, + }; + + use super::ProgressId; + + let progress = Arc::new(parking_lot::Mutex::new( + progress.add_child_with_id("resolving".into(), ProgressId::ResolveCounts.into()), + )); + progress.lock().init(None, gix_features::progress::count("counts")); + let enough_counts_present = counts.len() > 4_000; + let start = std::time::Instant::now(); + parallel::in_parallel_if( + || enough_counts_present, + counts.chunks_mut(chunk_size), + thread_limit, + |_n| Vec::::new(), + { + let progress = Arc::clone(&progress); + let db = db.clone(); + move |chunk, buf| { + let chunk_size = chunk.len(); + for count in chunk { + use crate::data::output::count::PackLocation::*; + match count.entry_pack_location { + LookedUp(_) => continue, + NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(&count.id, buf)), + } + } + progress.lock().inc_by(chunk_size); + Ok::<_, ()>(()) + } + }, + parallel::reduce::IdentityWithResult::<(), ()>::default(), + ) + .expect("infallible - we ignore none-existing objects"); + progress.lock().show_throughput(start); + } + + pub fn rearrange_counts_by_pack_id( + counts: &mut [crate::data::output::Count], + progress: &mut Box, + ) -> Vec<(u32, std::ops::Range)> { + use std::cmp::Ordering; + + use gix_features::progress::{Count, Progress}; + + use super::ProgressId; + + let mut progress = progress.add_child_with_id("sorting".into(), ProgressId::SortEntries.into()); + progress.init(Some(counts.len()), gix_features::progress::count("counts")); + let start = std::time::Instant::now(); + + use crate::data::output::count::PackLocation::*; + counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { + (LookedUp(None), LookedUp(None)) => Ordering::Equal, + (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, + (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, + (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs + .pack_id + .cmp(&rhs.pack_id) + .then(lhs.pack_offset.cmp(&rhs.pack_offset)), + (_, _) => unreachable!("counts were resolved beforehand"), + }); + + let mut index: Vec<(u32, std::ops::Range)> = Vec::new(); + let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); + let mut slice = &counts[chunks_pack_start..]; + while !slice.is_empty() { + let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; + let pack_end = slice + .partition_point(|e| e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id); + index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); + slice = &slice[pack_end..]; + chunks_pack_start += pack_end; + } + + progress.set(counts.len()); + progress.show_throughput(start); + + index + } } mod reduce { @@ -646,18 +709,8 @@ mod types { /// from existing pack compression and spending the smallest possible time on compressing unpacked objects at /// the cost of bandwidth. PackCopyAndBaseObjects, - /// Determine whether an object is a base or a delta based on topological relationships. - /// Key object refers to delta target, value object refers to delta source. - /// Treat objects missing in keys as base objects. - /// If the required delta does not exist, it will be computed. - #[cfg_attr(feature = "serde", serde(skip))] - #[cfg(feature = "pack-cache-lru-dynamic")] - CustomizedDeltaTopo { - /// A mapping from a delta target's Object ID to its corresponding delta source (base) ID. - topo: std::collections::HashMap, - /// The maximum cache capacity to store object data while find object. Count in bytes. - cache_capacity: usize, - }, + /// Other customized process for counts. + Customized, } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. From 7ee244164d77fd54c621754c7d2bfa5a702aed8c Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 14:51:39 +0800 Subject: [PATCH 45/47] revert breaking changes to Mode & Options --- gix-pack/src/data/output/entry/iter_from_counts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 20c8a65c70..3a6df50f74 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -701,7 +701,7 @@ mod types { } /// The way the iterator operates. - #[derive(Debug)] + #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum Mode { /// Copy base objects and deltas from packs, while non-packed objects will be treated as base objects @@ -714,7 +714,7 @@ mod types { } /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. - #[derive(Debug)] + #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Options { /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used. From 799f727d57a78fa8aadb9272f4fd2dfc0f773988 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 14:56:25 +0800 Subject: [PATCH 46/47] fix doc --- gix-pack/src/data/output/entry/iter_from_counts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gix-pack/src/data/output/entry/iter_from_counts.rs b/gix-pack/src/data/output/entry/iter_from_counts.rs index 3a6df50f74..40f870d464 100644 --- a/gix-pack/src/data/output/entry/iter_from_counts.rs +++ b/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -207,7 +207,7 @@ pub mod customized { type Topo = std::collections::HashMap; - /// Like [`function::iter_from_counts`], but can determine + /// Like [`super::function::iter_from_counts`], but can determine /// whether an object is a base or a delta based on topological relationships. /// /// Key object refers to delta target, value object refers to delta source. From e6053a084fd5218e2f4db220854a9318e0219ec3 Mon Sep 17 00:00:00 2001 From: HairlessVillager Date: Sun, 26 Apr 2026 15:18:13 +0800 Subject: [PATCH 47/47] fix test --- .../pack/data/output/count_and_entries.rs | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index 66346f7534..1b4cccc0c8 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -481,15 +481,14 @@ fn customized_delta_topo() -> crate::Result { { let topo = std::collections::HashMap::new(); - let mut entries_iter = output::entry::iter_from_counts( + let mut entries_iter = output::entry::iter_from_counts::customized::iter_from_counts_with_topo( counts, db.clone(), Box::new(progress::Discard), + topo, + 1024 * 1024, // 1MB cache Options { - mode: Mode::CustomizedDeltaTopo { - topo, - cache_capacity: 1024 * 1024, // 1MB cache - }, + mode: Mode::Customized, ..Default::default() }, ); @@ -533,15 +532,14 @@ fn customized_delta_topo() -> crate::Result { // Test reuse delta { - let entries_iter2 = output::entry::iter_from_counts( + let entries_iter2 = output::entry::iter_from_counts::customized::iter_from_counts_with_topo( counts2.clone(), db.clone(), Box::new(progress::Discard), + topo_with_deltas.to_owned(), + 1024 * 1024, Options { - mode: Mode::CustomizedDeltaTopo { - topo: topo_with_deltas.to_owned(), - cache_capacity: 1024 * 1024, - }, + mode: Mode::Customized, ..Default::default() }, ); @@ -549,15 +547,14 @@ fn customized_delta_topo() -> crate::Result { assert_eq!(stat.objects_copied_from_pack, 1); } - let mut entries_iter2 = output::entry::iter_from_counts( + let mut entries_iter2 = output::entry::iter_from_counts::customized::iter_from_counts_with_topo( counts2.clone(), db.clone(), Box::new(progress::Discard), + topo_with_deltas, + 1024 * 1024, Options { - mode: Mode::CustomizedDeltaTopo { - topo: topo_with_deltas, - cache_capacity: 1024 * 1024, - }, + mode: Mode::Customized, ..Default::default() }, );