Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
200284c
feat: simple compute delta
HairlessVillager Apr 21, 2026
503ed41
feat: encode_size
HairlessVillager Apr 21, 2026
3839de3
feat: encode instruction
HairlessVillager Apr 21, 2026
c3c963e
refactor: iter_from_counts
HairlessVillager Apr 21, 2026
8e49862
fix: remove dead derive for Options and Mode
HairlessVillager Apr 21, 2026
88e611d
feat: Mode::CustomedDeltaTopo
HairlessVillager Apr 21, 2026
4f94165
fix typo
HairlessVillager Apr 21, 2026
63f21c8
refactor: add help for iter_from_counts
HairlessVillager Apr 21, 2026
9e5a6bb
wip: feat: CustomizedDeltaTopo
HairlessVillager Apr 22, 2026
f1db790
fix: remove Serialize & Deserialize for ObjectIdMap
HairlessVillager Apr 22, 2026
503d57d
feat: CustomizedDeltaTopo poc
HairlessVillager Apr 23, 2026
609143a
feat: basic parallel
HairlessVillager Apr 23, 2026
79cf84e
fix: use map_err on Results
HairlessVillager Apr 23, 2026
9eaf5ec
refactor errors
HairlessVillager Apr 23, 2026
3aeada3
fix dynamic method dispatch
HairlessVillager Apr 23, 2026
53ad1c5
add test
HairlessVillager Apr 23, 2026
1a681d6
feat: use slice in Instruction::Add
HairlessVillager Apr 23, 2026
dedcdcf
refactor test
HairlessVillager Apr 23, 2026
5d8d967
fix ai review
HairlessVillager Apr 23, 2026
6fabf3c
fix ci
HairlessVillager Apr 23, 2026
698dda1
fix lint
HairlessVillager Apr 23, 2026
5de3cc7
fix doc ci
HairlessVillager Apr 23, 2026
3e320f9
remove confusing comment
HairlessVillager Apr 23, 2026
351b70b
fix nextest ci
HairlessVillager Apr 23, 2026
aa02be3
fix: wrong delta ref index
HairlessVillager Apr 24, 2026
dd4daf1
fix: write_all when encode instruction
HairlessVillager Apr 24, 2026
e7010a1
fix: zero size Instruction::Add
HairlessVillager Apr 24, 2026
99def97
Merge remote-tracking branch 'upstream/main' into feat/delta
HairlessVillager Apr 24, 2026
19ca9db
add comments
HairlessVillager Apr 24, 2026
0e5c48f
test CustomizedDeltaTopo
HairlessVillager Apr 24, 2026
44f594b
fix codex review
HairlessVillager Apr 25, 2026
fc2d8a4
fix test
HairlessVillager Apr 25, 2026
893fef2
fix visibility of DynFinalizeIterator
HairlessVillager Apr 25, 2026
1c623b8
fix: use write_all in apply
HairlessVillager Apr 25, 2026
02e8695
fix: Instruction encode error
HairlessVillager Apr 25, 2026
b45c2bc
fix typo
HairlessVillager Apr 25, 2026
8c99041
fix delta lifetime
HairlessVillager Apr 25, 2026
e2005d2
test on objects order
HairlessVillager Apr 25, 2026
681b131
fix objects order
HairlessVillager Apr 25, 2026
fa29be0
fix lint
HairlessVillager Apr 25, 2026
abf3452
test: use objects on a delta chain
HairlessVillager Apr 26, 2026
6eee1ee
feat: reuse delta
HairlessVillager Apr 26, 2026
af5314f
fix clippy
HairlessVillager Apr 26, 2026
ad5405f
refactor: resolve_counts
HairlessVillager Apr 26, 2026
d25aa13
refactor: customized
HairlessVillager Apr 26, 2026
7ee2441
revert breaking changes to Mode & Options
HairlessVillager Apr 26, 2026
799f727
fix doc
HairlessVillager Apr 26, 2026
e6053a0
fix test
HairlessVillager Apr 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions gitoxide-core/src/pack/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::{ffi::OsStr, io, path::Path, str::FromStr, time::Instant};

use anyhow::anyhow;
use gix::{
hash, hash::ObjectId, interrupt, objs::bstr::ByteVec, odb::pack, parallel::InOrderIter, prelude::Finalize,
progress, traverse, Count, NestedProgress, Progress,
hash, hash::ObjectId, interrupt, objs::bstr::ByteVec, odb::pack, parallel::InOrderIter, progress, traverse, Count,
NestedProgress, Progress,
};

use crate::OutputFormat;
Expand Down Expand Up @@ -284,7 +284,7 @@ where
} else {
writeln!(out, "{pack_name}")?;
}
stats.entries = in_order_entries.inner.finalize()?;
stats.entries = in_order_entries.inner.finalize_boxed()?;

write_progress.show_throughput(start);
entries_progress.show_throughput(start);
Expand Down
1 change: 1 addition & 0 deletions gix-hashtable/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub use hashbrown::{hash_map, hash_set, hash_table, Equivalent};
/// thread-safe types
pub mod sync {
/// A map for associating data with object ids in a thread-safe fashion. It should scale well up to 256 threads.
#[derive(Debug)]
pub struct ObjectIdMap<V> {
/// Sharing is done by the first byte of the incoming object id.
shards: [parking_lot::Mutex<super::HashMap<gix_hash::ObjectId, V>>; 256],
Expand Down
2 changes: 1 addition & 1 deletion gix-pack/src/cache/delta/traverse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub enum Error {
#[error("Failed to spawn thread when switching to work-stealing mode")]
SpawnThread(#[from] std::io::Error),
#[error(transparent)]
Delta(#[from] crate::data::delta::apply::Error),
Delta(#[from] crate::data::delta::ApplyError),
}

/// Additional context passed to the `inspect_object(…)` function of the [`Tree::traverse()`] method.
Expand Down
206 changes: 175 additions & 31 deletions gix-pack/src/data/delta.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,42 @@
///
pub mod apply {
/// Returned when failing to apply deltas.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
#[error("Corrupt delta data: {message}")]
Corrupt { message: &'static str },
#[error("Encountered unsupported command code: 0")]
UnsupportedCommandCode,
#[error("Delta copy from base: byte slices must match")]
DeltaCopyBaseSliceMismatch,
#[error("Delta copy data: byte slices must match")]
DeltaCopyDataSliceMismatch,
}
use std::io::Write;

/// Returned when failing to apply deltas.
///
/// Produced by the delta header decoder and by `apply` when the delta stream is malformed
/// or when the output buffer does not match what the instructions produce.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum ApplyError {
/// The delta bytes are structurally invalid; `message` names the specific problem
/// (truncated header or instruction, out-of-range copy, reserved command 0, too little output, …).
#[error("Corrupt delta data: {message}")]
Corrupt { message: &'static str },
/// NOTE(review): not constructed by the code visible in this file, which reports command 0
/// through `Corrupt` instead — confirm whether this variant is still needed.
#[error("Encountered unsupported command code: 0")]
UnsupportedCommandCode,
/// Writing bytes copied from the base object into the target buffer failed.
#[error("Delta copy from base: byte slices must match")]
DeltaCopyBaseSliceMismatch,
/// Writing bytes embedded in the delta (insert data) into the target buffer failed.
#[error("Delta copy data: byte slices must match")]
DeltaCopyDataSliceMismatch,
}

/// Returned when failing to encode deltas.
///
/// Produced by `Instruction::encode` either when the underlying writer fails or when an
/// instruction's operands do not fit into the delta instruction encoding.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum EncodeError {
/// The writer passed to `encode` returned an error; the original I/O error is carried along.
#[error("Failed to write bytes: {0}")]
IOError(std::io::Error),
/// The offset of a copy instruction exceeds `0xffffffff`; carries the offending offset.
#[error("Too large offset in Copy instruction, should <= 0xffffffff, got {0}")]
TooLargeOffset(usize),
/// The size of a copy instruction exceeds `0x00ffffff`; carries the offending size.
#[error("Too large size in Copy instruction, should <= 0x00ffffff, got {0}")]
TooLargeSize(usize),
/// The data of an add instruction is longer than 127 bytes; carries the offending length.
#[error("Too large data in Add instruction, length should <= 127, got {0}")]
TooLargeData(usize),
}

/// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size)
/// Equivalent to [this canonical git function](https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/delta.h#L89)
pub(crate) fn decode_header_size(d: &[u8]) -> Result<(u64, usize), apply::Error> {
pub(crate) fn decode_header_size(d: &[u8]) -> Result<(u64, usize), ApplyError> {
let mut shift = 0;
let mut size = 0u64;
let mut consumed = 0;
for cmd in d.iter() {
if shift >= u64::BITS {
return Err(apply::Error::Corrupt {
return Err(ApplyError::Corrupt {
message: "delta header size uses more bits than fit into u64",
});
}
Expand All @@ -34,14 +47,14 @@ pub(crate) fn decode_header_size(d: &[u8]) -> Result<(u64, usize), apply::Error>
return Ok((size, consumed));
}
}
Err(apply::Error::Corrupt {
Err(ApplyError::Corrupt {
message: "delta header size is truncated",
})
}

pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), apply::Error> {
fn next_byte(data: &[u8], i: &mut usize) -> Result<u8, apply::Error> {
let byte = *data.get(*i).ok_or(apply::Error::Corrupt {
pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(), ApplyError> {
fn next_byte(data: &[u8], i: &mut usize) -> Result<u8, ApplyError> {
let byte = *data.get(*i).ok_or(ApplyError::Corrupt {
message: "delta copy instruction is truncated",
})?;
*i += 1;
Expand All @@ -52,6 +65,7 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(
while let Some(cmd) = data.get(i) {
i += 1;
match cmd {
// Copy
cmd if cmd & 0b1000_0000 != 0 => {
let (mut ofs, mut size): (u32, u32) = (0, 0);
if cmd & 0b0000_0001 != 0 {
Expand Down Expand Up @@ -79,33 +93,33 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(
size = 0x10000; // 65536
}
let ofs = ofs as usize;
let end = ofs.checked_add(size as usize).ok_or(apply::Error::Corrupt {
let end = ofs.checked_add(size as usize).ok_or(ApplyError::Corrupt {
message: "delta copy range overflows",
})?;
std::io::Write::write(
std::io::Write::write_all(
&mut target,
base.get(ofs..end).ok_or(apply::Error::Corrupt {
base.get(ofs..end).ok_or(ApplyError::Corrupt {
message: "delta copy range exceeds base object size",
})?,
)
.map_err(|_e| apply::Error::DeltaCopyBaseSliceMismatch)?;
.map_err(|_e| ApplyError::DeltaCopyBaseSliceMismatch)?;
}
0 => {
return Err(apply::Error::Corrupt {
return Err(ApplyError::Corrupt {
message: "delta command 0 is reserved and invalid",
})
}
size => {
let end = i.checked_add(*size as usize).ok_or(apply::Error::Corrupt {
let end = i.checked_add(*size as usize).ok_or(ApplyError::Corrupt {
message: "delta insert range overflows",
})?;
std::io::Write::write(
std::io::Write::write_all(
&mut target,
data.get(i..end).ok_or(apply::Error::Corrupt {
data.get(i..end).ok_or(ApplyError::Corrupt {
message: "delta insert data is truncated",
})?,
)
.map_err(|_e| apply::Error::DeltaCopyDataSliceMismatch)?;
.map_err(|_e| ApplyError::DeltaCopyDataSliceMismatch)?;
i = end;
}
}
Expand All @@ -116,10 +130,140 @@ pub(crate) fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) -> Result<(
"delta instructions were not consumed completely, should be impossible"
);
if !target.is_empty() {
return Err(apply::Error::Corrupt {
return Err(ApplyError::Corrupt {
message: "delta instructions produced fewer bytes than promised",
});
}

Ok(())
}

/// A single delta instruction, describing how to produce the next bytes of a target buffer.
///
/// A sequence of instructions reconstructs a target from a source (base) buffer: each one
/// either copies a range out of the source or inserts literal bytes carried by the instruction.
///
/// The type only holds two integers or a borrowed slice, so it is cheap to copy and can be
/// compared for equality, which is convenient in tests and when inspecting computed deltas.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Instruction<'a> {
    /// Copy `size` bytes from the source, starting at `offset`.
    Copy {
        /// Position in the source at which copying starts, in bytes.
        offset: usize,
        /// Number of bytes to copy.
        size: usize,
    },
    /// Insert bytes embedded in the instruction itself.
    Add {
        /// The literal bytes to append to the target.
        data: &'a [u8],
    },
}

impl Instruction<'_> {
    /// Encode this instruction into the delta byte format and write it to `writer`.
    ///
    /// * `Copy` is written as a header byte with the high bit set, followed by only the
    ///   non-zero little-endian bytes of `offset` (up to 4) and `size` (up to 3). Bits 0-3 of
    ///   the header mark which offset bytes are present, bits 4-6 mark which size bytes are.
    ///   A size of `0x10000` is written without any size bytes, which is how the decoder
    ///   represents that value.
    /// * `Add` is written as a header byte holding the data length (1-127) followed by the data.
    ///
    /// Instructions that produce no output (`Copy` with `size == 0`, `Add` with empty `data`)
    /// write nothing at all. A literal encoding of them would be wrong: a copy without size
    /// bytes decodes as 0x10000 bytes, and an add header of `0` is a reserved command.
    ///
    /// # Errors
    ///
    /// * [`EncodeError::TooLargeSize`] if a copy size exceeds `0x00ffffff`.
    /// * [`EncodeError::TooLargeOffset`] if a copy offset exceeds `0xffffffff`.
    /// * [`EncodeError::TooLargeData`] if add data is longer than 127 bytes.
    /// * [`EncodeError::IOError`] if `writer` fails.
    pub fn encode(self, mut writer: impl Write) -> Result<(), EncodeError> {
        match self {
            Self::Copy { offset, size } => {
                if size > 0x00ff_ffff {
                    return Err(EncodeError::TooLargeSize(size));
                }
                if offset > 0xffff_ffff {
                    return Err(EncodeError::TooLargeOffset(offset));
                }
                if size == 0 {
                    // Nothing to copy. Emitting a header without size bytes would be decoded
                    // as a 0x10000 byte copy, so the only faithful encoding is no bytes.
                    return Ok(());
                }
                // An absent size means 0x10000 to the decoder, saving one byte for that value.
                let size = if size == 0x10000 { 0 } else { size };

                // buf[0] is the header, followed by at most 4 offset and 3 size bytes.
                let mut buf = [0u8; 8];
                buf[0] = 0x80;
                let mut len = 1;
                let offset_bytes = offset.to_le_bytes();
                let size_bytes = size.to_le_bytes();
                let operands = offset_bytes.iter().take(4).chain(size_bytes.iter().take(3));
                // The position in the chained sequence is exactly the header bit to set.
                for (bit, &byte) in operands.enumerate() {
                    if byte != 0 {
                        buf[0] |= 1 << bit;
                        buf[len] = byte;
                        len += 1;
                    }
                }

                writer.write_all(&buf[..len]).map_err(EncodeError::IOError)
            }
            Self::Add { data } => {
                if data.len() > 127 {
                    return Err(EncodeError::TooLargeData(data.len()));
                }
                if data.is_empty() {
                    // A header of 0 is a reserved command the decoder rejects.
                    return Ok(());
                }

                // Cannot truncate: the length was just checked to be at most 127.
                let header = data.len() as u8;
                writer.write_all(&[header]).map_err(EncodeError::IOError)?;
                writer.write_all(data).map_err(EncodeError::IOError)
            }
        }
    }
}

/// Calculate delta instructions that turn `source` into `target`.
///
/// This is a deliberately simple strategy: the longest common prefix of both buffers is
/// copied from `source`, and everything after it in `target` is inserted literally.
///
/// All returned instructions respect the limits of the delta encoding so that each of them
/// can be encoded: copies are split so that no single one is larger than `0x00ffffff` bytes,
/// copy offsets never exceed 32 bits (any prefix beyond that is inserted literally instead),
/// and literal data is split into pieces of at most 127 bytes. No zero-length instruction is
/// produced.
pub fn compute_delta<'a>(source: &[u8], target: &'a [u8]) -> Vec<Instruction<'a>> {
    // TODO: more efficient
    // TODO: more configurable

    /// Largest size a single copy instruction can carry (3 bytes).
    const MAX_COPY_SIZE: usize = 0x00ff_ffff;
    /// Copy offsets are limited to 4 bytes.
    const MAX_COPY_OFFSET: usize = 0xffff_ffff;
    /// Largest amount of literal data a single add instruction can carry (7 bits).
    const MAX_ADD_SIZE: usize = 127;

    let common_prefix_len = source.iter().zip(target).take_while(|(s, t)| s == t).count();
    // Only copy the part of the prefix whose chunk offsets remain encodable.
    let copy_len = common_prefix_len.min(MAX_COPY_OFFSET);

    let mut insts = Vec::new();
    let mut offset = 0;
    while offset < copy_len {
        let size = (copy_len - offset).min(MAX_COPY_SIZE);
        insts.push(Instruction::Copy { offset, size });
        offset += size;
    }
    insts.extend(
        target[copy_len..]
            .chunks(MAX_ADD_SIZE)
            .map(|data| Instruction::Add { data }),
    );
    insts
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reference interpreter for a list of instructions, independent of the byte encoding:
    /// copies are taken from `source`, added data is appended as is.
    fn apply_delta(source: &[u8], delta: &[Instruction<'_>]) -> Vec<u8> {
        let mut buf = Vec::new();
        for inst in delta {
            match *inst {
                Instruction::Add { data } => buf.extend_from_slice(data),
                Instruction::Copy { offset, size } => buf.extend_from_slice(&source[offset..offset + size]),
            }
        }
        buf
    }

    /// Compute a delta from `source` to `target` and check that it reproduces `target`, both
    /// when interpreting the instructions directly and after encoding them and running `apply`.
    fn assert_roundtrip(source: &[u8], target: &[u8]) {
        let delta = compute_delta(source, target);
        assert_eq!(apply_delta(source, &delta), target);

        let mut delta_data = Vec::new();
        for inst in delta {
            inst.encode(&mut delta_data).unwrap();
        }

        let mut restored_target = vec![0u8; target.len()];
        apply(source, &mut restored_target, &delta_data).unwrap();
        assert_eq!(restored_target, target);
    }

    #[test]
    fn make_it_right() {
        // Shared prefix followed by differing content.
        assert_roundtrip(b"hello, world", b"hello, gitoxide");
        // Identical buffers: a single copy, no literal data.
        assert_roundtrip(b"same", b"same");
        // Nothing in common: literal data only.
        assert_roundtrip(b"", b"abc");
        // A literal tail that needs more than one add instruction.
        let mut long_target = b"hello, ".to_vec();
        long_target.extend(std::iter::repeat(b'x').take(300));
        assert_roundtrip(b"hello, world", &long_target);
    }
}
2 changes: 1 addition & 1 deletion gix-pack/src/data/file/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub enum Error {
#[error("Entry too large to fit in memory")]
OutOfMemory,
#[error(transparent)]
Delta(#[from] crate::data::delta::apply::Error),
Delta(#[from] crate::data::delta::ApplyError),
}

impl From<TryReserveError> for Error {
Expand Down
2 changes: 1 addition & 1 deletion gix-pack/src/data/output/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ where
}
self.pack_offsets_and_validity.push((self.written, true));
let header = entry.to_entry_header(self.entry_version, |index| {
let (base_offset, is_valid_object) = self.pack_offsets_and_validity[index];
let (base_offset, is_valid_object) = self.pack_offsets_and_validity.get(index).expect("objects in pack should be sorted");
if !is_valid_object {
unreachable!("if you see this the object database is correct as a delta refers to a non-existing object")
}
Expand Down
Loading
Loading