diff --git a/Cargo.lock b/Cargo.lock index 3dbaaea9cb..49e54c5838 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2067,7 +2067,6 @@ dependencies = [ "document-features", "gix-actor", "gix-date", - "gix-error", "gix-features", "gix-hash", "gix-hashtable", diff --git a/gitoxide-core/src/hours/mod.rs b/gitoxide-core/src/hours/mod.rs index 0aa7ad60a1..d07e5c6e36 100644 --- a/gitoxide-core/src/hours/mod.rs +++ b/gitoxide-core/src/hours/mod.rs @@ -83,8 +83,9 @@ fn parse_trailer_identity(trailer: gix::objs::commit::message::body::TrailerRef< /// Return `(commit_author, [commit_author, co_authors...])`. Use the `commit_author` for easy access to the commit author itself. fn commit_author_identities( commit_data: &[u8], + hash_kind: gix::hash::Kind, ) -> Result<(gix::actor::SignatureRef<'_>, SmallVec<[ParsedIdentity<'_>; 2]>), gix::objs::decode::Error> { - let commit = gix::objs::CommitRef::from_bytes(commit_data)?; + let commit = gix::objs::CommitRef::from_bytes(commit_data, hash_kind)?; let author = commit.author()?.trim(); let mut authors = smallvec![ParsedIdentity::Borrowed(gix::actor::IdentityRef::from(author))]; authors.extend(commit.co_authored_by_trailers().filter_map(parse_trailer_identity)); @@ -130,7 +131,7 @@ where let extract_signatures = scope.spawn(move || -> anyhow::Result> { let mut out = Vec::new(); for (commit_idx, commit_data) in rx { - if let Ok((commit_author, authors)) = commit_author_identities(&commit_data) { + if let Ok((commit_author, authors)) = commit_author_identities(&commit_data, commit_id.kind()) { let mut string_ref = |s: &[u8]| -> &'static BStr { match string_heap.get(s) { Some(n) => n.as_bstr(), @@ -445,7 +446,7 @@ body\n\ \n\ Co-authored-by: Second Author \n\ Co-authored-by: Third Author \n"; - let (author, authors) = commit_author_identities(commit).expect("valid commit"); + let (author, authors) = commit_author_identities(commit, gix::hash::Kind::Sha1).expect("valid commit"); assert_eq!(author.time, "1710000000 +0000"); assert_eq!( authors @@ -478,7 +479,7 @@ committer Main Author 1710000000 +0000\n\ subject\n\ \n\ Co-authored-by: not a signature\n"; - let (_, authors) = commit_author_identities(commit).expect("valid commit"); + let (_, authors) = commit_author_identities(commit, gix::hash::Kind::Sha1).expect("valid commit"); assert_eq!(authors.len(), 1); assert_eq!(authors[0].name(), "Main Author".as_bytes().as_bstr()); assert_eq!(authors[0].email(), "main@example.com".as_bytes().as_bstr()); diff --git a/gitoxide-core/src/query/engine/update.rs b/gitoxide-core/src/query/engine/update.rs index 818dfff3ec..f9d76d8633 100644 --- a/gitoxide-core/src/query/engine/update.rs +++ b/gitoxide-core/src/query/engine/update.rs @@ -395,7 +395,7 @@ pub fn update( self.progress.inc(); if self.known_commits.binary_search(&id.to_owned()).is_err() { let res = { - let mut parents = gix::objs::CommitRefIter::from_bytes(obj.data).parent_ids(); + let mut parents = gix::objs::CommitRefIter::from_bytes(obj.data, obj.hash_kind).parent_ids(); let res = parents.next().map(|first_parent| (Some(first_parent), id.to_owned())); match parents.next() { Some(_) => None, diff --git a/gix-config/src/parse/from_bytes/tests.rs b/gix-config/src/parse/from_bytes/tests.rs index 77fbcd4231..3f80209737 100644 --- a/gix-config/src/parse/from_bytes/tests.rs +++ b/gix-config/src/parse/from_bytes/tests.rs @@ -1046,7 +1046,10 @@ mod value { #[test] fn trailing_backslash_is_accepted_as_continuation_to_eof() { let (remaining, events) = parse(b"hello\\").unwrap(); - assert_eq!(remaining, b"", "it consumes everything, as the continuation backslash is no value"); + assert_eq!( + remaining, b"", + "it consumes everything, as the continuation backslash is no value" + ); assert_eq!( events, vec![value_not_done_event("hello"), value_done_event("")], diff --git a/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs b/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs index cbdabd9873..66d045fe6c 100644 --- a/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs +++ b/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs @@ -26,7 +26,11 @@ fn single_line() { fn global_property_uses_empty_section_name() -> crate::Result { let mut file = file("a=b\n[core]\na=c"); let err = file.set_existing_raw_value_by("", None, "a", "d").unwrap_err(); - assert_eq!(err.to_string(), "The requested section does not exist", "cannot set global values"); + assert_eq!( + err.to_string(), + "The requested section does not exist", + "cannot set global values" + ); Ok(()) } diff --git a/gix-object/Cargo.toml b/gix-object/Cargo.toml index 018e392f14..8840c02fb4 100644 --- a/gix-object/Cargo.toml +++ b/gix-object/Cargo.toml @@ -45,7 +45,6 @@ gix-hashtable = { version = "^0.14.0", path = "../gix-hashtable" } gix-validate = { version = "^0.11.1", path = "../gix-validate" } gix-actor = { version = "^0.40.1", path = "../gix-actor" } gix-date = { version = "^0.15.2", path = "../gix-date" } -gix-error = { version = "^0.2.2", path = "../gix-error" } gix-utils = { version = "^0.3.2", path = "../gix-utils" } itoa = "1.0.17" diff --git a/gix-object/benches/decode_objects.rs b/gix-object/benches/decode_objects.rs index 6a340b0093..8bd51bcd90 100644 --- a/gix-object/benches/decode_objects.rs +++ b/gix-object/benches/decode_objects.rs @@ -3,19 +3,29 @@ use std::hint::black_box; fn parse_commit(c: &mut Criterion) { c.bench_function("CommitRef(sig)", |b| { - b.iter(|| black_box(gix_object::CommitRef::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS)).unwrap()); + b.iter(|| { + black_box(gix_object::CommitRef::from_bytes( + COMMIT_WITH_MULTI_LINE_HEADERS, + gix_hash::Kind::Sha1, + )) + .unwrap() + }); }); c.bench_function("CommitRefIter(sig)", |b| { - b.iter(|| black_box(gix_object::CommitRefIter::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS).count())); + b.iter(|| { + black_box( + gix_object::CommitRefIter::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS, gix_hash::Kind::Sha1).count(), + ) + }); }); } fn parse_tag(c: &mut Criterion) { c.bench_function("TagRef(sig)", |b| { - b.iter(|| black_box(gix_object::TagRef::from_bytes(TAG_WITH_SIGNATURE)).unwrap()); + b.iter(|| black_box(gix_object::TagRef::from_bytes(TAG_WITH_SIGNATURE, gix_hash::Kind::Sha1)).unwrap()); }); c.bench_function("TagRefIter(sig)", |b| { - b.iter(|| black_box(gix_object::TagRefIter::from_bytes(TAG_WITH_SIGNATURE).count())); + b.iter(|| black_box(gix_object::TagRefIter::from_bytes(TAG_WITH_SIGNATURE, gix_hash::Kind::Sha1).count())); }); } diff --git a/gix-object/fuzz/fuzz_targets/fuzz_commit.rs b/gix-object/fuzz/fuzz_targets/fuzz_commit.rs index b8f3fe1ff3..821e93c874 100644 --- a/gix-object/fuzz/fuzz_targets/fuzz_commit.rs +++ b/gix-object/fuzz/fuzz_targets/fuzz_commit.rs @@ -3,6 +3,8 @@ use libfuzzer_sys::fuzz_target; use std::hint::black_box; fuzz_target!(|commit: &[u8]| { - _ = black_box(gix_object::CommitRef::from_bytes(commit)); - _ = black_box(gix_object::CommitRefIter::from_bytes(commit)).count(); + _ = black_box(gix_object::CommitRef::from_bytes(commit, gix_hash::Kind::Sha1)); + _ = black_box(gix_object::CommitRefIter::from_bytes(commit, gix_hash::Kind::Sha1)).count(); + _ = black_box(gix_object::CommitRef::from_bytes(commit, gix_hash::Kind::Sha256)); + _ = black_box(gix_object::CommitRefIter::from_bytes(commit, gix_hash::Kind::Sha256)).count(); }); diff --git a/gix-object/fuzz/fuzz_targets/fuzz_tag.rs b/gix-object/fuzz/fuzz_targets/fuzz_tag.rs index 34135d4097..c3bb319218 100644 --- a/gix-object/fuzz/fuzz_targets/fuzz_tag.rs +++ b/gix-object/fuzz/fuzz_targets/fuzz_tag.rs @@ -4,6 +4,8 @@ use libfuzzer_sys::fuzz_target; use std::hint::black_box; fuzz_target!(|tag: &[u8]| { - _ = black_box(gix_object::TagRef::from_bytes(tag)); - _ = black_box(gix_object::TagRefIter::from_bytes(tag).count()); + _ = black_box(gix_object::TagRef::from_bytes(tag, gix_hash::Kind::Sha1)); + _ = black_box(gix_object::TagRefIter::from_bytes(tag, gix_hash::Kind::Sha1).count()); + _ = black_box(gix_object::TagRef::from_bytes(tag, gix_hash::Kind::Sha256)); + _ = black_box(gix_object::TagRefIter::from_bytes(tag, gix_hash::Kind::Sha256).count()); }); diff --git a/gix-object/src/commit/decode.rs b/gix-object/src/commit/decode.rs index 5882b4e5c8..dfd4798d9d 100644 --- a/gix-object/src/commit/decode.rs +++ b/gix-object/src/commit/decode.rs @@ -37,13 +37,13 @@ pub fn message<'a>(i: &mut &'a [u8]) -> ParseResult<&'a BStr> { /// This parser is not transactional as a whole: if a later required field or /// the final message parse fails, `i` may already have been advanced past /// earlier successfully parsed fields. -pub fn commit<'a>(i: &mut &'a [u8]) -> ParseResult> { - let tree = parse::header_field(i, b"tree", parse::hex_hash)?; +pub fn commit<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult> { + let tree = parse::header_field(i, b"tree", |value| parse::hex_hash(value, hash_kind))?; let mut parents = SmallVec::new(); loop { let before = *i; - match parse::header_field(i, b"parent", parse::hex_hash) { + match parse::header_field(i, b"parent", |value| parse::hex_hash(value, hash_kind)) { Ok(parent) => parents.push(parent), Err(_) => { *i = before; diff --git a/gix-object/src/commit/mod.rs b/gix-object/src/commit/mod.rs index a33d70bf71..6db9331aca 100644 --- a/gix-object/src/commit/mod.rs +++ b/gix-object/src/commit/mod.rs @@ -62,10 +62,11 @@ mod write; /// Lifecycle impl<'a> CommitRef<'a> { - /// Deserialize a commit from the given `data` bytes while avoiding most allocations. - pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + /// Deserialize a commit from the given `data` bytes while avoiding most allocations, using `hash_kind` to know + /// what kind of hash to expect for validation. + pub fn from_bytes(mut data: &'a [u8], hash_kind: gix_hash::Kind) -> Result, crate::decode::Error> { let input = &mut data; - match decode::commit(input) { + match decode::commit(input, hash_kind) { Ok(tag) => Ok(tag), Err(err) => Err(err), } @@ -88,7 +89,10 @@ impl<'a> CommitRef<'a> { /// Returns a convenient iterator over all extra headers. pub fn extra_headers(&self) -> ExtraHeaders> { - ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref()))) + ExtraHeaders::new( + self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref())), + self.tree().kind(), + ) } /// Return the author, with whitespace trimmed. @@ -132,13 +136,17 @@ impl CommitRef<'_> { impl Commit { /// Returns a convenient iterator over all extra headers. pub fn extra_headers(&self) -> ExtraHeaders> { - ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr()))) + ExtraHeaders::new( + self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr())), + self.tree.kind(), + ) } } /// An iterator over extra headers in [owned][crate::Commit] and [borrowed][crate::CommitRef] commits. pub struct ExtraHeaders { inner: I, + hash_kind: gix_hash::Kind, } /// Instantiation and convenience. @@ -147,8 +155,8 @@ where I: Iterator, { /// Create a new instance from an iterator over tuples of (name, value) pairs. - pub fn new(iter: I) -> Self { - ExtraHeaders { inner: iter } + pub fn new(iter: I, hash_kind: gix_hash::Kind) -> Self { + ExtraHeaders { inner: iter, hash_kind } } /// Find the _value_ of the _first_ header with the given `name`. @@ -175,7 +183,8 @@ where /// A merge tag is a tag object embedded within the respective header field of a commit, making /// it a child object of sorts. pub fn mergetags(self) -> impl Iterator, crate::decode::Error>> { - self.find_all("mergetag").map(|b| TagRef::from_bytes(b)) + let hash_kind = self.hash_kind; + self.find_all("mergetag").map(move |b| TagRef::from_bytes(b, hash_kind)) } /// Return the cryptographic signature provided by gpg/pgp verbatim. diff --git a/gix-object/src/commit/ref_iter.rs b/gix-object/src/commit/ref_iter.rs index c400b495cb..3b49f3f1c6 100644 --- a/gix-object/src/commit/ref_iter.rs +++ b/gix-object/src/commit/ref_iter.rs @@ -30,11 +30,13 @@ pub(crate) enum State { /// Lifecycle impl<'a> CommitRefIter<'a> { - /// Create a commit iterator from data. - pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> { + /// Create a commit iterator from the given `data`, using `hash_kind` to know + /// what kind of hash to expect for validation. + pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> CommitRefIter<'a> { CommitRefIter { data, state: State::default(), + hash_kind, } } } @@ -42,17 +44,21 @@ impl<'a> CommitRefIter<'a> { /// Access impl<'a> CommitRefIter<'a> { /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None` - /// if the commit isn't signed. + /// if the commit isn't signed. All hashes in `data` are parsed as `hash_kind`. /// /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program /// that created it. - pub fn signature(data: &'a [u8]) -> Result, SignedData<'a>)>, crate::decode::Error> { + pub fn signature( + data: &'a [u8], + hash_kind: gix_hash::Kind, + ) -> Result, SignedData<'a>)>, crate::decode::Error> { let mut signature_and_range = None; let raw_tokens = CommitRefIterRaw { data, state: State::default(), offset: 0, + hash_kind, }; for token in raw_tokens { let token = token?; @@ -146,19 +152,27 @@ fn missing_field() -> crate::decode::Error { impl<'a> CommitRefIter<'a> { #[inline] - fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + fn next_inner( + mut i: &'a [u8], + state: &mut State, + hash_kind: gix_hash::Kind, + ) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { let input = &mut i; - match Self::next_inner_(input, state) { + match Self::next_inner_(input, state, hash_kind) { Ok(token) => Ok((*input, token)), Err(err) => Err(err), } } - fn next_inner_(input: &mut &'a [u8], state: &mut State) -> Result, crate::decode::Error> { + fn next_inner_( + input: &mut &'a [u8], + state: &mut State, + hash_kind: gix_hash::Kind, + ) -> Result, crate::decode::Error> { use State::*; Ok(match state { Tree => { - let tree = parse::header_field(input, b"tree", parse::hex_hash)?; + let tree = parse::header_field(input, b"tree", |value| parse::hex_hash(value, hash_kind))?; *state = State::Parents; Token::Tree { id: ObjectId::from_hex(tree).expect("parsing validation"), @@ -166,7 +180,7 @@ impl<'a> CommitRefIter<'a> { } Parents => { if input.starts_with(b"parent ") { - let parent = parse::header_field(input, b"parent", parse::hex_hash)?; + let parent = parse::header_field(input, b"parent", |value| parse::hex_hash(value, hash_kind))?; Token::Parent { id: ObjectId::from_hex(parent).expect("parsing validation"), } @@ -174,7 +188,7 @@ impl<'a> CommitRefIter<'a> { *state = State::Signature { of: SignatureKind::Author, }; - Self::next_inner_(input, state)? + Self::next_inner_(input, state, hash_kind)? } } Signature { ref mut of } => { @@ -201,13 +215,13 @@ impl<'a> CommitRefIter<'a> { let encoding = parse::header_field(input, b"encoding", Ok)?; Token::Encoding(encoding.as_bstr()) } else { - Self::next_inner_(input, state)? + Self::next_inner_(input, state, hash_kind)? } } ExtraHeaders => { if input.starts_with(b"\n") { *state = State::Message; - Self::next_inner_(input, state)? + Self::next_inner_(input, state, hash_kind)? } else { let before = *input; match parse::any_header_field_multi_line(input) @@ -240,7 +254,7 @@ impl<'a> Iterator for CommitRefIter<'a> { if self.data.is_empty() { return None; } - match Self::next_inner(self.data, &mut self.state) { + match Self::next_inner(self.data, &mut self.state, self.hash_kind) { Ok((data, token)) => { self.data = data; Some(Ok(token)) @@ -258,6 +272,7 @@ struct CommitRefIterRaw<'a> { data: &'a [u8], state: State, offset: usize, + hash_kind: gix_hash::Kind, } impl<'a> Iterator for CommitRefIterRaw<'a> { @@ -267,7 +282,7 @@ impl<'a> Iterator for CommitRefIterRaw<'a> { if self.data.is_empty() { return None; } - match CommitRefIter::next_inner(self.data, &mut self.state) { + match CommitRefIter::next_inner(self.data, &mut self.state, self.hash_kind) { Ok((remaining, token)) => { let consumed = self.data.len() - remaining.len(); let start = self.offset; diff --git a/gix-object/src/data.rs b/gix-object/src/data.rs index 1ced8a7f4d..1d16eb581a 100644 --- a/gix-object/src/data.rs +++ b/gix-object/src/data.rs @@ -16,8 +16,8 @@ impl<'a> Data<'a> { Ok(match self.kind { Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data, self.hash_kind)?), Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }), - Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?), - Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?), + Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data, self.hash_kind)?), + Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data, self.hash_kind)?), }) } @@ -34,7 +34,7 @@ impl<'a> Data<'a> { /// `None` if this is not a commit object. pub fn try_into_commit_iter(self) -> Option> { match self.kind { - Kind::Commit => Some(CommitRefIter::from_bytes(self.data)), + Kind::Commit => Some(CommitRefIter::from_bytes(self.data, self.hash_kind)), _ => None, } } @@ -43,7 +43,7 @@ impl<'a> Data<'a> { /// `None` if this is not a tag object. pub fn try_into_tag_iter(self) -> Option> { match self.kind { - Kind::Tag => Some(TagRefIter::from_bytes(self.data)), + Kind::Tag => Some(TagRefIter::from_bytes(self.data, self.hash_kind)), _ => None, } } diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 6d577ab95a..5e153f6753 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -109,7 +109,7 @@ pub struct Blob { #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct CommitRef<'a> { - /// HEX hash of tree object we point to. Usually 40 bytes long. + /// HEX hash of tree object we point to. /// /// Use [`tree()`](CommitRef::tree()) to obtain a decoded version of it. #[cfg_attr(feature = "serde", serde(borrow))] @@ -140,6 +140,7 @@ pub struct CommitRef<'a> { pub struct CommitRefIter<'a> { data: &'a [u8], state: commit::ref_iter::State, + hash_kind: gix_hash::Kind, } /// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits. @@ -194,6 +195,7 @@ pub struct TagRef<'a> { pub struct TagRefIter<'a> { data: &'a [u8], state: tag::ref_iter::State, + hash_kind: gix_hash::Kind, } /// A mutable git tag. diff --git a/gix-object/src/object/mod.rs b/gix-object/src/object/mod.rs index 8275119574..2a15b08ef3 100644 --- a/gix-object/src/object/mod.rs +++ b/gix-object/src/object/mod.rs @@ -212,8 +212,8 @@ impl<'a> ObjectRef<'a> { Ok(match kind { Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data, hash_kind)?), Kind::Blob => ObjectRef::Blob(BlobRef { data }), - Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?), - Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?), + Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data, hash_kind)?), + Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data, hash_kind)?), }) } diff --git a/gix-object/src/parse.rs b/gix-object/src/parse.rs index c519a679fe..3e426bd868 100644 --- a/gix-object/src/parse.rs +++ b/gix-object/src/parse.rs @@ -109,15 +109,13 @@ pub(crate) fn any_header_field<'a>(i: &mut &'a [u8]) -> ParseResult<(&'a [u8], & } } -/// Parse a complete hexadecimal object id. +/// Parse a complete hexadecimal object id of the given `hash_kind`. /// -/// Typical input is a 40-byte SHA-1 hex id or a 64-byte SHA-256 hex id. The -/// entire input slice must be ASCII hex and must match one of the supported -/// object hash lengths. -pub fn hex_hash(i: &[u8]) -> ParseResult<&BStr> { - let max = gix_hash::Kind::longest().len_in_hex(); - let len = i.iter().take(max).take_while(|b| b.is_ascii_hexdigit()).count(); - if len != i.len() || !gix_hash::Kind::all().iter().any(|hk| hk.len_in_hex() == len) { +/// Typical input is a 40-byte SHA-1 hex id or a 64-byte SHA-256 hex id, +/// depending on `hash_kind`. The entire input slice must be ASCII hex and +/// match the expected object hash length. +pub fn hex_hash(i: &[u8], hash_kind: gix_hash::Kind) -> ParseResult<&BStr> { + if i.len() != hash_kind.len_in_hex() || !i.iter().all(u8::is_ascii_hexdigit) { return Err(crate::decode::Error); } Ok(i.as_bstr()) diff --git a/gix-object/src/tag/decode.rs b/gix-object/src/tag/decode.rs index c3ce1515dc..2bf67424a2 100644 --- a/gix-object/src/tag/decode.rs +++ b/gix-object/src/tag/decode.rs @@ -14,8 +14,8 @@ use crate::{parse, parse::ParseResult, BStr, Kind, TagRef}; /// This parser is not transactional as a whole: if a later field fails, `i` may /// already have been advanced past earlier successfully parsed fields. Individual /// field parsers document their own cursor behaviour. -pub fn git_tag<'a>(i: &mut &'a [u8]) -> ParseResult> { - let target = target(i)?; +pub fn git_tag<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult> { + let target = target(i, hash_kind)?; let kind = kind(i)?; let tag_version = name(i)?; let tagger = tagger_raw(i)?; @@ -38,17 +38,10 @@ pub fn git_tag<'a>(i: &mut &'a [u8]) -> ParseResult> { /// Parse the `object \n` header and return the object id as bytes. /// /// Typical input is `object 0123456789012345678901234567890123456789\n`. -/// Both SHA-1 and SHA-256 hex lengths are accepted, and uppercase ASCII hex is -/// valid. On success, `i` is advanced past the entire header line. -pub(crate) fn target<'a>(i: &mut &'a [u8]) -> ParseResult<&'a BStr> { - fn is_valid_hex_hash(value: &[u8]) -> bool { - matches!(value.len(), 40 | 64) && value.iter().all(u8::is_ascii_hexdigit) - } - parse::header_field(i, b"object", |value| { - is_valid_hex_hash(value) - .then(|| value.as_bstr()) - .ok_or(crate::decode::Error) - }) +/// The hash must match `hash_kind`. Uppercase ASCII hex is also valid. +/// On success, `i` is advanced past the entire header line. +pub(crate) fn target<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult<&'a BStr> { + parse::header_field(i, b"object", |value| parse::hex_hash(value, hash_kind)) } /// Parse the `type \n` header and return the object kind. diff --git a/gix-object/src/tag/mod.rs b/gix-object/src/tag/mod.rs index 6e502ff532..62242210aa 100644 --- a/gix-object/src/tag/mod.rs +++ b/gix-object/src/tag/mod.rs @@ -11,9 +11,9 @@ pub mod ref_iter; impl<'a> TagRef<'a> { /// Deserialize a tag from `data`. - pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + pub fn from_bytes(mut data: &'a [u8], hash_kind: gix_hash::Kind) -> Result, crate::decode::Error> { let input = &mut data; - match decode::git_tag(input) { + match decode::git_tag(input, hash_kind) { Ok(tag) => Ok(tag), Err(err) => Err(err), } diff --git a/gix-object/src/tag/ref_iter.rs b/gix-object/src/tag/ref_iter.rs index 6b7f001853..199f4cc822 100644 --- a/gix-object/src/tag/ref_iter.rs +++ b/gix-object/src/tag/ref_iter.rs @@ -14,11 +14,12 @@ pub(crate) enum State { } impl<'a> TagRefIter<'a> { - /// Create a tag iterator from data. - pub fn from_bytes(data: &'a [u8]) -> TagRefIter<'a> { + /// Create a tag iterator from `data`, parsing hashes as `hash_kind`. + pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> TagRefIter<'a> { TagRefIter { data, state: State::default(), + hash_kind, } } @@ -52,19 +53,27 @@ fn missing_field() -> crate::decode::Error { impl<'a> TagRefIter<'a> { #[inline] - fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + fn next_inner( + mut i: &'a [u8], + state: &mut State, + hash_kind: gix_hash::Kind, + ) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { let input = &mut i; - match Self::next_inner_(input, state) { + match Self::next_inner_(input, state, hash_kind) { Ok(token) => Ok((*input, token)), Err(err) => Err(err), } } - fn next_inner_(input: &mut &'a [u8], state: &mut State) -> Result, crate::decode::Error> { + fn next_inner_( + input: &mut &'a [u8], + state: &mut State, + hash_kind: gix_hash::Kind, + ) -> Result, crate::decode::Error> { use State::*; Ok(match state { Target => { - let target = decode::target(input)?; + let target = decode::target(input, hash_kind)?; *state = TargetKind; Token::Target { id: ObjectId::from_hex(target).expect("parsing validation"), @@ -104,7 +113,7 @@ impl<'a> Iterator for TagRefIter<'a> { if self.data.is_empty() { return None; } - match Self::next_inner(self.data, &mut self.state) { + match Self::next_inner(self.data, &mut self.state, self.hash_kind) { Ok((data, token)) => { self.data = data; Some(Ok(token)) diff --git a/gix-object/tests/object/commit/from_bytes.rs b/gix-object/tests/object/commit/from_bytes.rs index 29251c572f..8d6781f8c2 100644 --- a/gix-object/tests/object/commit/from_bytes.rs +++ b/gix-object/tests/object/commit/from_bytes.rs @@ -1,5 +1,7 @@ use gix_actor::SignatureRef; -use gix_object::{bstr::ByteSlice, commit::message::body::TrailerRef, CommitRef, WriteTo}; +use gix_object::{ + bstr::ByteSlice, commit::message::body::TrailerRef, commit::ref_iter::Token, CommitRef, CommitRefIter, WriteTo, +}; use smallvec::SmallVec; use crate::{ @@ -10,7 +12,7 @@ use crate::{ #[test] fn invalid_timestsamp() { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "invalid-timestamp.txt")) + CommitRef::from_bytes(&fixture_name("commit", "invalid-timestamp.txt"), gix_hash::Kind::Sha1) .expect("auto-correct invalid timestamp by discarding it (time is still valid UTC)"), CommitRef { tree: b"7989dfb2ec2f41914611a22fb30bbc2b3849df9a".as_bstr(), @@ -25,6 +27,69 @@ fn invalid_timestsamp() { ); } +#[test] +fn sha256_with_all_fields_and_signature() -> crate::Result { + let input = b"tree 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef +parent 1111111111111111111111111111111111111111111111111111111111111111 +parent 2222222222222222222222222222222222222222222222222222222222222222 +author Ada Lovelace 1710000000 +0000 +committer Grace Hopper 1710003600 -0230 +encoding ISO-8859-1 +gpgsig -----BEGIN SSH SIGNATURE----- + U1NIU0lHAAAAAQAAADMAAAALc3NoLWVkMjU1MTkAAAAgZXhhbXBsZS1zaGEyNTY= + -----END SSH SIGNATURE----- +mergetag object 3333333333333333333333333333333333333333333333333333333333333333 + type commit + tag nested-sha256 + tagger Release Bot 1710007200 +0530 +\x20 +nested release notes + -----BEGIN PGP SIGNATURE----- + nested-signature + -----END PGP SIGNATURE----- + +sha256 subject + +sha256 body +"; + let commit = CommitRef::from_bytes(input, gix_hash::Kind::Sha256)?; + assert_eq!( + commit.tree, + b"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef".as_bstr() + ); + assert_eq!(commit.parents.len(), 2); + assert_eq!(commit.encoding, Some(b"ISO-8859-1".as_bstr())); + assert_eq!(commit.author()?.name, b"Ada Lovelace".as_bstr()); + assert_eq!(commit.committer()?.email, b"grace@example.com".as_bstr()); + assert_eq!( + commit.extra_headers().pgp_signature(), + Some( + b"-----BEGIN SSH SIGNATURE----- +U1NIU0lHAAAAAQAAADMAAAALc3NoLWVkMjU1MTkAAAAgZXhhbXBsZS1zaGEyNTY= +-----END SSH SIGNATURE----- +" + .as_bstr() + ) + ); + assert_eq!(commit.extra_headers().mergetags().count(), 1); + assert_eq!(commit.message, b"sha256 subject\n\nsha256 body\n".as_bstr()); + + let tokens = CommitRefIter::from_bytes(input, gix_hash::Kind::Sha256).collect::, _>>()?; + assert!(matches!(tokens[0], Token::Tree { ref id } if id.kind() == gix_hash::Kind::Sha256)); + assert_eq!( + tokens + .iter() + .filter(|token| matches!(token, Token::Parent { .. })) + .count(), + 2 + ); + assert_eq!( + tokens.last(), + Some(&Token::Message(b"sha256 subject\n\nsha256 body\n".as_bstr())) + ); + Ok(()) +} + #[test] fn uppercase_tree_id() -> crate::Result { let input = b"tree 7989DFB2EC2F41914611A22FB30BBC2B3849DF9A @@ -32,7 +97,7 @@ author Name 1312735823 +0518 committer Name 1312735823 +0518 message"; - let commit = CommitRef::from_bytes(input)?; + let commit = CommitRef::from_bytes(input, gix_hash::Kind::Sha1)?; assert_eq!(commit.tree, b"7989DFB2EC2F41914611A22FB30BBC2B3849DF9A".as_bstr()); assert_eq!(commit.tree(), hex_to_id("7989dfb2ec2f41914611a22fb30bbc2b3849df9a")); Ok(()) @@ -48,7 +113,7 @@ fn invalid_email_of_committer() -> crate::Result { let mut buf = vec![]; let backing = fixture_name("commit", "invalid-actor.txt"); - let commit = CommitRef::from_bytes(&backing).expect("ignore strangely formed actor format"); + let commit = CommitRef::from_bytes(&backing, gix_hash::Kind::Sha1).expect("ignore strangely formed actor format"); assert_eq!( commit, CommitRef { @@ -66,7 +131,7 @@ fn invalid_email_of_committer() -> crate::Result { commit.write_to(&mut buf).expect("we can write invalid actors back"); assert_eq!( - CommitRef::from_bytes(&buf).expect("this is the same commit and it can be parsed"), + CommitRef::from_bytes(&buf, gix_hash::Kind::Sha1).expect("this is the same commit and it can be parsed"), commit, "round-tripping works" ); @@ -77,7 +142,7 @@ fn invalid_email_of_committer() -> crate::Result { #[test] fn unsigned() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "unsigned.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "unsigned.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"1b2dfb4ac5e42080b682fc676e9738c94ce6d54d".as_bstr(), parents: SmallVec::default(), @@ -94,7 +159,7 @@ fn unsigned() -> crate::Result { #[test] fn whitespace() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "whitespace.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "whitespace.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"9bed6275068a0575243ba8409253e61af81ab2ff".as_bstr(), parents: SmallVec::from(vec![b"26b4df046d1776c123ac69d918f5aec247b58cc6".as_bstr()]), @@ -111,7 +176,7 @@ fn whitespace() -> crate::Result { #[test] fn signed_singleline() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "signed-singleline.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "signed-singleline.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"00fc39317701176e326974ce44f5bd545a32ec0b".as_bstr(), parents: SmallVec::from(vec![b"09d8d3a12e161a7f6afb522dbe8900a9c09bce06".as_bstr()]), @@ -143,7 +208,7 @@ fn mergetag() -> crate::Result { std::borrow::Cow::Owned(MERGE_TAG.as_bytes().into()), )], }; - let commit = CommitRef::from_bytes(&fixture)?; + let commit = CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!(commit, expected); assert_eq!(commit.extra_headers().find_all("mergetag").count(), 1); assert_eq!(commit.extra_headers().mergetags().count(), 1); @@ -155,7 +220,7 @@ fn mergetag() -> crate::Result { #[test] fn signed() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "signed.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "signed.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"00fc39317701176e326974ce44f5bd545a32ec0b".as_bstr(), parents: SmallVec::from(vec![b"09d8d3a12e161a7f6afb522dbe8900a9c09bce06".as_bstr()]), @@ -172,7 +237,10 @@ fn signed() -> crate::Result { #[test] fn signed_with_encoding() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "signed-with-encoding.txt"))?, + CommitRef::from_bytes( + &fixture_name("commit", "signed-with-encoding.txt"), + gix_hash::Kind::Sha1 + )?, CommitRef { tree: b"1973afa74d87b2bb73fa884aaaa8752aec43ea88".as_bstr(), parents: SmallVec::from(vec![b"79c51cc86923e2b8ca0ee5c4eb75e48027133f9a".as_bstr()]), @@ -189,7 +257,7 @@ fn signed_with_encoding() -> crate::Result { #[test] fn with_encoding() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "with-encoding.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "with-encoding.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"4a1c03029e7407c0afe9fc0320b3258e188b115e".as_bstr(), parents: SmallVec::from(vec![b"7ca98aad461a5c302cb4c9e3acaaa6053cc67a62".as_bstr()]), @@ -206,7 +274,7 @@ fn with_encoding() -> crate::Result { #[test] fn pre_epoch() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "pre-epoch.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "pre-epoch.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"71cdd4015386b764b178005cad4c88966bc9d61a".as_bstr(), parents: SmallVec::default(), @@ -223,7 +291,10 @@ fn pre_epoch() -> crate::Result { #[test] fn double_dash_special_time_offset() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "double-dash-date-offset.txt"))?, + CommitRef::from_bytes( + &fixture_name("commit", "double-dash-date-offset.txt"), + gix_hash::Kind::Sha1 + )?, CommitRef { tree: b"0a851d7a2a66084ab10516c406a405d147e974ad".as_bstr(), parents: SmallVec::from(vec![b"31350f4f0f459485eff2131517e3450cf251f6fa".as_bstr()]), @@ -245,7 +316,7 @@ fn with_trailer() -> crate::Result { time: "1631514803 +0200", }; let backing = fixture_name("commit", "message-with-footer.txt"); - let commit = CommitRef::from_bytes(&backing)?; + let commit = CommitRef::from_bytes(&backing, gix_hash::Kind::Sha1)?; assert_eq!( commit, CommitRef { @@ -326,7 +397,7 @@ instead of depending directly on the lower-level crates. #[test] fn merge() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "merge.txt"))?, + CommitRef::from_bytes(&fixture_name("commit", "merge.txt"), gix_hash::Kind::Sha1)?, CommitRef { tree: b"0cf16ce8e229b59a761198975f0c0263229faf82".as_bstr(), parents: SmallVec::from(vec![ @@ -346,7 +417,7 @@ fn merge() -> crate::Result { #[test] fn newline_right_after_signature_multiline_header() -> crate::Result { let fixture = fixture_name("commit", "signed-whitespace.txt"); - let commit = CommitRef::from_bytes(&fixture)?; + let commit = CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; let pgp_sig = crate::commit::OTHER_SIGNATURE.as_bstr(); assert_eq!(commit.extra_headers[0].1.as_ref(), pgp_sig); assert_eq!(commit.extra_headers().pgp_signature(), Some(pgp_sig)); @@ -363,7 +434,7 @@ fn newline_right_after_signature_multiline_header() -> crate::Result { #[test] fn bogus_multi_gpgsig_header() -> crate::Result { let fixture = fixture_name("commit", "bogus-gpgsig-lines-in-git.git.txt"); - let commit = CommitRef::from_bytes(&fixture)?; + let commit = CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; let pgp_sig = b"-----BEGIN PGP SIGNATURE-----".as_bstr(); assert_eq!(commit.extra_headers().pgp_signature(), Some(pgp_sig)); assert_eq!( diff --git a/gix-object/tests/object/commit/iter.rs b/gix-object/tests/object/commit/iter.rs index 6991ba1447..7ce34bc13a 100644 --- a/gix-object/tests/object/commit/iter.rs +++ b/gix-object/tests/object/commit/iter.rs @@ -8,7 +8,7 @@ use crate::{ #[test] fn newline_right_after_signature_multiline_header() -> crate::Result { let data = fixture_name("commit", "signed-whitespace.txt"); - let tokens = CommitRefIter::from_bytes(&data).collect::, _>>()?; + let tokens = CommitRefIter::from_bytes(&data, gix_hash::Kind::Sha1).collect::, _>>()?; assert_eq!(tokens.len(), 7, "mainly a parsing exercise"); match tokens.last().expect("there are tokens") { Token::Message(msg) => { @@ -22,7 +22,7 @@ fn newline_right_after_signature_multiline_header() -> crate::Result { #[test] fn signed_with_encoding() -> crate::Result { let input = fixture_name("commit", "signed-with-encoding.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.collect::, _>>()?, vec![ @@ -52,7 +52,8 @@ fn signed_with_encoding() -> crate::Result { #[test] fn whitespace() -> crate::Result { assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "whitespace.txt")).collect::, _>>()?, + CommitRefIter::from_bytes(&fixture_name("commit", "whitespace.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("9bed6275068a0575243ba8409253e61af81ab2ff") @@ -75,7 +76,8 @@ fn whitespace() -> crate::Result { #[test] fn unsigned() -> crate::Result { assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "unsigned.txt")).collect::, _>>()?, + CommitRefIter::from_bytes(&fixture_name("commit", "unsigned.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d") @@ -95,7 +97,8 @@ fn unsigned() -> crate::Result { #[test] fn signed_singleline() -> crate::Result { assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt")).collect::, _>>()?, + CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("00fc39317701176e326974ce44f5bd545a32ec0b") @@ -114,7 +117,7 @@ fn signed_singleline() -> crate::Result { ] ); assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt")) + CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt"), gix_hash::Kind::Sha1) .parent_ids() .collect::>(), vec![hex_to_id("09d8d3a12e161a7f6afb522dbe8900a9c09bce06")] @@ -125,7 +128,7 @@ fn signed_singleline() -> crate::Result { #[test] fn error_handling() -> crate::Result { let data = fixture_name("commit", "unsigned.txt"); - let iter = CommitRefIter::from_bytes(&data[..data.len() / 2]); + let iter = CommitRefIter::from_bytes(&data[..data.len() / 2], gix_hash::Kind::Sha1); let tokens = iter.collect::>(); assert!( tokens.last().expect("at least the errored token").is_err(), @@ -137,7 +140,7 @@ fn error_handling() -> crate::Result { #[test] fn mergetag() -> crate::Result { let input = fixture_name("commit", "mergetag.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.collect::, _>>()?, vec![ @@ -179,7 +182,7 @@ mod method { #[test] fn tree_id() -> crate::Result { let input = fixture_name("commit", "unsigned.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.clone().tree_id().ok(), Some(hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d")) @@ -195,7 +198,7 @@ mod method { #[test] fn signatures() -> crate::Result { let input = fixture_name("commit", "unsigned.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.signatures().collect::>(), vec![signature("1592437401 +0800"), signature("1592437401 +0800")] @@ -227,7 +230,8 @@ mod method { let expected_signature = expected_signature.into(); let fixture_data = fixture_name("commit", fixture); - let (actual_signature, actual_signed_data) = CommitRefIter::signature(&fixture_data)?.expect("sig present"); + let (actual_signature, actual_signed_data) = + CommitRefIter::signature(&fixture_data, gix_hash::Kind::Sha1)?.expect("sig present"); let expected_signed_data: BString = fixture_data .lines_with_terminator() .enumerate() diff --git a/gix-object/tests/object/commit/mod.rs b/gix-object/tests/object/commit/mod.rs index 4d0914ad2f..382cbf6fc4 100644 --- a/gix-object/tests/object/commit/mod.rs +++ b/gix-object/tests/object/commit/mod.rs @@ -158,7 +158,7 @@ mod method { #[test] fn tree() -> crate::Result { let fixture = fixture_name("commit", "unsigned.txt"); - let commit = CommitRef::from_bytes(&fixture)?; + let commit = CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!(commit.tree(), hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d")); assert_eq!(commit.tree, "1b2dfb4ac5e42080b682fc676e9738c94ce6d54d"); Ok(()) @@ -167,7 +167,7 @@ mod method { #[test] fn author_and_committer_trims_signature() -> crate::Result { let backing = fixture_name("commit", "email-with-space.txt"); - let commit = CommitRef::from_bytes(&backing)?; + let commit = CommitRef::from_bytes(&backing, gix_hash::Kind::Sha1)?; std::assert_eq!(commit.author()?, signature("1592437401 +0800")); std::assert_eq!(commit.committer()?, signature("1592437401 +0800")); Ok(()) @@ -178,9 +178,9 @@ mod method { fn invalid() { let fixture = fixture_name("commit", "unsigned.txt"); let partial_commit = &fixture[..fixture.len() / 2]; - assert!(CommitRef::from_bytes(partial_commit).is_err()); + assert!(CommitRef::from_bytes(partial_commit, gix_hash::Kind::Sha1).is_err()); assert_eq!( - CommitRefIter::from_bytes(partial_commit) + CommitRefIter::from_bytes(partial_commit, gix_hash::Kind::Sha1) .take_while(Result::is_ok) .count(), 1, @@ -192,8 +192,8 @@ fn invalid() { fn invalid_object_id_length() { let input = b"tree 00000066666666666684666666666666666299297\npare6"; - assert!(CommitRef::from_bytes(input).is_err()); - assert!(CommitRefIter::from_bytes(input) + assert!(CommitRef::from_bytes(input, gix_hash::Kind::Sha1).is_err()); + assert!(CommitRefIter::from_bytes(input, gix_hash::Kind::Sha1) .next() .expect("a decoding error is returned for the first token") .is_err()); @@ -203,8 +203,8 @@ fn invalid_object_id_length() { fn fuzz_artifact_inputs_can_be_parsed_without_panicking() { for path in crate::fuzz_artifact_paths("fuzz_commit") { let input = std::fs::read(path).expect("artifact is readable"); - _ = CommitRef::from_bytes(&input); - _ = CommitRefIter::from_bytes(&input).count(); + _ = CommitRef::from_bytes(&input, gix_hash::Kind::Sha1); + _ = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1).count(); } } diff --git a/gix-object/tests/object/encode.rs b/gix-object/tests/object/encode.rs index 1fe1eeecd2..ee9b15c54f 100644 --- a/gix-object/tests/object/encode.rs +++ b/gix-object/tests/object/encode.rs @@ -6,71 +6,6 @@ enum Error { TryFromError, } -macro_rules! round_trip { - ($owned:ty, $borrowed:ty, $( $files:literal ), +) => { - #[test] - fn round_trip() -> Result<(), Box> { - use std::convert::TryFrom; - use std::io::Write; - use crate::fixture_bytes; - use gix_object::{ObjectRef, Object, WriteTo}; - use bstr::ByteSlice; - - for input_name in &[ - $( $files ),* - ] { - let input = fixture_bytes(input_name); - // Test the parse->borrowed->owned->write chain for an object kind - let mut output = Vec::new(); - let item = <$borrowed>::from_bytes(&input)?; - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr(), "borrowed: {input_name}"); - - let item: $owned = item.try_into()?; - output.clear(); - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr()); - - // Test the parse->borrowed->owned->write chain for the top-level objects - let item = ObjectRef::from(<$borrowed>::from_bytes(&input)?); - output.clear(); - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr(), "object-ref"); - - let item: Object = Object::try_from(item)?; - output.clear(); - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr(), "owned"); - - // Test the loose serialisation -> parse chain for an object kind - let item = <$borrowed>::from_bytes(&input)?; - // serialise a borowed item to a tagged loose object - output.clear(); - { - let w = &mut output; - w.write_all(&item.loose_header())?; - item.write_to(w)?; - let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; - let item2 = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; - assert_eq!(item2, item, "object-ref loose: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); - } - - let item: $owned = item.try_into()?; - // serialise an owned to a tagged loose object - output.clear(); - let w = &mut output; - w.write_all(&item.loose_header())?; - item.write_to(w)?; - let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; - let parsed_borrowed = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; - let item2: $owned = parsed_borrowed.try_into().or(Err(super::Error::TryFromError))?; - assert_eq!(item2, item, "object-ref loose owned: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); - } - Ok(()) - } - }; -} - /// Needed for roundtripping object types that take a `hash_kind` parameter. /// This is the same as `round_trip`, but for types that have `from_bytes()` with `hash_kind`. macro_rules! round_trip_with_hash_kind { @@ -79,7 +14,7 @@ macro_rules! round_trip_with_hash_kind { fn round_trip() -> Result<(), Box> { use std::convert::TryFrom; use std::io::Write; - use crate::fixture_bytes; + use crate::object_fixture; use gix_object::{ObjectRef, Object, WriteTo}; use bstr::ByteSlice; let hash_kind = crate::fixture_hash_kind(); @@ -87,11 +22,7 @@ macro_rules! round_trip_with_hash_kind { for input_name in &[ $( $files ),* ] { - let input = if let Some(path) = input_name.strip_prefix("tree/") { - crate::tree_fixture(path)? - } else { - fixture_bytes(input_name) - }; + let input = object_fixture(input_name)?; // Test the parse->borrowed->owned->write chain for an object kind let mut output = Vec::new(); let item = <$borrowed>::from_bytes(&input, hash_kind)?; @@ -144,7 +75,7 @@ macro_rules! round_trip_with_hash_kind { } mod tag { - round_trip!( + round_trip_with_hash_kind!( gix_object::Tag, gix_object::TagRef, "tag/empty_missing_nl.txt", @@ -157,7 +88,7 @@ mod tag { } mod commit { - round_trip!( + round_trip_with_hash_kind!( gix_object::Commit, gix_object::CommitRef, "commit/email-with-space.txt", @@ -216,8 +147,74 @@ mod tree { } mod blob { - // It doesn't matter which data we use - it's not interpreted. - round_trip!(gix_object::Blob, gix_object::BlobRef, "tree/everything.tree"); + use std::{convert::TryFrom, io::Write}; + + use bstr::ByteSlice; + use gix_object::{Blob, BlobRef, Object, ObjectRef, WriteTo}; + + use crate::fixture_bytes; + + #[test] + fn round_trip() -> Result<(), Box> { + let input_name = "tree/everything.tree"; + let input = fixture_bytes(input_name); + // It doesn't matter which data we use - it's not interpreted. + + let mut output = Vec::new(); + let item = BlobRef::from_bytes(&input)?; + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "borrowed: {input_name}"); + + let item: Blob = item.into(); + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr()); + + let item = ObjectRef::from(BlobRef::from_bytes(&input)?); + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "object-ref"); + + let item: Object = Object::try_from(item)?; + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "owned"); + + let item = BlobRef::from_bytes(&input)?; + output.clear(); + { + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + let item2 = BlobRef::try_from(parsed).or(Err(super::Error::TryFromError))?; + assert_eq!( + item2, + item, + "object-ref loose: {input_name} {:?}\n{:?}", + output.as_bstr(), + input.as_bstr() + ); + } + + let item: Blob = item.into(); + output.clear(); + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + let parsed_borrowed = BlobRef::try_from(parsed).or(Err(super::Error::TryFromError))?; + let item2: Blob = parsed_borrowed.into(); + assert_eq!( + item2, + item, + "object-ref loose owned: {input_name} {:?}\n{:?}", + output.as_bstr(), + input.as_bstr() + ); + + Ok(()) + } } mod loose_header { diff --git a/gix-object/tests/object/main.rs b/gix-object/tests/object/main.rs index fdcc90fd71..113569e942 100644 --- a/gix-object/tests/object/main.rs +++ b/gix-object/tests/object/main.rs @@ -99,12 +99,16 @@ fn fixture_name(kind: &str, path: &str) -> Vec { /// Return the object id expected in fixture assertions for the active fixture hash kind. /// -/// Tree fixtures in this test module are authored as SHA-1 data and are rewritten on demand for -/// SHA-256 runs. This helper mirrors that rewrite on the expectation side so tree parsing tests can -/// use one set of source ids for both hash kinds. +/// Object fixtures in this test module are authored as SHA-1 data and are rewritten on demand for +/// SHA-256 runs. This helper mirrors that rewrite on the expectation side so parsing tests can use +/// one set of source ids for both hash kinds. pub fn fixture_oid(hex: &str) -> ObjectId { let oid = hex_to_id(hex); - match fixture_hash_kind() { + translate_fixture_oid(oid, fixture_hash_kind()) +} + +fn translate_fixture_oid(oid: ObjectId, kind: gix_hash::Kind) -> ObjectId { + match kind { gix_hash::Kind::Sha1 => oid, kind => { let mut hasher = gix_hash::hasher(kind); @@ -114,6 +118,19 @@ pub fn fixture_oid(hex: &str) -> ObjectId { } } +/// Load an object fixture and, if needed, rewrite its SHA-1 object ids for the active fixture hash kind. +pub fn object_fixture(path: &str) -> Result> { + if let Some(path) = path.strip_prefix("tree/") { + tree_fixture(path) + } else if let Some(path) = path.strip_prefix("commit/") { + commit_fixture(path) + } else if let Some(path) = path.strip_prefix("tag/") { + tag_fixture(path) + } else { + Ok(fixture_bytes(path)) + } +} + /// Load a tree fixture and, if needed, rewrite its embedded entry ids for the active fixture hash kind. /// /// The on-disk `tree/*.tree` fixtures contain SHA-1-sized ids. For SHA-256 test runs we parse the @@ -126,9 +143,7 @@ pub fn tree_fixture(path: &str) -> Result> { kind => { let mut tree: gix_object::Tree = gix_object::TreeRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?.into(); for entry in &mut tree.entries { - let mut hasher = gix_hash::hasher(kind); - hasher.update(entry.oid.as_bytes()); - entry.oid = hasher.try_finalize()?; + entry.oid = translate_fixture_oid(entry.oid, kind); } let mut out = Vec::with_capacity( fixture.len() + tree.entries.len() * (kind.len_in_bytes() - gix_hash::Kind::Sha1.len_in_bytes()), @@ -139,6 +154,42 @@ pub fn tree_fixture(path: &str) -> Result> { } } +fn commit_fixture(path: &str) -> Result> { + let fixture = fixture_name("commit", path); + match fixture_hash_kind() { + gix_hash::Kind::Sha1 => Ok(fixture), + kind => { + let mut commit = gix_object::CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?.into_owned()?; + commit.tree = translate_fixture_oid(commit.tree, kind); + for parent in &mut commit.parents { + *parent = translate_fixture_oid(*parent, kind); + } + + let mut out = Vec::with_capacity( + fixture.len() + + (1 + commit.parents.len()) * (kind.len_in_bytes() - gix_hash::Kind::Sha1.len_in_bytes()), + ); + commit.write_to(&mut out)?; + Ok(out) + } + } +} + +fn tag_fixture(path: &str) -> Result> { + let fixture = fixture_name("tag", path); + match fixture_hash_kind() { + gix_hash::Kind::Sha1 => Ok(fixture), + kind => { + let mut tag = gix_object::TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?.into_owned()?; + tag.target = translate_fixture_oid(tag.target, kind); + + let mut out = Vec::with_capacity(fixture.len() + kind.len_in_bytes() - gix_hash::Kind::Sha1.len_in_bytes()); + tag.write_to(&mut out)?; + Ok(out) + } + } +} + pub fn generated_tree_root_id() -> Result { let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?; let tree = std::fs::read(root.join("tree.baseline"))?; diff --git a/gix-object/tests/object/tag.rs b/gix-object/tests/object/tag.rs index cb9d5b0db4..a227476b00 100644 --- a/gix-object/tests/object/tag.rs +++ b/gix-object/tests/object/tag.rs @@ -45,6 +45,64 @@ body"; const PGP_SIGNATURE_AT_BODY_START_SIGNATURE: &[u8] = b"-----BEGIN PGP SIGNATURE----- body"; +#[test] +fn sha256_with_all_fields_and_signature() -> crate::Result { + let input = b"object abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 +type commit +tag v2.0.0-sha256 +tagger Release Bot 1710007200 +0530 + +Release v2.0.0 + +- ship sha256 object support +- include annotated tag signatures +-----BEGIN PGP SIGNATURE----- +sha256-tag-signature +-----END PGP SIGNATURE----- +"; + let tag = TagRef::from_bytes(input, gix_hash::Kind::Sha256)?; + assert_eq!( + tag.target, + b"abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789".as_bstr() + ); + assert_eq!(tag.target().kind(), gix_hash::Kind::Sha256); + assert_eq!(tag.target_kind, Kind::Commit); + assert_eq!(tag.name, b"v2.0.0-sha256".as_bstr()); + assert_eq!(tag.tagger()?.expect("tagger").name, b"Release Bot".as_bstr()); + assert_eq!( + tag.message, + b"Release v2.0.0 + +- ship sha256 object support +- include annotated tag signatures" + .as_bstr() + ); + assert_eq!( + tag.pgp_signature, + Some( + b"-----BEGIN PGP SIGNATURE----- +sha256-tag-signature +-----END PGP SIGNATURE----- +" + .as_bstr() + ) + ); + + let tokens = TagRefIter::from_bytes(input, gix_hash::Kind::Sha256).collect::, _>>()?; + assert!(matches!( + tokens.first(), + Some(gix_object::tag::ref_iter::Token::Target { id }) if id.kind() == gix_hash::Kind::Sha256 + )); + assert!(matches!( + tokens.last(), + Some(gix_object::tag::ref_iter::Token::Body { + pgp_signature: Some(_), + .. + }) + )); + Ok(()) +} + mod method { use bstr::ByteSlice; use gix_object::TagRef; @@ -55,7 +113,7 @@ mod method { #[test] fn target() -> crate::Result { let fixture = fixture_name("tag", "signed.txt"); - let tag_ref = TagRef::from_bytes(&fixture)?; + let tag_ref = TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!(tag_ref.target(), hex_to_id("ffa700b4aca13b80cb6b98a078e7c96804f8e0ec")); assert_eq!(tag_ref.target, "ffa700b4aca13b80cb6b98a078e7c96804f8e0ec".as_bytes()); @@ -80,7 +138,7 @@ mod method { #[test] fn tagger_trims_signature() -> crate::Result { let fixture = fixture_name("tag", "tagger-with-whitespace.txt"); - let tag = TagRef::from_bytes(&fixture)?; + let tag = TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; std::assert_eq!(tag.tagger()?, Some(signature("1592381636 +0800"))); Ok(()) } @@ -94,7 +152,7 @@ mod iter { #[test] fn empty() -> crate::Result { let tag = fixture_name("tag", "empty.txt"); - let tag_iter = TagRefIter::from_bytes(&tag); + let tag_iter = TagRefIter::from_bytes(&tag, gix_hash::Kind::Sha1); let target_id = hex_to_id("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc"); let tagger = Some(signature("1592381636 +0800")); assert_eq!( @@ -118,7 +176,8 @@ mod iter { #[test] fn no_tagger() -> crate::Result { assert_eq!( - TagRefIter::from_bytes(&fixture_name("tag", "no-tagger.txt")).collect::, _>>()?, + TagRefIter::from_bytes(&fixture_name("tag", "no-tagger.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Target { id: hex_to_id("c39ae07f393806ccf406ef966e9a15afc43cc36a") @@ -154,7 +213,8 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn whitespace() -> crate::Result { assert_eq!( - TagRefIter::from_bytes(&fixture_name("tag", "whitespace.txt")).collect::, _>>()?, + TagRefIter::from_bytes(&fixture_name("tag", "whitespace.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Target { id: hex_to_id("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc") @@ -174,7 +234,8 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn pgp_begin_marker_not_at_line_start_is_message() -> crate::Result { assert_eq!( - TagRefIter::from_bytes(super::PGP_BEGIN_NOT_AT_LINE_START).collect::, _>>()?, + TagRefIter::from_bytes(super::PGP_BEGIN_NOT_AT_LINE_START, gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Target { id: hex_to_id("ffa700b4aca13b80cb6b98a078e7c96804f8e0ec") @@ -194,7 +255,7 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn error_handling() -> crate::Result { let data = fixture_name("tag", "empty.txt"); - let iter = TagRefIter::from_bytes(&data[..data.len() / 3]); + let iter = TagRefIter::from_bytes(&data[..data.len() / 3], gix_hash::Kind::Sha1); let tokens = iter.collect::>(); assert!( tokens.last().expect("at least the errored token").is_err(), @@ -208,9 +269,11 @@ KLMHist5yj0sw1E4hDTyQa0= fn invalid() { let fixture = fixture_name("tag", "whitespace.txt"); let partial_tag = &fixture[..fixture.len() / 2]; - assert!(TagRef::from_bytes(partial_tag).is_err()); + assert!(TagRef::from_bytes(partial_tag, gix_hash::Kind::Sha1).is_err()); assert_eq!( - TagRefIter::from_bytes(partial_tag).take_while(Result::is_ok).count(), + TagRefIter::from_bytes(partial_tag, gix_hash::Kind::Sha1) + .take_while(Result::is_ok) + .count(), 3, "we can decode some fields before failing" ); @@ -223,7 +286,7 @@ type commit tag uppercase-target message"; - let tag = TagRef::from_bytes(input)?; + let tag = TagRef::from_bytes(input, gix_hash::Kind::Sha1)?; assert_eq!(tag.target, b"FFA700B4ACA13B80CB6B98A078E7C96804F8E0EC".as_bstr()); assert_eq!( tag.target(), @@ -236,8 +299,8 @@ message"; fn invalid_target_id_length() { let input = b"object 00000066666666666684666666666666666299297\ntype commit\ntag bad\n"; - assert!(TagRef::from_bytes(input).is_err()); - assert!(TagRefIter::from_bytes(input) + assert!(TagRef::from_bytes(input, gix_hash::Kind::Sha1).is_err()); + assert!(TagRefIter::from_bytes(input, gix_hash::Kind::Sha1) .next() .expect("a decoding error is returned for the first token") .is_err()); @@ -249,7 +312,7 @@ mod from_bytes { use crate::{fixture_name, tag::tag_fixture}; fn assert_roundtrip(input: &[u8]) -> crate::Result { - let tag = TagRef::from_bytes(input)?; + let tag = TagRef::from_bytes(input, gix_hash::Kind::Sha1)?; let mut out = Vec::new(); tag.write_to(&mut out)?; assert_eq!(out, input); @@ -258,14 +321,17 @@ mod from_bytes { #[test] fn signed() -> crate::Result { - assert_eq!(TagRef::from_bytes(&fixture_name("tag", "signed.txt"))?, tag_fixture()); + assert_eq!( + TagRef::from_bytes(&fixture_name("tag", "signed.txt"), gix_hash::Kind::Sha1)?, + tag_fixture() + ); Ok(()) } #[test] fn empty() -> crate::Result { let fixture = fixture_name("tag", "empty.txt"); - let tag_ref = TagRef::from_bytes(&fixture)?; + let tag_ref = TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!( tag_ref, TagRef { @@ -284,7 +350,7 @@ mod from_bytes { #[test] fn empty_missing_nl() -> crate::Result { let fixture = fixture_name("tag", "empty_missing_nl.txt"); - let tag_ref = TagRef::from_bytes(&fixture)?; + let tag_ref = TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!( tag_ref, TagRef { @@ -303,7 +369,7 @@ mod from_bytes { #[test] fn with_newlines() -> crate::Result { assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "with-newlines.txt"))?, + TagRef::from_bytes(&fixture_name("tag", "with-newlines.txt"), gix_hash::Kind::Sha1)?, TagRef { target: b"ebdf205038b66108c0331aa590388431427493b7".as_bstr(), name: b"baz".as_bstr(), @@ -319,7 +385,7 @@ mod from_bytes { #[test] fn no_tagger() -> crate::Result { assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "no-tagger.txt"))?, + TagRef::from_bytes(&fixture_name("tag", "no-tagger.txt"), gix_hash::Kind::Sha1)?, TagRef { target: b"c39ae07f393806ccf406ef966e9a15afc43cc36a".as_bstr(), name: b"v2.6.11-tree".as_bstr(), @@ -350,7 +416,7 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn pgp_begin_marker_not_at_line_start_is_message() -> crate::Result { - let tag = TagRef::from_bytes(super::PGP_BEGIN_NOT_AT_LINE_START)?; + let tag = TagRef::from_bytes(super::PGP_BEGIN_NOT_AT_LINE_START, gix_hash::Kind::Sha1)?; assert_eq!(tag.message, super::PGP_BEGIN_NOT_AT_LINE_START_MESSAGE.as_bstr()); assert_eq!(tag.pgp_signature, None, "it doesn't parse this as PGP signature"); assert_roundtrip(super::PGP_BEGIN_NOT_AT_LINE_START)?; @@ -359,7 +425,7 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn trailing_text_after_pgp_end_marker_is_signature() -> crate::Result { - let tag = TagRef::from_bytes(super::PGP_SIGNATURE_WITH_TRAILING_TEXT)?; + let tag = TagRef::from_bytes(super::PGP_SIGNATURE_WITH_TRAILING_TEXT, gix_hash::Kind::Sha1)?; assert_eq!(tag.message, b"message text".as_bstr()); assert_eq!( tag.pgp_signature, @@ -371,7 +437,7 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn pgp_begin_marker_without_end_marker_starts_signature() -> crate::Result { - let tag = TagRef::from_bytes(super::PGP_SIGNATURE_WITHOUT_END_MARKER)?; + let tag = TagRef::from_bytes(super::PGP_SIGNATURE_WITHOUT_END_MARKER, gix_hash::Kind::Sha1)?; assert_eq!(tag.message, b"message text".as_bstr()); assert_eq!( tag.pgp_signature, @@ -383,7 +449,7 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn pgp_begin_marker_at_body_start_is_signature() -> crate::Result { - let tag = TagRef::from_bytes(super::PGP_SIGNATURE_AT_BODY_START)?; + let tag = TagRef::from_bytes(super::PGP_SIGNATURE_AT_BODY_START, gix_hash::Kind::Sha1)?; assert_eq!(tag.message, b"".as_bstr()); assert_eq!( tag.pgp_signature, @@ -396,7 +462,7 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn whitespace() -> crate::Result { assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "whitespace.txt"))?, + TagRef::from_bytes(&fixture_name("tag", "whitespace.txt"), gix_hash::Kind::Sha1)?, TagRef { target: b"01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc".as_bstr(), name: b"whitespace".as_bstr(), @@ -412,7 +478,10 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn tagger_without_timestamp() -> crate::Result { assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "tagger-without-timestamp.txt"))?, + TagRef::from_bytes( + &fixture_name("tag", "tagger-without-timestamp.txt"), + gix_hash::Kind::Sha1 + )?, TagRef { target: b"4fcd840c4935e4c7a5ea3552710a0f26b9178c24".as_bstr(), name: b"ChangeLog".as_bstr(), diff --git a/gix-pack/src/data/output/count/objects/mod.rs b/gix-pack/src/data/output/count/objects/mod.rs index 05f06f5866..1f0997f25c 100644 --- a/gix-pack/src/data/output/count/objects/mod.rs +++ b/gix-pack/src/data/output/count/objects/mod.rs @@ -175,7 +175,7 @@ mod expand { match obj.kind { Tree | Blob => break, Tag => { - id = TagRefIter::from_bytes(obj.data) + id = TagRefIter::from_bytes(obj.data, obj.hash_kind) .target_id() .expect("every tag has a target"); let tmp = db.find(&id, buf1)?; @@ -188,7 +188,7 @@ mod expand { } Commit => { let current_tree_iter = { - let mut commit_iter = CommitRefIter::from_bytes(obj.data); + let mut commit_iter = CommitRefIter::from_bytes(obj.data, obj.hash_kind); let tree_id = commit_iter.tree_id().expect("every commit has a tree"); parent_commit_ids.clear(); for token in commit_iter { @@ -227,9 +227,12 @@ mod expand { push_obj_count_unique( &mut out, seen_objs, commit_id, location, objects, stats, true, ); - CommitRefIter::from_bytes(parent_commit_obj.data) - .tree_id() - .expect("every commit has a tree") + CommitRefIter::from_bytes( + parent_commit_obj.data, + parent_commit_obj.hash_kind, + ) + .tree_id() + .expect("every commit has a tree") }; let parent_tree = { let (parent_tree_obj, location) = db.find(&parent_tree_id, buf2)?; @@ -296,7 +299,7 @@ mod expand { break; } Commit => { - id = CommitRefIter::from_bytes(obj.0.data) + id = CommitRefIter::from_bytes(obj.0.data, obj.0.hash_kind) .tree_id() .expect("every commit has a tree"); stats.expanded_objects += 1; @@ -305,7 +308,7 @@ mod expand { } Blob => break, Tag => { - id = TagRefIter::from_bytes(obj.0.data) + id = TagRefIter::from_bytes(obj.0.data, obj.0.hash_kind) .target_id() .expect("every tag has a target"); stats.expanded_objects += 1; diff --git a/gix-ref/src/store/file/raw_ext.rs b/gix-ref/src/store/file/raw_ext.rs index 760cceb9ec..320690c146 100644 --- a/gix-ref/src/store/file/raw_ext.rs +++ b/gix-ref/src/store/file/raw_ext.rs @@ -160,24 +160,21 @@ impl ReferenceExt for Reference { let mut oid = self.follow_to_object_packed(store, packed)?; let mut buf = Vec::new(); let peeled_id = loop { - let gix_object::Data { - kind, - data, - hash_kind: _, - } = objects - .try_find(&oid, &mut buf)? - .ok_or_else(|| peel::to_id::Error::NotFound { - oid, - name: self.name.0.clone(), - })?; + let gix_object::Data { kind, data, hash_kind } = + objects + .try_find(&oid, &mut buf)? + .ok_or_else(|| peel::to_id::Error::NotFound { + oid, + name: self.name.0.clone(), + })?; match kind { gix_object::Kind::Tag => { - oid = gix_object::TagRefIter::from_bytes(data).target_id().map_err(|_err| { - peel::to_id::Error::NotFound { + oid = gix_object::TagRefIter::from_bytes(data, hash_kind) + .target_id() + .map_err(|_err| peel::to_id::Error::NotFound { oid, name: self.name.0.clone(), - } - })?; + })?; } _ => break oid, } diff --git a/gix-ref/src/store/packed/transaction.rs b/gix-ref/src/store/packed/transaction.rs index e9990ce90f..4c22078053 100644 --- a/gix-ref/src/store/packed/transaction.rs +++ b/gix-ref/src/store/packed/transaction.rs @@ -107,14 +107,20 @@ impl packed::Transaction { { let mut next_id = new; edit.peeled = loop { - let kind = objects.try_find(&next_id, &mut buf)?.map(|d| d.kind); - match kind { - Some(gix_object::Kind::Tag) => { - next_id = gix_object::TagRefIter::from_bytes(&buf).target_id().map_err(|_| { - prepare::Error::Resolve( - format!("Couldn't get target object id from tag {next_id}").into(), - ) - })?; + let data = objects.try_find(&next_id, &mut buf)?; + match data { + Some(gix_object::Data { + kind: gix_object::Kind::Tag, + data, + hash_kind, + }) => { + next_id = gix_object::TagRefIter::from_bytes(data, hash_kind) + .target_id() + .map_err(|_| { + prepare::Error::Resolve( + format!("Couldn't get target object id from tag {next_id}").into(), + ) + })?; } Some(_) => { break if next_id == new { None } else { Some(next_id) }; diff --git a/gix-revwalk/src/graph/commit.rs b/gix-revwalk/src/graph/commit.rs index 390dfa84d6..4d8c5b0494 100644 --- a/gix-revwalk/src/graph/commit.rs +++ b/gix-revwalk/src/graph/commit.rs @@ -8,7 +8,7 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { /// Return an iterator over the parents of this commit. pub fn iter_parents(&self) -> Parents<'graph, 'cache> { let backing = match &self.backing { - Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)), + Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf, self.hash_kind)), Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())), }; Parents { backing } @@ -20,7 +20,9 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { /// Note that this can only fail if the commit is backed by the object database *and* parsing fails. pub fn committer_timestamp(&self) -> Result { Ok(match &self.backing { - Either::Left(buf) => gix_object::CommitRefIter::from_bytes(buf).committer()?.seconds(), + Either::Left(buf) => gix_object::CommitRefIter::from_bytes(buf, self.hash_kind) + .committer()? + .seconds(), Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp() as SecondsSinceUnixEpoch, // a cast as we cannot represent the error and trying seems overkill }) } @@ -38,7 +40,12 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { &self, ) -> Result<(Option, SecondsSinceUnixEpoch), gix_object::decode::Error> { Ok(match &self.backing { - Either::Left(buf) => (None, gix_object::CommitRefIter::from_bytes(buf).committer()?.seconds()), + Either::Left(buf) => ( + None, + gix_object::CommitRefIter::from_bytes(buf, self.hash_kind) + .committer()? + .seconds(), + ), Either::Right((cache, pos)) => { let commit = cache.commit_at(*pos); ( @@ -57,7 +64,7 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { Ok(match &self.backing { Either::Left(buf) => { use gix_object::commit::ref_iter::Token; - let iter = gix_object::CommitRefIter::from_bytes(buf); + let iter = gix_object::CommitRefIter::from_bytes(buf, self.hash_kind); let mut parents = SmallVec::default(); let mut timestamp = None; for token in iter { diff --git a/gix-revwalk/src/graph/mod.rs b/gix-revwalk/src/graph/mod.rs index a896034c46..691d2ad7cf 100644 --- a/gix-revwalk/src/graph/mod.rs +++ b/gix-revwalk/src/graph/mod.rs @@ -369,6 +369,7 @@ fn try_lookup<'graph, 'cache>( if let Some(cache) = cache { if let Some(pos) = cache.lookup(id) { return Ok(Some(LazyCommit { + hash_kind: id.kind(), backing: Either::Right((cache, pos)), })); } @@ -380,6 +381,7 @@ fn try_lookup<'graph, 'cache>( .map_err(gix_object::find::existing_iter::Error::Find)? { Some(data) => data.kind.is_commit().then_some(LazyCommit { + hash_kind: data.hash_kind, backing: Either::Left(buf), }), None => None, @@ -439,6 +441,7 @@ where /// /// The owned version of this type is called [`Commit`] and can be obtained by calling [`LazyCommit::to_owned()`]. pub struct LazyCommit<'graph, 'cache> { + hash_kind: gix_hash::Kind, backing: Either<&'graph [u8], (&'cache gix_commitgraph::Graph, gix_commitgraph::Position)>, } diff --git a/gix-traverse/src/commit/simple.rs b/gix-traverse/src/commit/simple.rs index 404d947e2e..d237ce225b 100644 --- a/gix-traverse/src/commit/simple.rs +++ b/gix-traverse/src/commit/simple.rs @@ -138,6 +138,10 @@ pub(super) struct State { queue: CommitDateQueue, /// Backing storage for the currently yielded commit. buf: Vec, + /// The object hash kind of the currently yielded commit data in `buf`. + /// It's used to know the kind of hash to expect when a new iterator is returned from `buf` + /// via `Simple::commit_iter()`. + object_hash: gix_hash::Kind, /// Set of commits that were already enqueued for the visible traversal, for cycle-checking. seen: gix_hashtable::HashSet, /// Hidden frontier commits that must not be yielded or crossed during traversal. @@ -247,6 +251,7 @@ mod init { next: Default::default(), queue: gix_revwalk::PriorityQueue::new(), buf: vec![], + object_hash: gix_hash::Kind::Sha1, seen: Default::default(), hidden: Default::default(), hidden_tips: Vec::new(), @@ -262,6 +267,7 @@ mod init { next, queue, buf, + object_hash, seen, hidden, hidden_tips, @@ -271,6 +277,7 @@ mod init { next.clear(); queue.clear(); buf.clear(); + *object_hash = gix_hash::Kind::Sha1; seen.clear(); hidden.clear(); hidden_tips.clear(); @@ -464,7 +471,7 @@ mod init { impl Simple { /// Return an iterator for accessing data of the current commit, parsed lazily. pub fn commit_iter(&self) -> CommitRefIter<'_> { - CommitRefIter::from_bytes(self.commit_data()) + CommitRefIter::from_bytes(self.commit_data(), self.state.object_hash) } /// Return the current commits' raw data, which can be parsed using [`gix_object::CommitRef::from_bytes()`]. @@ -519,6 +526,7 @@ mod init { let (commit_time, oid) = match next.pop()? { (Ok(t) | Err(Reverse(t)), o) => (t, o), }; + state.object_hash = oid.kind(); if state.hidden.contains_key(&oid) { continue; } @@ -592,6 +600,7 @@ mod init { loop { let oid = next.pop_front()?; + state.object_hash = oid.kind(); if state.hidden.contains_key(&oid) { continue; } diff --git a/gix/src/object/commit.rs b/gix/src/object/commit.rs index c0e6272fec..8588b828c6 100644 --- a/gix/src/object/commit.rs +++ b/gix/src/object/commit.rs @@ -86,7 +86,7 @@ impl<'repo> Commit<'repo> { /// # Ok(()) } /// ``` pub fn message_raw(&self) -> Result<&'_ BStr, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data).message() + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()).message() } /// Obtain the message by using intricate knowledge about the encoding, which is fastest and /// can't fail at the expense of error handling. @@ -114,24 +114,24 @@ impl<'repo> Commit<'repo> { /// used for successive calls to string-ish information to avoid decoding the object /// more than once. pub fn decode(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRef::from_bytes(&self.data) + gix_object::CommitRef::from_bytes(&self.data, self.id.kind()) } /// Return an iterator over tokens, representing this commit piece by piece. pub fn iter(&self) -> gix_object::CommitRefIter<'_> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) } /// Return the commits author, with surrounding whitespace trimmed. pub fn author(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .author() .map(|s| s.trim()) } /// Return the commits committer. with surrounding whitespace trimmed. pub fn committer(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .committer() .map(|s| s.trim()) } @@ -153,7 +153,7 @@ impl<'repo> Commit<'repo> { pub fn parent_ids(&self) -> impl Iterator> + '_ { use crate::ext::ObjectIdExt; let repo = self.repo; - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .parent_ids() .map(move |id| id.attach(repo)) } @@ -181,7 +181,7 @@ impl<'repo> Commit<'repo> { /// Parse the commit and return the tree id it points to. pub fn tree_id(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .tree_id() .map(|id| crate::Id::from_id(id, self.repo)) } @@ -217,7 +217,7 @@ impl<'repo> Commit<'repo> { &self, ) -> Result, gix_object::commit::SignedData<'_>)>, gix_object::decode::Error> { - gix_object::CommitRefIter::signature(&self.data) + gix_object::CommitRefIter::signature(&self.data, self.id.kind()) } } diff --git a/gix/src/object/tag.rs b/gix/src/object/tag.rs index 1a7a3de86c..6ae6363a3a 100644 --- a/gix/src/object/tag.rs +++ b/gix/src/object/tag.rs @@ -9,19 +9,19 @@ impl<'repo> Tag<'repo> { /// used for successive calls to string-ish information to avoid decoding the object /// more than once. pub fn decode(&self) -> Result, gix_object::decode::Error> { - gix_object::TagRef::from_bytes(&self.data) + gix_object::TagRef::from_bytes(&self.data, self.id.kind()) } /// Decode this tag partially and return the id of its target. pub fn target_id(&self) -> Result, gix_object::decode::Error> { - gix_object::TagRefIter::from_bytes(&self.data) + gix_object::TagRefIter::from_bytes(&self.data, self.id.kind()) .target_id() .map(|id| id.attach(self.repo)) } /// Decode this tag partially and return the tagger, if the field exists. pub fn tagger(&self) -> Result>, gix_object::decode::Error> { - gix_object::TagRefIter::from_bytes(&self.data).tagger() + gix_object::TagRefIter::from_bytes(&self.data, self.id.kind()).tagger() } }