From c3d79ce3dbbd2301282541be97cccc1f2aff7f7e Mon Sep 17 00:00:00 2001 From: maclei Date: Sun, 17 May 2026 19:18:21 +0900 Subject: [PATCH 1/2] Strip top-level Ion annotation in KFX parse_entity_ion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some KFX containers tag entities with an Ion type annotation, e.g. \`\$490::{ ... }\`, where the outer Ion value is an Annotated wrapper around the actual entity struct. Every callsite of \`parse_entity_ion\` in \`import/kfx.rs\` then calls \`.as_struct()\` (directly or via \`get_field()\`) on the returned value — which returns \`None\` for an \`Annotated\`, silently dropping the entity's fields. Strip the wrapper once at the parse boundary so all importer paths can treat the returned value as the entity itself, consistent with how \`bin/kfx-dump.rs\` already unwraps annotated values at every consumer. This matches the Ion spec's treatment of annotations as schema-level metadata that does not change the value's logical shape: https://amazon-ion.github.io/ion-docs/docs/spec.html#annot --- src/import/kfx.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/import/kfx.rs b/src/import/kfx.rs index 5fefda2..9463e4d 100644 --- a/src/import/kfx.rs +++ b/src/import/kfx.rs @@ -404,10 +404,21 @@ impl KfxImporter { } /// Parse an entity as Ion and return the parsed value. + /// + /// Strips any top-level Ion type annotation (e.g. `$490::{ ... }`) so + /// callers can rely on the returned value being the entity struct itself, + /// matching every other callsite in this importer that does + /// `value.as_struct()` directly. Some KFX containers tag entities with an + /// annotation indicating their schema type; without this strip, those + /// entities silently fall through `get_field()` lookups because the + /// outer value is an `Annotated`, not a `Struct`. fn parse_entity_ion(&self, loc: EntityLoc) -> io::Result { let ion_data = self.read_entity(loc)?; let mut parser = IonParser::new(&ion_data); - parser.parse() + Ok(match parser.parse()? { + IonValue::Annotated(_, inner) => *inner, + other => other, + }) } /// Get a symbol's text from an IonValue (handles both Symbol and String). From dab54a726dd7635d58be717bb7dab09217b6de29 Mon Sep 17 00:00:00 2001 From: maclei Date: Mon, 18 May 2026 00:54:08 +0900 Subject: [PATCH 2/2] Add fixture and integration test for annotated KFX entities Adds a 641-byte (501 bytes gzipped) synthetic KFX fixture with a single `\$490`-annotated `book_metadata` entity carrying eight categorised metadata fields (title/author/publisher/language/ASIN/book_id/cde/issue_date). `tests/kfx_annotated_metadata.rs` opens the fixture via the public `Book::open` API and asserts that all metadata fields round-trip. Before the parent commit's fix, this test fails because the importer's `.as_struct()` calls return `None` for the annotated value and every field comes back empty. `examples/build_annotated_kfx_fixture.rs` is a small generator that uses the public `kfx::serialization` helpers to rebuild the fixture from synthetic data. Run with `cargo run --release --example build_annotated_kfx_fixture` to regenerate. --- examples/build_annotated_kfx_fixture.rs | 80 +++++++++++++++++++++++ tests/fixtures/annotated_metadata.kfx.gz | Bin 0 -> 501 bytes tests/kfx_annotated_metadata.rs | 55 ++++++++++++++++ 3 files changed, 135 insertions(+) create mode 100644 examples/build_annotated_kfx_fixture.rs create mode 100644 tests/fixtures/annotated_metadata.kfx.gz create mode 100644 tests/kfx_annotated_metadata.rs diff --git a/examples/build_annotated_kfx_fixture.rs b/examples/build_annotated_kfx_fixture.rs new file mode 100644 index 0000000..de396f8 --- /dev/null +++ b/examples/build_annotated_kfx_fixture.rs @@ -0,0 +1,80 @@ +//! Build a minimal KFX fixture for the annotated-entity test. +//! +//! Produces a tiny KFX container with a single `book_metadata` entity wrapped +//! in a `$490::{ ... }` annotation (the real-world pattern observed in KFX-ZIP +//! metadata sidecars). All identifying values are synthetic. +//! +//! Run with: `cargo run --release --example build_annotated_kfx_fixture` +//! +//! Output: `tests/fixtures/annotated_metadata.kfx` + +use std::io::Write; + +use boko::kfx::ion::IonValue; +use boko::kfx::serialization::{SerializedEntity, create_entity_data, serialize_container}; +use boko::kfx::symbols::KfxSymbol; + +fn metadata_entry(key: &str, value: &str) -> IonValue { + IonValue::Struct(vec![ + (KfxSymbol::Key as u64, IonValue::String(key.to_string())), + (KfxSymbol::Value as u64, IonValue::String(value.to_string())), + ]) +} + +fn main() -> std::io::Result<()> { + // Build categorised_metadata list: a single "kindle_title_metadata" category + // with synthetic title/author/ASIN/etc. + let kindle_title_metadata = IonValue::Struct(vec![ + ( + KfxSymbol::Category as u64, + IonValue::String("kindle_title_metadata".to_string()), + ), + ( + KfxSymbol::Metadata as u64, + IonValue::List(vec![ + metadata_entry("title", "Annotated Entity Test Book"), + metadata_entry("author", "Boko Test Author"), + metadata_entry("publisher", "Boko Test Press"), + metadata_entry("language", "en"), + metadata_entry("ASIN", "B000TESTASIN"), + metadata_entry("book_id", "synthetic-book-id-0001"), + metadata_entry("cde_content_type", "EBOK"), + metadata_entry("issue_date", "2026-01-01"), + ]), + ), + ]); + + // book_metadata struct: { categorised_metadata: [ ... ] } + let book_metadata_inner = IonValue::Struct(vec![( + KfxSymbol::CategorisedMetadata as u64, + IonValue::List(vec![kindle_title_metadata]), + )]); + + // Wrap in the $490 (book_metadata) annotation — the pattern this fixture + // exists to test. + let annotated = IonValue::Annotated( + vec![KfxSymbol::BookMetadata as u64], + Box::new(book_metadata_inner), + ); + + let entity_data = create_entity_data(&annotated); + + let entities = vec![SerializedEntity { + id: 0, + entity_type: KfxSymbol::BookMetadata as u32, + data: entity_data, + }]; + + let container_id = "CR!BOKOTESTANNOTATEDMETADATAFIX"; + let symtab_ion: Vec = Vec::new(); + let format_caps_ion: Vec = Vec::new(); + + let bytes = serialize_container(container_id, &entities, &symtab_ion, &format_caps_ion); + + let out_path = "tests/fixtures/annotated_metadata.kfx"; + let mut f = std::fs::File::create(out_path)?; + f.write_all(&bytes)?; + println!("Wrote {} ({} bytes)", out_path, bytes.len()); + + Ok(()) +} diff --git a/tests/fixtures/annotated_metadata.kfx.gz b/tests/fixtures/annotated_metadata.kfx.gz new file mode 100644 index 0000000000000000000000000000000000000000..87ba85d3509e116e35a5a1c0a4ccde841fa0a402 GIT binary patch literal 501 zcmVCIBVifk9bid)z9LVSU}wvXZE z;Po05-@w@~zYrXTIcNU!JLhoD)astEqa6*QC4^87&DK~$KiB5m|E!_0{0OhR!`jVC zwSKMcyL-Old3E3M-R~;}8o){v+Z~Hp4~Aa1`(s zw&xJuv~%W~H!g3iE$l*H*uRQ~?RorvT0Xezdj4$GX_S&c#6q3-iNFySSnr%^-{l7=A5C?A)a^TzGJyq&43{dgFnq#= zQDrQV11KNkT(nqQfxW64!tBwRYH|qWvn+3jFxar}$8(I%YB9+Xd0Oq;p-1AWvX zRGvV2=MGwu`J|yr263bko$!K+77=l1PDC?~Oa?-fdT*kTz3E7 literal 0 HcmV?d00001 diff --git a/tests/kfx_annotated_metadata.rs b/tests/kfx_annotated_metadata.rs new file mode 100644 index 0000000..2420025 --- /dev/null +++ b/tests/kfx_annotated_metadata.rs @@ -0,0 +1,55 @@ +//! Regression test: KFX entities wrapped in a top-level Ion type annotation +//! (e.g. `$490::{ ... }`) must still have their fields populated when imported. +//! +//! Before the fix, `parse_entity_ion` returned the `Annotated` value as-is and +//! every downstream `.as_struct()` call returned `None`, silently dropping the +//! entity's fields. The fixture (`tests/fixtures/annotated_metadata.kfx.gz`) is +//! a 641-byte synthetic KFX container with a single `$490`-annotated +//! `book_metadata` entity. It is regenerated by +//! `cargo run --release --example build_annotated_kfx_fixture`. + +use boko::Book; +use flate2::read::GzDecoder; +use std::io::Read; +use std::path::Path; + +fn decompress_fixture() -> Option { + let gz_path = "tests/fixtures/annotated_metadata.kfx.gz"; + if !Path::new(gz_path).exists() { + eprintln!("Skipping test - fixture not found: {gz_path}"); + return None; + } + + let gz_data = std::fs::read(gz_path).expect("Failed to read fixture"); + let mut decoder = GzDecoder::new(&gz_data[..]); + let mut kfx_data = Vec::new(); + decoder + .read_to_end(&mut kfx_data) + .expect("Failed to decompress fixture"); + + let mut tmp = tempfile::Builder::new() + .suffix(".kfx") + .tempfile() + .expect("Failed to create temp file"); + std::io::Write::write_all(&mut tmp, &kfx_data).expect("Failed to write temp file"); + Some(tmp) +} + +#[test] +fn annotated_book_metadata_entity_is_imported() { + let Some(tmp) = decompress_fixture() else { + return; + }; + + let book = Book::open(tmp.path()).expect("opening annotated-metadata KFX fixture"); + let meta = book.metadata(); + + // Every field in the synthetic fixture's book_metadata entity must round-trip. + // Without the annotation-stripping fix, all of these are empty / default. + assert_eq!(meta.title, "Annotated Entity Test Book"); + assert_eq!(meta.authors, vec!["Boko Test Author".to_string()]); + assert_eq!(meta.publisher.as_deref(), Some("Boko Test Press")); + assert_eq!(meta.language, "en"); + assert_eq!(meta.identifier, "synthetic-book-id-0001"); + assert_eq!(meta.date.as_deref(), Some("2026-01-01")); +}