diff --git a/examples/build_annotated_kfx_fixture.rs b/examples/build_annotated_kfx_fixture.rs new file mode 100644 index 0000000..de396f8 --- /dev/null +++ b/examples/build_annotated_kfx_fixture.rs @@ -0,0 +1,80 @@ +//! Build a minimal KFX fixture for the annotated-entity test. +//! +//! Produces a tiny KFX container with a single `book_metadata` entity wrapped +//! in a `$490::{ ... }` annotation (the real-world pattern observed in KFX-ZIP +//! metadata sidecars). All identifying values are synthetic. +//! +//! Run with: `cargo run --release --example build_annotated_kfx_fixture` +//! +//! Output: `tests/fixtures/annotated_metadata.kfx` + +use std::io::Write; + +use boko::kfx::ion::IonValue; +use boko::kfx::serialization::{SerializedEntity, create_entity_data, serialize_container}; +use boko::kfx::symbols::KfxSymbol; + +fn metadata_entry(key: &str, value: &str) -> IonValue { + IonValue::Struct(vec![ + (KfxSymbol::Key as u64, IonValue::String(key.to_string())), + (KfxSymbol::Value as u64, IonValue::String(value.to_string())), + ]) +} + +fn main() -> std::io::Result<()> { + // Build categorised_metadata list: a single "kindle_title_metadata" category + // with synthetic title/author/ASIN/etc. + let kindle_title_metadata = IonValue::Struct(vec![ + ( + KfxSymbol::Category as u64, + IonValue::String("kindle_title_metadata".to_string()), + ), + ( + KfxSymbol::Metadata as u64, + IonValue::List(vec![ + metadata_entry("title", "Annotated Entity Test Book"), + metadata_entry("author", "Boko Test Author"), + metadata_entry("publisher", "Boko Test Press"), + metadata_entry("language", "en"), + metadata_entry("ASIN", "B000TESTASIN"), + metadata_entry("book_id", "synthetic-book-id-0001"), + metadata_entry("cde_content_type", "EBOK"), + metadata_entry("issue_date", "2026-01-01"), + ]), + ), + ]); + + // book_metadata struct: { categorised_metadata: [ ... ] } + let book_metadata_inner = IonValue::Struct(vec![( + KfxSymbol::CategorisedMetadata as u64, + IonValue::List(vec![kindle_title_metadata]), + )]); + + // Wrap in the $490 (book_metadata) annotation — the pattern this fixture + // exists to test. + let annotated = IonValue::Annotated( + vec![KfxSymbol::BookMetadata as u64], + Box::new(book_metadata_inner), + ); + + let entity_data = create_entity_data(&annotated); + + let entities = vec![SerializedEntity { + id: 0, + entity_type: KfxSymbol::BookMetadata as u32, + data: entity_data, + }]; + + let container_id = "CR!BOKOTESTANNOTATEDMETADATAFIX"; + let symtab_ion: Vec = Vec::new(); + let format_caps_ion: Vec = Vec::new(); + + let bytes = serialize_container(container_id, &entities, &symtab_ion, &format_caps_ion); + + let out_path = "tests/fixtures/annotated_metadata.kfx"; + let mut f = std::fs::File::create(out_path)?; + f.write_all(&bytes)?; + println!("Wrote {} ({} bytes)", out_path, bytes.len()); + + Ok(()) +} diff --git a/src/import/kfx.rs b/src/import/kfx.rs index 5fefda2..9463e4d 100644 --- a/src/import/kfx.rs +++ b/src/import/kfx.rs @@ -404,10 +404,21 @@ impl KfxImporter { } /// Parse an entity as Ion and return the parsed value. + /// + /// Strips any top-level Ion type annotation (e.g. `$490::{ ... }`) so + /// callers can rely on the returned value being the entity struct itself, + /// matching every other callsite in this importer that does + /// `value.as_struct()` directly. Some KFX containers tag entities with an + /// annotation indicating their schema type; without this strip, those + /// entities silently fall through `get_field()` lookups because the + /// outer value is an `Annotated`, not a `Struct`. fn parse_entity_ion(&self, loc: EntityLoc) -> io::Result { let ion_data = self.read_entity(loc)?; let mut parser = IonParser::new(&ion_data); - parser.parse() + Ok(match parser.parse()? { + IonValue::Annotated(_, inner) => *inner, + other => other, + }) } /// Get a symbol's text from an IonValue (handles both Symbol and String). diff --git a/tests/fixtures/annotated_metadata.kfx.gz b/tests/fixtures/annotated_metadata.kfx.gz new file mode 100644 index 0000000..87ba85d Binary files /dev/null and b/tests/fixtures/annotated_metadata.kfx.gz differ diff --git a/tests/kfx_annotated_metadata.rs b/tests/kfx_annotated_metadata.rs new file mode 100644 index 0000000..2420025 --- /dev/null +++ b/tests/kfx_annotated_metadata.rs @@ -0,0 +1,55 @@ +//! Regression test: KFX entities wrapped in a top-level Ion type annotation +//! (e.g. `$490::{ ... }`) must still have their fields populated when imported. +//! +//! Before the fix, `parse_entity_ion` returned the `Annotated` value as-is and +//! every downstream `.as_struct()` call returned `None`, silently dropping the +//! entity's fields. The fixture (`tests/fixtures/annotated_metadata.kfx.gz`) is +//! a 641-byte synthetic KFX container with a single `$490`-annotated +//! `book_metadata` entity. It is regenerated by +//! `cargo run --release --example build_annotated_kfx_fixture`. + +use boko::Book; +use flate2::read::GzDecoder; +use std::io::Read; +use std::path::Path; + +fn decompress_fixture() -> Option { + let gz_path = "tests/fixtures/annotated_metadata.kfx.gz"; + if !Path::new(gz_path).exists() { + eprintln!("Skipping test - fixture not found: {gz_path}"); + return None; + } + + let gz_data = std::fs::read(gz_path).expect("Failed to read fixture"); + let mut decoder = GzDecoder::new(&gz_data[..]); + let mut kfx_data = Vec::new(); + decoder + .read_to_end(&mut kfx_data) + .expect("Failed to decompress fixture"); + + let mut tmp = tempfile::Builder::new() + .suffix(".kfx") + .tempfile() + .expect("Failed to create temp file"); + std::io::Write::write_all(&mut tmp, &kfx_data).expect("Failed to write temp file"); + Some(tmp) +} + +#[test] +fn annotated_book_metadata_entity_is_imported() { + let Some(tmp) = decompress_fixture() else { + return; + }; + + let book = Book::open(tmp.path()).expect("opening annotated-metadata KFX fixture"); + let meta = book.metadata(); + + // Every field in the synthetic fixture's book_metadata entity must round-trip. + // Without the annotation-stripping fix, all of these are empty / default. + assert_eq!(meta.title, "Annotated Entity Test Book"); + assert_eq!(meta.authors, vec!["Boko Test Author".to_string()]); + assert_eq!(meta.publisher.as_deref(), Some("Boko Test Press")); + assert_eq!(meta.language, "en"); + assert_eq!(meta.identifier, "synthetic-book-id-0001"); + assert_eq!(meta.date.as_deref(), Some("2026-01-01")); +}