From 347f9bd1879dc9688a801a099e09d64d9b4d69fe Mon Sep 17 00:00:00 2001
From: maclei <imaclean@gmail.com>
Date: Sun, 24 May 2026 23:41:25 +0900
Subject: [PATCH] EPUB exporter: resolve TOC fragments and write embedded font
 assets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two narrowly-scoped improvements to `EpubExporter`:

## TOC fragment resolution

AZW3 and MOBI importers leave TOC entries with bare chapter hrefs
(`partNNNN.html` or `content.html`) at open time — the `#fileposN`
/ `#aid-XXXX` fragment is populated only when `Book::resolve_toc` is
called. Previously, the EPUB exporter generated the NCX without calling
it first, so every TOC entry within a single chapter collapsed onto the same
`partNNNN.html` href and readers landed on chapter starts instead of
the intended in-chapter target.

Both `export_raw` and `export_normalized` now call
`book.resolve_toc()` before generating the NCX. EPUB importers don't
need this (their TOC is already resolved by the importer), so the call
is a no-op for that backend.

## Font writing in normalized export

`export_normalized` writes assets from `NormalizedContent::assets`,
which only contains resources referenced from the IR DOM. Embedded
fonts are typically referenced from CSS `@font-face` rules rather
than DOM nodes, so they never made it into the normalized asset list
and the exported EPUB shipped `@font-face` declarations whose `src:`
URLs pointed at files that were never written into the ZIP.

The exporter now snapshots `book.list_assets()` before normalization
and, after writing the normalized assets, additionally enumerates every
`fonts/*` path that wasn't already covered — adding both an OPF
manifest entry and a ZIP entry for each.

`export_raw` is unaffected here because it already writes the full
`book.list_assets()` set verbatim.

## Tests

`tests/epub_exports_fonts_and_toc.rs`:

- `epub_export_writes_font_assets_from_kfx` uses the existing
  `tests/fixtures/fonts_only.kfx.gz` fixture (the one added by #13)
  to confirm that the three KFX font assets the importer surfaces are
  written into the exported EPUB's `OEBPS/fonts/` directory and
  referenced in the OPF manifest.
- `epub_export_resolves_azw3_toc_fragments` uses the existing
  `tests/fixtures/epictetus.azw3` fixture to confirm that the
  generated `toc.ncx` contains resolved `#aid-XXXX` fragments
  rather than the bare chapter hrefs the importer initially produced.

Both tests fail without this patch's changes; both pass with them.

## Related work

#13 surfaced KFX fonts; this PR makes them survive the EPUB export.
For the AZW3 / MOBI side, font extraction is being added in a separate
PR — the font-writing logic here is format-agnostic and benefits any
importer that surfaces `fonts/*` paths.
---
 src/export/epub.rs                  |  59 ++++++++++
 tests/epub_exports_fonts_and_toc.rs | 169 ++++++++++++++++++++++++++++
 2 files changed, 228 insertions(+)
 create mode 100644 tests/epub_exports_fonts_and_toc.rs

diff --git a/src/export/epub.rs b/src/export/epub.rs
index fd7eef2..6ea08c6 100644
--- a/src/export/epub.rs
+++ b/src/export/epub.rs
@@ -80,6 +80,14 @@ impl Exporter for EpubExporter {
 impl EpubExporter {
     /// Export with passthrough mode (preserves original HTML/CSS).
     fn export_raw<W: Write + Seek>(&self, book: &mut Book, writer: &mut W) -> io::Result<()> {
+        // Resolve TOC fragments before we generate the NCX. AZW3 and MOBI
+        // importers leave TOC entries with bare chapter hrefs until
+        // `resolve_toc()` populates the `#fileposN` / `#id` suffix from the
+        // book's NCX / position map. Without this call the exported NCX has
+        // unresolvable anchors and readers land on chapter starts instead of
+        // the intended in-chapter targets.
+        book.resolve_toc();
+
         let mut zip = ZipWriter::new(writer);
 
         let compression_level = self.config.compression_level.unwrap_or(6);
@@ -184,6 +192,18 @@ impl EpubExporter {
     ) -> io::Result<()> {
         use super::normalize::normalize_book;
 
+        // Resolve TOC fragments before generating the NCX. Same rationale as
+        // `export_raw`: AZW3 / MOBI importers leave TOC entries with bare
+        // chapter hrefs until this is called.
+        book.resolve_toc();
+
+        // Snapshot every asset the importer surfaces. The normalized content
+        // pipeline only records assets it actively references, which misses
+        // resources like embedded fonts that are referenced from CSS rather
+        // than from the IR DOM. We add those back into the manifest and ZIP
+        // below.
+        let all_assets: Vec<_> = book.list_assets().to_vec();
+
         // Normalize the book content
         let content = normalize_book(book)?;
 
@@ -243,6 +263,28 @@ impl EpubExporter {
             });
         }
 
+        // Add font assets the importer surfaced that aren't already covered
+        // by normalized content. Fonts are typically referenced from CSS, not
+        // from DOM nodes, so `normalize_book` doesn't pull them into
+        // `content.assets`. Without this we'd emit `@font-face` rules whose
+        // `src:` URLs point at files we never wrote into the ZIP.
+        let mut extra_font_idx = 0;
+        for asset_path in &all_assets {
+            let path_str = asset_path.to_string_lossy();
+            if !path_str.starts_with("fonts/") {
+                continue;
+            }
+            if content.assets.iter().any(|a| a == &*path_str) {
+                continue;
+            }
+            manifest_items.push(ManifestItem {
+                id: format!("font_{}", extra_font_idx),
+                href: format!("OEBPS/{}", sanitize_path(&path_str)),
+                media_type: guess_media_type(&path_str),
+            });
+            extra_font_idx += 1;
+        }
+
         // 4. Write content.opf
         let opf = generate_opf(book.metadata(), &manifest_items, &spine_refs);
         zip.start_file("OEBPS/content.opf", deflated)
@@ -280,6 +322,23 @@ impl EpubExporter {
             }
         }
 
+        // 9. Write font assets not already covered by normalized content.
+        // Matches the manifest entries added above.
+        for asset_path in &all_assets {
+            let path_str = asset_path.to_string_lossy();
+            if !path_str.starts_with("fonts/") {
+                continue;
+            }
+            if content.assets.iter().any(|a| a == &*path_str) {
+                continue;
+            }
+            let zip_path = format!("OEBPS/{}", sanitize_path(&path_str));
+            if let Ok(data) = book.load_asset(asset_path) {
+                zip.start_file(&zip_path, deflated).map_err(io_error)?;
+                zip.write_all(&data)?;
+            }
+        }
+
         zip.finish().map_err(io_error)?;
         Ok(())
     }
diff --git a/tests/epub_exports_fonts_and_toc.rs b/tests/epub_exports_fonts_and_toc.rs
new file mode 100644
index 0000000..84f3cb2
--- /dev/null
+++ b/tests/epub_exports_fonts_and_toc.rs
@@ -0,0 +1,169 @@
+//! Regression tests for the EPUB exporter's font writing and TOC resolution.
+//!
+//! Covers two distinct improvements to the EPUB exporter:
+//!
+//! - Embedded fonts surfaced by the importer (`fonts/font_NNNN.*` asset paths)
+//!   are now written into the exported EPUB even when the normalized-content
+//!   pipeline did not pull them into its asset list. Without this, books with
+//!   custom typography exported a `@font-face` stylesheet whose `src:` URLs
+//!   pointed at files the exporter never wrote into the ZIP.
+//! - `book.resolve_toc()` is now called by the exporter, so TOC entries that
+//!   importers leave with bare chapter hrefs (AZW3 / MOBI) get their fragment
+//!   suffix populated before the NCX is generated.
+
+use std::io::{Cursor, Read};
+use std::path::Path;
+
+use boko::Book;
+use boko::export::{EpubExporter, Exporter};
+use flate2::read::GzDecoder;
+use zip::ZipArchive;
+
+/// Decompress the `fonts_only.kfx.gz` fixture into a `.kfx` temp file.
+///
+/// The fixture is a stripped KFX containing only `bcRawFont` and `Font`
+/// entities — the shape that exercises the font discovery added in #13.
+/// Returns `None`, after a note on stderr, if the fixture file is absent.
+fn decompress_fonts_only_fixture() -> Option<tempfile::NamedTempFile> {
+    let gz_path = "tests/fixtures/fonts_only.kfx.gz";
+    if !Path::new(gz_path).exists() {
+        eprintln!("Skipping test - fixture not found: {gz_path}");
+        return None;
+    }
+
+    let gz_data = std::fs::read(gz_path).expect("read fixture");
+    let mut decoder = GzDecoder::new(&gz_data[..]);
+    let mut kfx_data = Vec::new();
+    decoder.read_to_end(&mut kfx_data).expect("decompress");
+
+    let mut tmp = tempfile::Builder::new()
+        .suffix(".kfx")
+        .tempfile()
+        .expect("temp file");
+    std::io::Write::write_all(&mut tmp, &kfx_data).expect("write temp");
+    Some(tmp)
+}
+
+#[test]
+fn epub_export_writes_font_assets_from_kfx() {
+    let Some(tmp) = decompress_fonts_only_fixture() else {
+        return;
+    };
+
+    let mut book = Book::open(tmp.path()).expect("open KFX fonts_only fixture");
+
+    // Precondition: without fonts in list_assets the export checks below would be vacuous.
+    let surfaced_fonts: Vec<_> = book
+        .list_assets()
+        .iter()
+        .filter(|p| p.to_string_lossy().starts_with("fonts/"))
+        .cloned()
+        .collect();
+    assert_eq!(
+        surfaced_fonts.len(),
+        3,
+        "importer should expose 3 font assets, got {surfaced_fonts:?}",
+    );
+
+    // Export into an in-memory buffer so the resulting ZIP can be inspected directly.
+    let mut buf = Cursor::new(Vec::<u8>::new());
+    EpubExporter::new()
+        .export(&mut book, &mut buf)
+        .expect("epub export");
+
+    let epub_bytes = buf.into_inner();
+    let mut zip = ZipArchive::new(Cursor::new(epub_bytes)).expect("open epub zip");
+
+    // Every surfaced font must be a ZIP entry under OEBPS/ and not be empty or truncated.
+    for asset in &surfaced_fonts {
+        let zip_path = format!("OEBPS/{}", asset.to_string_lossy());
+        let mut entry = zip
+            .by_name(&zip_path)
+            .unwrap_or_else(|_| panic!("missing {zip_path} in exported EPUB"));
+        let mut data = Vec::new();
+        entry.read_to_end(&mut data).expect("read font entry");
+        assert!(
+            data.len() > 1000,
+            "{zip_path} truncated: only {} bytes",
+            data.len()
+        );
+    }
+
+    // The OPF manifest must reference each font too. Hrefs are expected to be
+    // relative to the OPF's own directory (OEBPS/), hence the bare `fonts/...`
+    // needle; NOTE(review): a substring match, so a prefixed href also passes.
+    let mut opf = String::new();
+    zip.by_name("OEBPS/content.opf")
+        .expect("OEBPS/content.opf present")
+        .read_to_string(&mut opf)
+        .expect("read opf");
+    for asset in &surfaced_fonts {
+        let needle = asset.to_string_lossy().to_string();
+        assert!(
+            opf.contains(&needle),
+            "content.opf manifest missing href \"{needle}\""
+        );
+    }
+}
+
+#[test]
+fn epub_export_resolves_azw3_toc_fragments() {
+    let azw3_path = "tests/fixtures/epictetus.azw3";
+    if !Path::new(azw3_path).exists() {
+        eprintln!("Skipping test - fixture not found: {azw3_path}");
+        return;
+    }
+
+    // Precondition: right after open, the AZW3 importer leaves the children of
+    // "The Enchiridion" with bare chapter hrefs (part0002.html in this fixture,
+    // per the original author); only the absence of `#` is asserted below.
+    {
+        let book = Book::open(azw3_path).expect("open epictetus.azw3");
+        let mut enchiridion_section_hrefs = Vec::new();
+        for entry in book.toc() {
+            if entry.title == "The Enchiridion" {
+                for child in &entry.children {
+                    enchiridion_section_hrefs.push(child.href.clone());
+                }
+                break;
+            }
+        }
+        assert!(
+            !enchiridion_section_hrefs.is_empty(),
+            "expected nested Enchiridion sections"
+        );
+        for href in &enchiridion_section_hrefs {
+            assert!(
+                !href.contains('#'),
+                "expected bare chapter href before resolve_toc, got {href}"
+            );
+        }
+    }
+
+    // Export a freshly opened book to an in-memory EPUB. The test never calls
+    // `resolve_toc()` itself, so any fragments in the NCX come from the exporter.
+    let mut book = Book::open(azw3_path).expect("re-open epictetus.azw3");
+    let mut buf = Cursor::new(Vec::<u8>::new());
+    EpubExporter::new()
+        .export(&mut book, &mut buf)
+        .expect("epub export");
+
+    let epub_bytes = buf.into_inner();
+    let mut zip = ZipArchive::new(Cursor::new(epub_bytes)).expect("open epub zip");
+
+    let mut ncx = String::new();
+    zip.by_name("OEBPS/toc.ncx")
+        .expect("OEBPS/toc.ncx present")
+        .read_to_string(&mut ncx)
+        .expect("read ncx");
+
+    // Resolved entries carry an `#aid-XXXX` fragment. The pre-fix exporter
+    // emitted bare `partNNNN.html` hrefs, so a non-trivial count shows that
+    // resolve_toc ran; the `> 10` threshold is a loose, fixture-specific bound.
+    let aid_refs = ncx.matches("#aid-").count();
+    assert!(
+        aid_refs > 10,
+        "expected resolved #aid- fragments in NCX, got only {aid_refs}; \
+         resolve_toc likely didn't run during export. NCX:\n{ncx}",
+    );
+}