From b4a00f3b5c74d31071dc00258404694b9f8aa432 Mon Sep 17 00:00:00 2001 From: vreoo <74427567+vreoo@users.noreply.github.com> Date: Thu, 7 May 2026 22:33:08 +0300 Subject: [PATCH 1/3] Enhance EPUB writer to support multiple EPUB versions and improve HTML format detection. Added checks for EPUB format variants and updated transformation functions to accommodate versioning. Introduced tests for raw EPUB attributes rendering. --- src/Text/Pandoc/Writers/EPUB.hs | 39 +++++++++++++++++-------------- src/Text/Pandoc/Writers/HTML.hs | 10 ++++++++ test/command/8880.md | 41 +++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 17 deletions(-) create mode 100644 test/command/8880.md diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 030270571801..39d3f6274ec8 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -466,8 +466,8 @@ pandocToEPUB :: PandocMonad m pandocToEPUB version opts doc = do let doc' = ensureValidXmlIdentifiers doc -- handle pictures - Pandoc meta blocks <- walkM (transformInline opts) doc' >>= - walkM transformBlock + Pandoc meta blocks <- walkM (transformInline version opts) doc' >>= + walkM (transformBlock version) picEntries <- mapMaybe (snd . snd) <$> gets stMediaPaths epubSubdir <- gets stEpubSubdir @@ -1203,42 +1203,47 @@ getMediaNextNewName ext = do modify $ \st -> st { stMediaNextId = nextId + 1 } return $ "file" ++ show nextId ++ ext -isHtmlFormat :: Format -> Bool -isHtmlFormat (Format "html") = True -isHtmlFormat (Format "html4") = True -isHtmlFormat (Format "html5") = True -isHtmlFormat _ = False +isHtmlFormat :: EPUBVersion -> Format -> Bool +isHtmlFormat _ (Format "html") = True +isHtmlFormat _ (Format "html4") = True +isHtmlFormat _ (Format "html5") = True +isHtmlFormat _ (Format "epub") = True +isHtmlFormat EPUB2 (Format "epub2") = True +isHtmlFormat EPUB3 (Format "epub3") = True +isHtmlFormat _ _ = False transformBlock :: PandocMonad m - => Block + => EPUBVersion + -> Block -> E m Block -transformBlock (RawBlock fmt raw) - | isHtmlFormat fmt = do +transformBlock version (RawBlock fmt raw) + | isHtmlFormat version fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') -transformBlock b = return b +transformBlock _ b = return b transformInline :: PandocMonad m - => WriterOptions + => EPUBVersion + -> WriterOptions -> Inline -> E m Inline -transformInline _opts (Image attr@(_,_,kvs) lab (src,tit)) +transformInline _ _opts (Image attr@(_,_,kvs) lab (src,tit)) | isNothing (lookup "external" kvs) = do newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) -transformInline opts x@(Math t m) +transformInline _ opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do newsrc <- modifyMediaRef (T.unpack (url <> urlEncode m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] -transformInline _opts (RawInline fmt raw) - | isHtmlFormat fmt = do +transformInline version _opts (RawInline fmt raw) + | isHtmlFormat version fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') -transformInline _ x = return x +transformInline _ _ x = return x (!) :: (t -> Element) -> [(Text, Text)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 93473a327d5d..0f1d378a9110 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1795,10 +1795,20 @@ intrinsicEventsHTML4 = isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool isRawHtml f = do html5 <- gets stHtml5 + epubVersion <- gets stEPUBVersion return $ f == Format "html" || ((html5 && f == Format "html5") || f == Format "html4") || + isEpubFormat epubVersion f || isSlideVariant f +-- | Check to see if Format matches with an EPUB variant +isEpubFormat :: Maybe EPUBVersion -> Format -> Bool +isEpubFormat Nothing _ = False +isEpubFormat (Just EPUB2) f = + f == Format "epub" || f == Format "epub2" +isEpubFormat (Just EPUB3) f = + f == Format "epub" || f == Format "epub3" + -- | Check to see if Format matches with an HTML slide variant isSlideVariant :: Format -> Bool isSlideVariant f = f `elem` [Format "s5", Format "slidy", Format "slideous", diff --git a/test/command/8880.md b/test/command/8880.md new file mode 100644 index 000000000000..a165ad8ef3cf --- /dev/null +++ b/test/command/8880.md @@ -0,0 +1,41 @@ +Raw EPUB attributes are rendered in EPUB output. + +``` +% pandoc -f markdown -t epub --metadata title=Raw -o - | pandoc -f epub -t html +~~~ {=epub} +

ok

+~~~ +^D +

+

+
+

Raw

+

ok

+
+``` + +``` +% pandoc -f native -t epub --metadata title=Raw -o - | pandoc -f epub -t html +[ RawBlock (Format "epub") "

ok

" ] +^D +

+

+
+

Raw

+

ok

+
+``` + +``` +% pandoc -f markdown -t epub3 --metadata title=Raw -o - | pandoc -f epub -t html +~~~ {=epub3} +

ok

+~~~ +^D +

+

+
+

Raw

+

ok

+
+``` From 329ad27948e0897289493069b90eb12e77f22eee Mon Sep 17 00:00:00 2001 From: vreoo <74427567+vreoo@users.noreply.github.com> Date: Sun, 17 May 2026 08:37:34 +0300 Subject: [PATCH 2/3] Enhance test cases --- test/command/8880.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/test/command/8880.md b/test/command/8880.md index a165ad8ef3cf..f9b198c11cf0 100644 --- a/test/command/8880.md +++ b/test/command/8880.md @@ -1,10 +1,8 @@ Raw EPUB attributes are rendered in EPUB output. ``` -% pandoc -f markdown -t epub --metadata title=Raw -o - | pandoc -f epub -t html -~~~ {=epub} -

ok

-~~~ +% pandoc -f native -t epub --metadata title=Raw -o - | pandoc -f epub -t html +[ RawBlock (Format "epub") "

ok

" ] ^D

@@ -14,16 +12,15 @@ Raw EPUB attributes are rendered in EPUB output. ``` +Raw EPUB2 attributes are omitted from EPUB3 output. + ``` -% pandoc -f native -t epub --metadata title=Raw -o - | pandoc -f epub -t html -[ RawBlock (Format "epub") "

ok

" ] -^D -

-

-
-

Raw

+% pandoc -f markdown -t epub3 --metadata title=Raw -o - | pandoc -f epub -t html | grep ok +~~~ {=epub2}

ok

-
+~~~ +^D +=> 1 ``` ``` From f8a61dde994edd9fe9c8e2f6d78a1550b3f7c865 Mon Sep 17 00:00:00 2001 From: vreoo <74427567+vreoo@users.noreply.github.com> Date: Sun, 17 May 2026 11:43:42 +0300 Subject: [PATCH 3/3] Remove `grep` from test --- test/command/8880.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/command/8880.md b/test/command/8880.md index f9b198c11cf0..3aa9417b29d4 100644 --- a/test/command/8880.md +++ b/test/command/8880.md @@ -15,12 +15,16 @@ Raw EPUB attributes are rendered in EPUB output. Raw EPUB2 attributes are omitted from EPUB3 output. ``` -% pandoc -f markdown -t epub3 --metadata title=Raw -o - | pandoc -f epub -t html | grep ok +% pandoc -f markdown -t epub3 --metadata title=Raw -o - | pandoc -f epub -t html ~~~ {=epub2}

ok

~~~ ^D -=> 1 +

+

+
+

Raw

+
``` ```