From 7d4ed316a9d9a01a1cf56e914383f190f6bcaa53 Mon Sep 17 00:00:00 2001 From: smc181002 Date: Mon, 11 May 2026 00:04:43 +0530 Subject: [PATCH 01/29] Man reader: add auto_identifiers support Add support for the auto_identifiers, gfm_auto_identifiers, and ascii_identifiers extensions in the man reader. Section headings parsed from .SH and .SS macros now receive auto-generated id attributes when the extension is enabled, enabling --toc to produce working anchor links. - Add autoIdExtensions to default man extensions - Added HasReaderOptions, HasLogMessages and HasIdentifierList to ManState to run registerHeader - Use headerWith instead of header to attach the computed Attr with identifiers Closes #8852 --- src/Text/Pandoc/Extensions.hs | 1 + src/Text/Pandoc/Readers/Man.hs | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 7fcb214cdbf4..3d698edcbb38 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -664,4 +664,5 @@ getAllExtensions f = universalExtensions <> getAll f [ Ext_smart ] getAll "typst" = extensionsFromList [Ext_citations, Ext_smart] getAll "djot" = extensionsFromList [Ext_sourcepos] + getAll "man" = autoIdExtensions getAll _ = mempty diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 0260b6e5d39b..a788cf2b2858 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -26,23 +26,40 @@ import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options +import Text.Pandoc.Parsing.Capabilities import Text.Pandoc.Parsing import Text.Pandoc.Walk (query) import Text.Pandoc.Readers.Roff -- TODO explicit imports import qualified Text.Pandoc.Parsing as P import qualified Data.Foldable as Foldable +import qualified Data.Set as Set import Text.Pandoc.Shared (extractSpaces) data 
ManState = ManState { readerOptions :: ReaderOptions + , manLogMessages :: []LogMessage + , manIdentifiers :: Set.Set T.Text , metadata :: Meta , tableCellsPlain :: Bool } deriving Show instance Default ManState where def = ManState { readerOptions = def + , manLogMessages = [] + , manIdentifiers = Set.empty , metadata = nullMeta , tableCellsPlain = True } +instance HasReaderOptions ManState where + extractReaderOptions = readerOptions + +instance HasLogMessages ManState where + addLogMessage msg st = st{ manLogMessages = msg : manLogMessages st } + getLogMessages st = reverse $ manLogMessages st + +instance HasIdentifierList ManState where + extractIdentifierList = manIdentifiers + updateIdentifierList f st = st{ manIdentifiers = f $ manIdentifiers st } + type ManParser m = P.ParsecT [RoffToken] ManState m @@ -415,7 +432,9 @@ parseHeader = do else return $ mconcat $ intersperse B.space $ map linePartsToInlines args let lvl = if name == "SH" then 1 else 2 - return $ header lvl contents + attr <- registerHeader nullAttr contents + + return $ B.headerWith attr lvl contents parseBlockQuote :: PandocMonad m => ManParser m Blocks parseBlockQuote = blockQuote <$> From e06bf5cf541cda57445b0f0a59375caee5ae5be0 Mon Sep 17 00:00:00 2001 From: smc181002 Date: Sun, 17 May 2026 14:49:44 +0530 Subject: [PATCH 02/29] Add tests for Man Reader with auto identifiers - The auto identifiers are verified with 3 different test cases. - The same 3 test cases are verified with GFM Auto identifiers algorithm. 
- The AsciiIdentifiers option is additionally tested with one test case. Closes #8852 --- test/Tests/Old.hs | 2 +- test/Tests/Readers/Man.hs | 52 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index ed5a3ef938bf..174cbd1cbee7 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -224,7 +224,7 @@ tests pandocPath = "creole-reader.txt" "creole-reader.native" ] , testGroup "man" - [ test' "reader" ["-r", "man", "-w", "native", "-s"] + [ test' "reader" ["-r", "man-auto_identifiers", "-w", "native", "-s"] "man-reader.man" "man-reader.native" ] , testGroup "org" diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index bb5316b5c497..0030264c5818 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -22,7 +22,22 @@ import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder man :: Text -> Pandoc -man = purely $ readMan def +man = purely $ readMan def { readerExtensions = + disableExtension Ext_auto_identifiers pandocExtensions } + +manAutoIds :: Text -> Pandoc +manAutoIds = purely $ readMan def { readerExtensions = + enableExtension Ext_auto_identifiers pandocExtensions } + +manGfmIds :: Text -> Pandoc +manGfmIds = purely $ readMan def { readerExtensions = + enableExtension Ext_gfm_auto_identifiers $ + enableExtension Ext_auto_identifiers pandocExtensions } + +manAsciiIds :: Text -> Pandoc +manAsciiIds = purely $ readMan def { readerExtensions = + enableExtension Ext_ascii_identifiers $ + enableExtension Ext_auto_identifiers pandocExtensions } infix 4 =: (=:) :: (ToString c, HasCallStack) @@ -47,10 +62,10 @@ tests = [ =?> para (strong (str "foo") <> emph (str "bar")) , "H1" =: ".SH The header\n" - =?> header 1 (text "The header") + =?> headerWith ("",[],[]) 1 (text "The header") , "H2" =: ".SS \"The header 2\"" - =?> header 2 (text "The header 2") + =?> headerWith ("",[],[]) 2 (text "The header 2") , "Macro args" =: ".B \"single arg with 
\"\"Q\"\"\"" =?>para (strong $ text "single arg with \"Q\"") @@ -140,5 +155,36 @@ tests = [ (TableHead nullAttr []) [TableBody nullAttr 0 [] $ map toRow [[plain $ text "a b c d"], [plain $ str "f"]]] (TableFoot nullAttr []) + ], + testGroup "AutoIdentifiers" [ + test manAutoIds "H1 with auto id" + (".SH The header\n" + =?> headerWith ("the-header",[],[]) 1 (text "The header")) + , test manAutoIds "H2 with auto id" + (".SS \"The header 2\"" + =?> headerWith ("the-header-2",[],[]) 2 (text "The header 2")) + , test manAutoIds "Multiple headers with auto ids" + (".SH First\n.SH Second\n.SH 3rd Header" + =?> headerWith ("first",[],[]) 1 (text "First") <> + headerWith ("second",[],[]) 1 (text "Second") <> + headerWith ("rd-header",[],[]) 1 (text "3rd Header")) + ], + testGroup "GFMAutoIdentifiers" [ + test manGfmIds "H1 with auto id" + (".SH The header\n" + =?> headerWith ("the-header",[],[]) 1 (text "The header")) + , test manGfmIds "H2 with auto id" + (".SS \"The header 2\"" + =?> headerWith ("the-header-2",[],[]) 2 (text "The header 2")) + , test manGfmIds "Multiple headers with auto ids" + (".SH First\n.SH Second\n.SH 3rd Header" + =?> headerWith ("first",[],[]) 1 (text "First") <> + headerWith ("second",[],[]) 1 (text "Second") <> + headerWith ("3rd-header",[],[]) 1 (text "3rd Header")) + ], + testGroup "ParseAsciiIdentifiers" [ + test manAsciiIds "H1 for autoid and non ascii chars in header" + (".SH Über den Flüssen\n" + =?> headerWith ("uber-den-flussen",[],[]) 1 (text "Über den Flüssen")) ] ] From 71fbfd32e015d2485d4f5514515eca8c57699c91 Mon Sep 17 00:00:00 2001 From: smc181002 Date: Sun, 17 May 2026 14:56:43 +0530 Subject: [PATCH 03/29] Updated MANUAL.txt to include Man under auto_identifiers --- MANUAL.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index fbe4bb7c4818..e22b577bc9ed 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3668,13 +3668,13 @@ automatically assigned a unique identifier based on the heading 
text. This extension can be enabled/disabled for the following formats: input formats -: `markdown`, `latex`, `rst`, `mediawiki`, `textile` +: `markdown`, `latex`, `rst`, `mediawiki`, `textile`, `man` output formats : `markdown`, `muse` enabled by default in -: `markdown`, `muse` +: `markdown`, `muse`, `man` The default algorithm used to derive the identifier from the heading text is: From db13ea0238c6c7ca80d9062e364f7dc2a5719c6f Mon Sep 17 00:00:00 2001 From: smc181002 Date: Sun, 17 May 2026 15:17:21 +0530 Subject: [PATCH 04/29] fixed spacing in Extensions.hs --- src/Text/Pandoc/Extensions.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 3d698edcbb38..71b5da799d51 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -664,5 +664,5 @@ getAllExtensions f = universalExtensions <> getAll f [ Ext_smart ] getAll "typst" = extensionsFromList [Ext_citations, Ext_smart] getAll "djot" = extensionsFromList [Ext_sourcepos] - getAll "man" = autoIdExtensions + getAll "man" = autoIdExtensions getAll _ = mempty From 3b3bc70972162e97663e26bd4581cd5a2083ef30 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 11 May 2026 12:29:45 +0200 Subject: [PATCH 05/29] Add NAME heading to pandoc-lua, pandoc-server man pages. Closes #11634. 
--- doc/pandoc-lua.md | 4 ++++ doc/pandoc-server.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/doc/pandoc-lua.md b/doc/pandoc-lua.md index aa01b48db1f4..4a78a857a42f 100644 --- a/doc/pandoc-lua.md +++ b/doc/pandoc-lua.md @@ -4,6 +4,10 @@ section: 1 date: September 22, 2022 --- +# NAME + +pandoc-lua - Lua interface to pandoc API + # SYNOPSIS `pandoc-lua` [*options*] [*script* [*args*]] diff --git a/doc/pandoc-server.md b/doc/pandoc-server.md index 8289314eb719..4e138ca57aff 100644 --- a/doc/pandoc-server.md +++ b/doc/pandoc-server.md @@ -4,6 +4,10 @@ section: 1 date: August 15, 2022 --- +# NAME + +pandoc-server - web server exposing pandoc API + # SYNOPSIS `pandoc-server` [*options*] From 92aa56011514a085aba3990fa6f7c1be7870766c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 11 May 2026 12:30:20 +0200 Subject: [PATCH 06/29] Roff reader: handle `\` line continuation in table cells. Closes #11635. --- src/Text/Pandoc/Readers/Roff.hs | 3 ++- test/command/11635.md | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test/command/11635.md diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 607b7c92239c..134ee4c94292 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -268,7 +268,8 @@ tableCell = do manyTill anyChar (try (string "T}")) simpleCell = do tabChar <- tableTabChar <$> getState - many (notFollowedBy (char tabChar <|> newline) >> anyChar) + many $ (char '\\' >> anyChar) + <|> (notFollowedBy (char tabChar <|> newline) >> anyChar) tableRow :: PandocMonad m => RoffLexer m [RoffTokens] tableRow = do diff --git a/test/command/11635.md b/test/command/11635.md new file mode 100644 index 000000000000..1639c82f68ca --- /dev/null +++ b/test/command/11635.md @@ -0,0 +1,28 @@ +``` +% pandoc -f man -t html +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SH HEADING +.TS +tab(;); +l l. 
+this is a table element \ +written in two lines;abcdefg +another \ +one;123456 +.TE +^D +

HEADING

+ + + + + + + + + + + +
this is a table element written in two +linesabcdefg
another one123456
+``` From 0b935b731464fa48d0d0fcbfe4516cbfaab7f0ec Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 12 May 2026 14:50:59 +0200 Subject: [PATCH 07/29] Translations: find lang-script type translations. E.g. for zh-Hant-TW look for (in order) zh-Hant-TW.yaml, zh-Hant.yaml, zh.yaml. Closes #11648. --- src/Text/Pandoc/Translations.hs | 43 +++++++++++++++------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 00562e1896f9..ef149683df3a 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -23,7 +23,7 @@ import Text.Pandoc.Translations.Types import Text.Pandoc.Class (PandocMonad(..), toTextM, report) import Text.Pandoc.Class.CommonState (CommonState(..)) import Text.Pandoc.Data (readDataFile) -import Text.Pandoc.Error (PandocError(..)) +import Data.Containers.ListUtils (nubOrd) import Text.Pandoc.Logging (LogMessage(..)) import Control.Monad.Except (catchError) import qualified Data.Text as T @@ -56,34 +56,29 @@ getTranslations = do Nothing -> return mempty -- no language defined Just (_, Just t) -> return t Just (lang, Nothing) -> do -- read from file - let translationFile = "translations/" <> renderLang lang <> ".yaml" - let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" - let getTrans fp = do - txt <- readDataFile fp >>= toTextM fp - case readTranslations txt of - Left e -> do + let translationFiles = map (\x -> "translations/" <> T.unpack x <> ".yaml") + (nubOrd [renderLang lang, + langLanguage lang <> maybe "" ("-" <>) (langScript lang), + langLanguage lang]) + let getTrans [] = return mempty + getTrans (fp:fps) = do + result <- catchError (Right <$> (readDataFile fp >>= toTextM fp)) + (\_ -> pure (Left "")) + case result >>= readTranslations of + Left e + | null fps -> do report $ CouldNotLoadTranslations (renderLang lang) - (T.pack fp <> ": " <> e) + (T.pack fp <> ": " <> e) -- make sure we don't try again... 
modifyCommonState $ \st -> st{ stTranslations = Nothing } return mempty - Right t -> do - modifyCommonState $ \st -> - st{ stTranslations = Just (lang, Just t) } - return t - catchError (getTrans $ T.unpack translationFile) - (\_ -> - catchError (getTrans $ T.unpack fallbackFile) - (\e -> do - report $ CouldNotLoadTranslations (renderLang lang) - $ case e of - PandocCouldNotFindDataFileError _ -> - "data file " <> fallbackFile <> " not found" - _ -> "" - -- make sure we don't try again... - modifyCommonState $ \st -> st{ stTranslations = Nothing } - return mempty)) + | otherwise -> getTrans fps + Right t -> do + modifyCommonState $ \st -> + st{ stTranslations = Just (lang, Just t) } + return t + getTrans translationFiles -- | Get a translation from the current term map. -- Issue a warning if the term is not defined. From ccf272bc85a1f3a2837e017aea91e17709eb4a5a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 12 May 2026 14:59:31 +0200 Subject: [PATCH 08/29] Docx reader: improve treatment of tblHeader element. If tblHeader exists but has `w:val="0"`, then don't consider the element a header. See #8299...but this change doesn't seem to fix things completely. 
--- src/Text/Pandoc/Readers/Docx/Parse.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index ab8641c2ef20..2c9e029cfe9a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -764,9 +764,12 @@ elemToRow ns element | isElem ns "w" "tr" element = let cellElems = findChildrenByName ns "w" "tc" element let beforeCells = genericReplicate (fromMaybe 0 gridBefore) emptyCell cells <- mapD (elemToCell ns) cellElems - let hasTblHeader = maybe NoTblHeader (const HasTblHeader) - (properties - >>= findChildByName ns "w" "tblHeader") + let hasTblHeader = + case (properties >>= findChildByName ns "w" "tblHeader") of + Nothing -> NoTblHeader + Just he -> case findAttrByName ns "w" "val" he of + Just "0" -> NoTblHeader + _ -> HasTblHeader return $ Row hasTblHeader (beforeCells ++ cells) elemToRow _ _ = throwError WrongElem From 428be28cd9a305ba346da2c7b718b866ff9db36a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 12 May 2026 15:33:45 +0200 Subject: [PATCH 09/29] Docx reader: fix bug in bitmask checking. This led to some table rows being wrongly considered header rows. We now correctly handle the example from https://github.com/jgm/pandoc/issues/8299#issuecomment-4397472060 See #8299. 
--- src/Text/Pandoc/Readers/Docx/Parse.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 2c9e029cfe9a..150e5011b258 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -67,7 +67,7 @@ import Control.Monad import Control.Monad.Except import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Bits ((.|.)) +import Data.Bits ((.&.)) import qualified Data.ByteString.Lazy as B import Data.Char (chr, ord, readLitChar) import Data.List @@ -807,7 +807,7 @@ testBitMask :: Text -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of [] -> False - ((n', _) : _) -> (n' .|. n) /= 0 + ((n', _) : _) -> (n' .&. n) /= 0 pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle From 91a47d20326f8b3ad05624c5faabe2e8eb325be1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 13 May 2026 17:15:34 +0200 Subject: [PATCH 10/29] Error messages: use single quotes around paths and format names. Closes #11645. 
--- pandoc-lua-engine/test/lua/module/pandoc.lua | 12 +++--- src/Text/Pandoc/Error.hs | 41 +++++++++++--------- test/command/5876.md | 4 +- test/command/7861.md | 2 +- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/pandoc-lua-engine/test/lua/module/pandoc.lua b/pandoc-lua-engine/test/lua/module/pandoc.lua index f9c58f5aa5a0..b8df9c93a725 100644 --- a/pandoc-lua-engine/test/lua/module/pandoc.lua +++ b/pandoc-lua-engine/test/lua/module/pandoc.lua @@ -273,7 +273,7 @@ return { test('unsupported extension', function () assert.error_matches( function () pandoc.read('foo', 'gfm+empty_paragraphs') end, - 'The extension empty_paragraphs is not supported for gfm' + 'The extension \'empty_paragraphs\' is not supported for gfm' ) end), test('read with other indented code classes', function() @@ -297,7 +297,7 @@ return { test('failing read', function () assert.error_matches( function () pandoc.read('foo', 'nosuchreader') end, - 'Unknown input format nosuchreader' + 'Unknown input format \'nosuchreader\'' ) end), group 'read_env' { @@ -364,21 +364,21 @@ return { test('unsupported extension', function () assert.error_matches( function () pandoc.read('foo', 'gfm+empty_paragraphs') end, - 'The extension empty_paragraphs is not supported for gfm' + 'The extension \'empty_paragraphs\' is not supported for gfm' ) end), test('unknown extension', function () local format_spec = { format = 'markdown', extensions = {'nope'}} assert.error_matches( function () pandoc.read('x', format_spec) end, - 'The extension nope is not supported for markdown' + 'The extension \'nope\' is not supported for markdown' ) end), test('fails on invalid extension', function () local format_spec = { format = 'markdown', extensions = {'nope'}} assert.error_matches( function () pandoc.read('nu-uh', format_spec) end, - 'The extension nope is not supported for markdown' + 'The extension \'nope\' is not supported for markdown' ) end), }, @@ -458,7 +458,7 @@ return { local format_spec = { format = 
'plain', extensions = {'nope'}} assert.error_matches( function () pandoc.write(doc, format_spec) end, - 'The extension nope is not supported for plain' + 'The extension \'nope\' is not supported for plain' ) end), }, diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 22f613c1ecc1..d5fde4827362 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -86,40 +86,41 @@ renderError e = PandocSyntaxMapError s -> s PandocFailOnWarningError -> "Failing because there were warnings." PandocPDFProgramNotFoundError pdfprog -> - pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog + quote pdfprog <> " not found. Please select a different --pdf-engine or install " + <> quote pdfprog PandocPDFError logmsg -> "Error producing PDF.\n" <> logmsg PandocXMLError fp logmsg -> "Invalid XML" <> - (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg + (if T.null fp then "" else " in " <> quote fp) <> ":\n" <> logmsg PandocFilterError filtername msg -> "Error running filter " <> - filtername <> ":\n" <> msg + quote filtername <> ":\n" <> msg PandocLuaError msg -> "Error running Lua:\n" <> msg PandocNoScriptingEngine -> "This version of pandoc has been compiled " <> "without Lua support." 
PandocCouldNotFindDataFileError fn -> - "Could not find data file " <> fn + "Could not find data file " <> quote fn PandocCouldNotFindMetadataFileError fn -> - "Could not find metadata file " <> fn + "Could not find metadata file " <> quote fn PandocResourceNotFound fn -> - "File " <> fn <> " not found in resource path" - PandocTemplateError s -> "Error compiling template " <> s - PandocNoTemplateError fp -> "No template defined in " <> fp + "File " <> quote fn <> " not found in resource path" + PandocTemplateError s -> "Error compiling template " <> quote s + PandocNoTemplateError fp -> "No template defined in " <> quote fp PandocAppError s -> s PandocEpubSubdirectoryError s -> - "EPUB subdirectory name '" <> s <> "' contains illegal characters" + "EPUB subdirectory name " <> quote s <> " contains illegal characters" PandocMacroLoop s -> "Loop encountered in expanding macro " <> s PandocUTF8DecodingError f offset w -> - "UTF-8 decoding error in " <> f <> " at byte offset " <> tshow offset <> - " (" <> T.pack (printf "%2x" w) <> ").\n" <> - "The input must be a UTF-8 encoded text." + "UTF-8 decoding error in " <> quote f <> " at byte offset " + <> tshow offset <> " (" <> T.pack (printf "%2x" w) <> ").\n" + <> "The input must be a UTF-8 encoded text." PandocIpynbDecodingError w -> "ipynb decoding error: " <> w PandocUnsupportedCharsetError charset -> - "Unsupported charset " <> charset + "Unsupported charset " <> quote charset PandocFormatError format s -> - "Error parsing format " <> tshow format <> ": " <> s + "Error parsing format " <> quote format <> ": " <> s PandocUnknownReaderError r -> - "Unknown input format " <> r <> + "Unknown input format " <> quote r <> case r of "doc" -> "\nPandoc can convert from DOCX, but not from DOC." <> "\nTry using Word to save your DOC file as DOCX," <> @@ -127,7 +128,7 @@ renderError e = "pdf" -> "\nPandoc can convert to PDF, but not from PDF." 
_ -> "" PandocUnknownWriterError w -> - "Unknown output format " <> w <> + "Unknown output format " <> quote w <> case w of "pdf" -> "To create a pdf using pandoc, use" <> " -t latex|beamer|context|ms|html5|typst" <> @@ -136,21 +137,23 @@ renderError e = "doc" -> "\nPandoc can convert to DOCX, but not to DOC." _ -> "" PandocUnsupportedExtensionError ext f -> - "The extension " <> ext <> " is not supported " <> + "The extension " <> quote ext <> " is not supported " <> "for " <> f <> ".\nUse --list-extensions=" <> f <> " to " <> "list supported extensions." PandocCiteprocError e' -> prettyCiteprocError e' PandocBibliographyError fp msg -> - "Error reading bibliography file " <> fp <> ":\n" <> msg + "Error reading bibliography file " <> quote fp <> ":\n" <> msg PandocInputNotTextError fp -> "Expected text as an input, but received binary data from " <> (if T.null fp then "stdin" - else "file " <> fp) <> + else "file " <> quote fp) <> ".\nIf you intended to convert from binary format, verify that it's " <> "supported and use\nexplicit -f FORMAT." +quote :: Text -> Text +quote s = "'" <> s <> "'" -- | Handle PandocError by exiting with an error message. 
handleError :: Either PandocError a -> IO a diff --git a/test/command/5876.md b/test/command/5876.md index a8ede1887584..13e3614d7d0c 100644 --- a/test/command/5876.md +++ b/test/command/5876.md @@ -48,13 +48,13 @@ Pandoc % pandoc -s -t native --data-dir=command/5876 --metadata-file=does-not-exist.yaml Hello ^D -2> Could not find metadata file does-not-exist.yaml +2> Could not find metadata file 'does-not-exist.yaml' => 98 ``` ``` % pandoc -s -t native --metadata-file=does-not-exist.yaml Hello ^D -2> Could not find metadata file does-not-exist.yaml +2> Could not find metadata file 'does-not-exist.yaml' => 98 ``` diff --git a/test/command/7861.md b/test/command/7861.md index a5b68de6fd69..31aa7e2fb09b 100644 --- a/test/command/7861.md +++ b/test/command/7861.md @@ -2,6 +2,6 @@ % pandoc -s -t native --data-dir=command/7861 --metadata-file=../../7861.yaml Hello ^D -2> Could not find metadata file ../../7861.yaml +2> Could not find metadata file '../../7861.yaml' => 98 ``` From a02b3bd6c74208b537121742afc4cea1b6df7ab4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 13 May 2026 17:29:12 +0200 Subject: [PATCH 11/29] MANUAL: improve description of reference links. See #11643. --- MANUAL.txt | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index e22b577bc9ed..64884e69a6f4 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5657,21 +5657,33 @@ before or after the link). The link consists of link text in square brackets, followed by a label in square brackets. (There cannot be space between the two unless the -`spaced_reference_links` extension is enabled.) The link definition -consists of the bracketed label, followed by a colon and a space, followed by -the URL, and optionally (after a space) a link title either in quotes or in -parentheses. 
The label must not be parseable as a citation (assuming -the `citations` extension is enabled): citations take precedence over -link labels. +`spaced_reference_links` extension is enabled.) If the label is empty +(`[]`), then it will implicitly be taken to be the same as the link text; +thus `[foo][]` is equivalent to `[foo][foo]`. (If the +`shortcut_reference_links` extension is enabled, the empty `[]` +may be omitted.) -Here are some examples: +The link definition consists of the bracketed label, followed by +a colon and a space, followed by the URL, and optionally (after a +space) a link title either in quotes or in parentheses. The label +must not be parseable as a citation (assuming the `citations` +extension is enabled): citations take precedence over link +labels. - [my label 1]: /foo/bar.html "My title, optional" - [my label 2]: /foo - [my label 3]: https://fsf.org (The Free Software Foundation) - [my label 4]: /bar#special 'A title in single quotes' +Here are some examples of reference links and link definitions: -The URL may optionally be surrounded by angle brackets: + See [the website *I* built][my website]. + + See [my website][] and [the bar page][1] and + the [home page of the FSF][fsf]. + + [my website]: http://foo.bar.baz + [1]: /foo/bar.html "My title, optional" + [fsf]: https://fsf.org (The Free Software Foundation) + [special page]: /bar#special 'A title in single quotes' + +The URL in a link definition may optionally be surrounded by +angle brackets: [my label 5]: @@ -5686,18 +5698,14 @@ Note that link labels are not case sensitive. So, this will work: [Foo]: /bar/baz -In an *implicit* reference link, the second pair of brackets is -empty: - - See [my website][]. - - [my website]: http://foo.bar.baz +The link definition may come either before or after a +reference link that uses the label. 
-Note: In `Markdown.pl` and most other Markdown implementations, -reference link definitions cannot occur in nested constructions -such as list items or block quotes. Pandoc lifts this -arbitrary-seeming restriction. So the following is fine in pandoc, -though not in most other implementations: +Note: In some Markdown implementations, reference link +definitions cannot occur in nested constructions such as list +items or block quotes. Pandoc lifts this arbitrary-seeming +restriction. So the following is fine in pandoc, though not in +all implementations: > My block [quote]. > From fa0cf7636b36a29136f4854d66bd0312b91ba376 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 14 May 2026 10:37:50 +0200 Subject: [PATCH 12/29] HTML reader: parse aside as a Div. (Instead of using raw HTML.) The "aside" class is added to the Div. Also, add "header" class to Divs created from headers. See #11626. --- src/Text/Pandoc/Readers/HTML.hs | 17 ++++++++++------- test/Tests/Readers/HTML.hs | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 9178ab8480da..93b1e83d7346 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -35,7 +35,6 @@ import Data.Foldable (for_) import Data.List.Split (splitWhen) import qualified Data.List as L import qualified Data.Map as M -import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Either (partitionEithers) import Data.Monoid (First (..)) import qualified Data.Set as Set @@ -70,6 +69,7 @@ import Text.Pandoc.URI (escapeURI) import Text.Pandoc.Walk import Text.TeXMath (readMathML, writeTeX) import qualified Data.Sequence as Seq +import Data.Maybe (fromMaybe, isJust) -- | Convert HTML-formatted string to 'Pandoc' document. 
readHtml :: (PandocMonad m, ToSources a) @@ -237,6 +237,7 @@ block = ((do -> pLineBlock | otherwise -> pDiv + "aside" -> pDiv "section" -> pDiv "header" -> pDiv "main" -> pDiv @@ -489,6 +490,7 @@ isDivLike "div" = True isDivLike "section" = True isDivLike "header" = True isDivLike "main" = True +isDivLike "aside" = True isDivLike _ = False pDiv :: PandocMonad m => TagParser m Blocks @@ -502,12 +504,13 @@ pDiv = try $ do | hident == ident -> B.Many $ Header lev ("",hclasses,hkvs) ils Seq.<| rest _ -> contents - let classes' = if tag == "section" - then "section":classes - else classes - kvs' = if tag == "main" && isNothing (lookup "role" kvs) - then ("role", "main"):kvs - else kvs + let (classes', kvs') = + case tag of + "section" -> ("section":classes, kvs) + "aside" -> ("aside":classes, kvs) + "header" -> ("header":classes, kvs) + "main" | Nothing <- lookup "role" kvs -> (classes, ("role", "main"):kvs) + _ -> (classes, kvs) return $ B.divWith (ident, classes', kvs') contents' pIframe :: PandocMonad m => TagParser m Blocks diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 7c075de0b3de..4ad6d596973c 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -124,7 +124,7 @@ tests = [ testGroup "base tag" , testGroup "header" [ test htmlNativeDivs "
is parsed as a div" $ "
Title
" =?> - divWith ("title", mempty, mempty) (plain "Title") + divWith ("title", ["header"], mempty) (plain "Title") ] , testGroup "code block" [ test html "attributes in pre > code element" $ From ae93b4f3ccbb0eb7e3c8b3555a99bf4e47f41aa5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 14 May 2026 15:30:32 +0200 Subject: [PATCH 13/29] Use latest citeproc. --- cabal.project | 5 +++++ stack.yaml | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cabal.project b/cabal.project index 82e5ec061518..47247958bbac 100644 --- a/cabal.project +++ b/cabal.project @@ -27,6 +27,11 @@ source-repository-package location: https://github.com/jgm/texmath.git tag: 0a3fbebc5d0e21769f01b048eb63e1451ccf0e1a +source-repository-package + type: git + location: https://github.com/jgm/citeproc.git + tag: 1b684f1e06fc1093d20c1a2d474f4c3fdf2f65bd + package pandoc flags: +embed_data_files +http diff --git a/stack.yaml b/stack.yaml index 8134a2612049..d92d445c349b 100644 --- a/stack.yaml +++ b/stack.yaml @@ -21,7 +21,6 @@ extra-deps: - hslua-packaging-2.4.1 - hslua-typing-0.2.0 - pandoc-lua-marshal-0.3.2.1 -- citeproc-0.13 - skylighting-format-blaze-html-0.1.2 - djot-0.1.4 - asciidoc-0.1.0.2 @@ -32,6 +31,8 @@ extra-deps: commit: 6e97668c9f2ffea09f3187c34b7641038370fd21 - git: https://github.com/jgm/texmath.git commit: 0a3fbebc5d0e21769f01b048eb63e1451ccf0e1a +- git: https://github.com/jgm/citeproc.git + commit: 1b684f1e06fc1093d20c1a2d474f4c3fdf2f65bd ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules From c94a324ed08d239560518cce8363a1d0cd9e029c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 15 May 2026 15:25:14 +0200 Subject: [PATCH 14/29] Typst writer: add zero-width space before a Span label... ...if otherwise the label doesn't come after anything. (In this case typst will raise an error.) Closes #11568. 
--- src/Text/Pandoc/Writers/Typst.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Text/Pandoc/Writers/Typst.hs b/src/Text/Pandoc/Writers/Typst.hs index 1aa59767d257..0e324b6ae136 100644 --- a/src/Text/Pandoc/Writers/Typst.hs +++ b/src/Text/Pandoc/Writers/Typst.hs @@ -414,6 +414,10 @@ listItemToTypst ind marker blocks = do return $ hang ind (marker <> space) contents inlinesToTypst :: PandocMonad m => [Inline] -> TW m (Doc Text) +inlinesToTypst (i@(Span (ident,_,_) _):is) | not (T.null ident) = + -- insert a zero-width space U+200B before the label + -- because a typst label refers to preceding element (see #11568) + ("\x200B" <>) . hcat <$> mapM inlineToTypst (escapeParens (i:is)) inlinesToTypst ils = hcat <$> mapM inlineToTypst (escapeParens ils) -- Add an escape before a parenthesis right after a non-space element. From 039663e1909f6ac67ffc7b7c795faa2007684074 Mon Sep 17 00:00:00 2001 From: nibras shami <74427567+vreoo@users.noreply.github.com> Date: Sun, 17 May 2026 12:02:34 +0300 Subject: [PATCH 15/29] EPUB writer: support multiple EPUB versions for raw content (#11628). This change ensures that raw content marked `epub2` will appear in (only) EPUBv2 output and content marked `epub3` will appear in (only) EPUBv3 output. 
--- src/Text/Pandoc/Writers/EPUB.hs | 39 +++++++++++++++++------------- src/Text/Pandoc/Writers/HTML.hs | 10 ++++++++ test/command/8880.md | 42 +++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 test/command/8880.md diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 030270571801..39d3f6274ec8 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -466,8 +466,8 @@ pandocToEPUB :: PandocMonad m pandocToEPUB version opts doc = do let doc' = ensureValidXmlIdentifiers doc -- handle pictures - Pandoc meta blocks <- walkM (transformInline opts) doc' >>= - walkM transformBlock + Pandoc meta blocks <- walkM (transformInline version opts) doc' >>= + walkM (transformBlock version) picEntries <- mapMaybe (snd . snd) <$> gets stMediaPaths epubSubdir <- gets stEpubSubdir @@ -1203,42 +1203,47 @@ getMediaNextNewName ext = do modify $ \st -> st { stMediaNextId = nextId + 1 } return $ "file" ++ show nextId ++ ext -isHtmlFormat :: Format -> Bool -isHtmlFormat (Format "html") = True -isHtmlFormat (Format "html4") = True -isHtmlFormat (Format "html5") = True -isHtmlFormat _ = False +isHtmlFormat :: EPUBVersion -> Format -> Bool +isHtmlFormat _ (Format "html") = True +isHtmlFormat _ (Format "html4") = True +isHtmlFormat _ (Format "html5") = True +isHtmlFormat _ (Format "epub") = True +isHtmlFormat EPUB2 (Format "epub2") = True +isHtmlFormat EPUB3 (Format "epub3") = True +isHtmlFormat _ _ = False transformBlock :: PandocMonad m - => Block + => EPUBVersion + -> Block -> E m Block -transformBlock (RawBlock fmt raw) - | isHtmlFormat fmt = do +transformBlock version (RawBlock fmt raw) + | isHtmlFormat version fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') -transformBlock b = return b +transformBlock _ b = return b transformInline :: PandocMonad m - => WriterOptions + => EPUBVersion + -> WriterOptions -> Inline -> E 
m Inline -transformInline _opts (Image attr@(_,_,kvs) lab (src,tit)) +transformInline _ _opts (Image attr@(_,_,kvs) lab (src,tit)) | isNothing (lookup "external" kvs) = do newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) -transformInline opts x@(Math t m) +transformInline _ opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do newsrc <- modifyMediaRef (T.unpack (url <> urlEncode m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] -transformInline _opts (RawInline fmt raw) - | isHtmlFormat fmt = do +transformInline version _opts (RawInline fmt raw) + | isHtmlFormat version fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') -transformInline _ x = return x +transformInline _ _ x = return x (!) :: (t -> Element) -> [(Text, Text)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 93473a327d5d..0f1d378a9110 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1795,10 +1795,20 @@ intrinsicEventsHTML4 = isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool isRawHtml f = do html5 <- gets stHtml5 + epubVersion <- gets stEPUBVersion return $ f == Format "html" || ((html5 && f == Format "html5") || f == Format "html4") || + isEpubFormat epubVersion f || isSlideVariant f +-- | Check to see if Format matches with an EPUB variant +isEpubFormat :: Maybe EPUBVersion -> Format -> Bool +isEpubFormat Nothing _ = False +isEpubFormat (Just EPUB2) f = + f == Format "epub" || f == Format "epub2" +isEpubFormat (Just EPUB3) f = + f == Format "epub" || f == Format "epub3" + -- | Check to see if Format matches with an HTML slide variant isSlideVariant :: Format -> Bool isSlideVariant f = f `elem` [Format "s5", Format 
"slidy", Format "slideous", diff --git a/test/command/8880.md b/test/command/8880.md new file mode 100644 index 000000000000..3aa9417b29d4 --- /dev/null +++ b/test/command/8880.md @@ -0,0 +1,42 @@ +Raw EPUB attributes are rendered in EPUB output. + +``` +% pandoc -f native -t epub --metadata title=Raw -o - | pandoc -f epub -t html +[ RawBlock (Format "epub") "

ok

" ] +^D +

+

+
+

Raw

+

ok

+
+``` + +Raw EPUB2 attributes are omitted from EPUB3 output. + +``` +% pandoc -f markdown -t epub3 --metadata title=Raw -o - | pandoc -f epub -t html +~~~ {=epub2} +

ok

+~~~ +^D +

+

+
+

Raw

+
+``` + +``` +% pandoc -f markdown -t epub3 --metadata title=Raw -o - | pandoc -f epub -t html +~~~ {=epub3} +

ok

+~~~ +^D +

+

+
+

Raw

+

ok

+
+``` From 1137a2cee9c0f9045734d9fe5ac8333ba77a32ea Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 23 May 2026 18:09:01 +0200 Subject: [PATCH 16/29] gridTable: fix calculation of column widths for default columns. This fixes a bug which produced too-narrow columns in some cases. Closes #11664. --- src/Text/Pandoc/Writers/Shared.hs | 69 ++++++++++++++++++++++--------- test/command/11664.md | 35 ++++++++++++++++ 2 files changed, 84 insertions(+), 20 deletions(-) create mode 100644 test/command/11664.md diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 42b33a64b1b0..3aca2e39f70c 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -63,7 +63,7 @@ import Control.Monad (MonadPlus, mzero) import Data.Either (isRight) import Data.Aeson (ToJSON (..), encode) import Data.Char (chr, ord, isSpace, isLetter, isUpper) -import Data.List (groupBy, intersperse, transpose) +import Data.List (groupBy, intersperse, transpose, zipWith4) import qualified Data.List as L import Data.List.NonEmpty (NonEmpty((:|))) import Data.Text.Conversions (FromText(..)) @@ -322,9 +322,18 @@ gridTable opts blocksToDoc colspecs' thead' tbodies' tfoot' = do (setTopBorder DoubleLine . setBottomBorder DoubleLine) footCells pure $ gridRows $ redoWidths opts colspecs rows +data ColWidthInfo = + ColWidthInfo + { colWidthSpecified :: Int + , colWidthFull :: Int + , colWidthMin :: Int + , colWidthUsed :: Int } + deriving (Show, Ord, Eq) + -- Returns (current widths, full widths, min widths) -extractColWidths :: WriterOptions -> [[RenderedCell Text]] -> ([Int], [Int], [Int]) -extractColWidths opts rows = (currentwidths, fullwidths, minwidths) +extractColWidths :: WriterOptions -> [[RenderedCell Text]] -> [ColWidthInfo] +extractColWidths opts rows = + zipWith4 ColWidthInfo specifiedwidths fullwidths minwidths currentwidths where getWidths calcOffset = map (fromMaybe 0 . 
maximumMay) (transpose (map (concatMap (getCellWidths calcOffset)) rows)) @@ -332,11 +341,15 @@ extractColWidths opts rows = (currentwidths, fullwidths, minwidths) (calcOffset c `div` (cellColSpan c) + calcOffset c `rem` (cellColSpan c)) fullwidths = getWidths (max 1 . offset . cellContents) - currentwidths = getWidths cellWidth minwidths = case writerWrapText opts of WrapNone -> fullwidths _ -> getWidths (minOffset . cellContents) + specifiedwidths = getWidths cellWidth + currentwidths = zipWith (\specw minw -> if specw == 0 + then 0 + else max specw minw) + specifiedwidths minwidths resetWidths :: [Int] -> [RenderedCell Text] -> [RenderedCell Text] resetWidths _ [] = [] @@ -353,21 +366,37 @@ redoWidths _ _ [] = [] redoWidths opts colspecs rows = map (resetWidths newwidths) rows where numcols = length colspecs - isSimple = all ((== ColWidthDefault) . snd) colspecs - (actualwidths, fullwidths, minwidths) = extractColWidths opts rows - totwidth = writerColumns opts - (3 * numcols) - 1 - evenwidth = totwidth `div` numcols + totwidth `rem` numcols - keepwidths = filter (< evenwidth) fullwidths - evenwidth' = (totwidth - sum keepwidths) `div` - (numcols - length keepwidths) - ensureMinWidths = zipWith max minwidths - newwidths = ensureMinWidths $ - case isSimple of - True | sum fullwidths <= totwidth -> fullwidths - | otherwise -> map (\w -> if w < evenwidth - then w - else evenwidth') fullwidths - False -> actualwidths + widthInfos = extractColWidths opts rows + colsAvailable = writerColumns opts - (3 * numcols) - 1 + -- now, we need to change colWidthUsed 0 to an appropriate number; + -- either the full width if it fits easily, or an appropriate fraction + -- of the remaining width. This must be done recursively, because + -- once we decide that the full width can fit, that may leave less + -- space for the remaining columns with default width to fill. + recalculateWidths finalRun numRuns infos = + let numUnassigned = length (filter ((== 0) . 
colWidthUsed) infos) + unusedCols = colsAvailable - sum (map colWidthUsed infos) + defwidth = if numUnassigned == 0 + then 0 + else unusedCols `div` numUnassigned + infos' = map (\info -> if colWidthUsed info == 0 + then if finalRun + then info{ colWidthUsed = + max (colWidthMin info) defwidth } + else if colWidthFull info <= defwidth + then info{ colWidthUsed = colWidthFull info } + else info + else info) infos + in if finalRun || numRuns > 4 + then infos' + else if infos == infos' + then recalculateWidths True (numRuns + 1) infos + -- run again, filling in unassigned widths with a fraction of the + -- remaining width + else recalculateWidths False (numRuns + 1) infos' + -- run again, filling in unassigned widths only if the full width + -- would be less than the fraction of remaining width + newwidths = map colWidthUsed $ recalculateWidths False (1 :: Int) widthInfos makeDummy :: RenderedCell Text -> RenderedCell Text makeDummy c = @@ -534,7 +563,7 @@ getColWidth (_, ColWidthDefault) = 0 -- TODO? toCharWidth :: WriterOptions -> Double -> Int toCharWidth opts width = - max 1 (floor (width * fromIntegral (writerColumns opts)) - 3) + max 0 (floor (width * fromIntegral (writerColumns opts)) - 3) gridRow :: (Monad m, HasChars a) => WriterOptions diff --git a/test/command/11664.md b/test/command/11664.md new file mode 100644 index 000000000000..7076a529f130 --- /dev/null +++ b/test/command/11664.md @@ -0,0 +1,35 @@ +``` +% pandoc -t markdown -f html + + + + + + + + + + + +
+

A

B

+
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut + enim ad minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor + in reprehenderit in voluptate velit esse cillum dolore eu fugiat + nulla pariatur. Excepteur sint occaecat cupidatat non proident, + sunt in culpa qui officia deserunt mollit anim id est laborum. +
+^D ++---+------------------------------------------------------------------+ +| A | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do | +| | eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut | +| B | enim ad minim veniam, quis nostrud exercitation ullamco laboris | +| | nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor | +| | in reprehenderit in voluptate velit esse cillum dolore eu fugiat | +| | nulla pariatur. Excepteur sint occaecat cupidatat non proident, | +| | sunt in culpa qui officia deserunt mollit anim id est laborum. | ++---+------------------------------------------------------------------+ +``` From 9e029d20bf5f2bfe9f441f60b138424ce8610a83 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 25 May 2026 12:27:26 +0200 Subject: [PATCH 17/29] Use latest dev commonmark-hs. --- cabal.project | 5 +++++ stack.yaml | 2 ++ 2 files changed, 7 insertions(+) diff --git a/cabal.project b/cabal.project index 47247958bbac..81ef97bfe789 100644 --- a/cabal.project +++ b/cabal.project @@ -32,6 +32,11 @@ source-repository-package location: https://github.com/jgm/citeproc.git tag: 1b684f1e06fc1093d20c1a2d474f4c3fdf2f65bd +source-repository-package + type: git + location: https://github.com/jgm/commonmark-hs.git + tag: 48b95bab66401d94f80b7c84c5c33ab9e2cd39ea + package pandoc flags: +embed_data_files +http diff --git a/stack.yaml b/stack.yaml index d92d445c349b..3a763cceb11e 100644 --- a/stack.yaml +++ b/stack.yaml @@ -33,6 +33,8 @@ extra-deps: commit: 0a3fbebc5d0e21769f01b048eb63e1451ccf0e1a - git: https://github.com/jgm/citeproc.git commit: 1b684f1e06fc1093d20c1a2d474f4c3fdf2f65bd +- git: https://github.com/jgm/commonmark-hs.git + commit: 48b95bab66401d94f80b7c84c5c33ab9e2cd39ea ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules From 2b93ecc8f5e4bc00c99ea129889d409faa3f8b5b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 25 May 2026 12:39:55 +0200 Subject: [PATCH 18/29] Fix cabal.project, 
stack.yaml (specify subdir). --- cabal.project | 1 + stack.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/cabal.project b/cabal.project index 81ef97bfe789..836f36264350 100644 --- a/cabal.project +++ b/cabal.project @@ -35,6 +35,7 @@ source-repository-package source-repository-package type: git location: https://github.com/jgm/commonmark-hs.git + subdir: commonmark tag: 48b95bab66401d94f80b7c84c5c33ab9e2cd39ea package pandoc diff --git a/stack.yaml b/stack.yaml index 3a763cceb11e..2de2d7374f9b 100644 --- a/stack.yaml +++ b/stack.yaml @@ -35,6 +35,7 @@ extra-deps: commit: 1b684f1e06fc1093d20c1a2d474f4c3fdf2f65bd - git: https://github.com/jgm/commonmark-hs.git commit: 48b95bab66401d94f80b7c84c5c33ab9e2cd39ea + subdirs: [commonmark] ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules From 28a6130d0af5d94e404a7f9a475f79d955601c08 Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Mon, 25 May 2026 06:16:18 -0700 Subject: [PATCH 19/29] Docx writer: fix empty keywords in core document properties (#11666) `stringify` returns the empty string for a MetaString, so each keyword in the `cp:keywords` list of `docProps/core.xml` was rendered as empty. Convert each metadata value like `lookupMetaString` does instead. 
Signed-off-by: Sai Asish Y --- src/Text/Pandoc/Writers/Docx.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index db8439d42a76..a654d494d0d9 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -738,8 +738,13 @@ mkStylesEntry epochtime styledoc styleMaps st opts = -- | Create core document properties entry mkCorePropsEntry :: Integer -> UTCTime -> Meta -> Entry mkCorePropsEntry epochtime utctime meta = - let keywords = case lookupMeta "keywords" meta of - Just (MetaList xs) -> map stringify xs + let metaValueToText (MetaString s) = s + metaValueToText (MetaInlines ils) = stringify ils + metaValueToText (MetaBlocks bs) = stringify bs + metaValueToText (MetaBool b) = T.pack (show b) + metaValueToText _ = "" + keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> map metaValueToText xs _ -> [] docPropsPath = "docProps/core.xml" extraCoreProps = ["subject","lang","category","description"] From b7bf083b59c40b01af49930ea73f86b8656f01c7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 26 May 2026 12:55:58 +0200 Subject: [PATCH 20/29] Man reader: better handling of .TP macro. We parse these as DefinitionList items, but we previously sometimes stopped prematurely in including material in the definition. We should include everything until we hit a new indentation-changing macro. Closes #11668. 
--- src/Text/Pandoc/Readers/Man.hs | 15 ++++++++---- test/command/11668.md | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 test/command/11668.md diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index a788cf2b2858..28a72275bfcf 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -217,9 +217,11 @@ memptyLine = msatisfy isEmptyLine where isEmptyLine _ = False mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken -mmacro mk = msatisfy isControlLine where - isControlLine (ControlLine mk' _ _) | mk == mk' = True - | otherwise = False +mmacro mk = mmacroMatching (== mk) + +mmacroMatching :: PandocMonad m => (T.Text -> Bool) -> ManParser m RoffToken +mmacroMatching f = msatisfy isControlLine where + isControlLine (ControlLine mk _ _) = f mk isControlLine _ = False mmacroAny :: PandocMonad m => ManParser m RoffToken @@ -498,7 +500,12 @@ parseList = try $ do continuation :: PandocMonad m => ManParser m Blocks continuation = (mmacro "RS" *> (mconcat <$> manyTill parseBlock (endmacro "RE"))) - <|> try ((memptyLine <|> bareIP) *> (parsePara <|> parseCodeBlock)) + <|> (bareIP *> parsePara) + <|> (notFollowedBy (mmacroMatching + (`elem` ["TP","IP","LP","P","PP","HP", + "RE","RS","SH","SS","SY","YS","TH", + "TQ","AT","DT","OP","PD","UC"])) + *> parseBlock) definitionListItem :: PandocMonad m => ManParser m (Inlines, [Blocks]) diff --git a/test/command/11668.md b/test/command/11668.md new file mode 100644 index 000000000000..664d992f3d3e --- /dev/null +++ b/test/command/11668.md @@ -0,0 +1,42 @@ +``` +% pandoc -f man -t html +TH "TEST" "1" "2026-05-25" "test v1.0.0" "test manual" +.SH HEADING +.TP 4 +This is the unindented tag. +This text, along with the table, should be indented. +.TS +tab(;); +l l. +aaa;bbb +ccc;ddd +.TE +This should still be indented, until some other macro is called to change it. +.PP +Like this. +^D +

TH "TEST" "1" "2026-05-25" "test v1.0.0" "test manual"

+

HEADING

+
+
This is the unindented tag.
+
+

This text, along with the table, should be indented.

+ + + + + + + + + + + +
aaabbb
cccddd
+

This should still be indented, until some other macro is called to +change it.

+
+
+

Like this.

+ +``` From 18f5c0bdf2681e37640c8f38d906fbd4e2d62654 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 26 May 2026 22:44:13 +0200 Subject: [PATCH 21/29] Use MathJax v4 in default HTML templates. See #11669. --- src/Text/Pandoc/Options.hs | 2 +- test/s5-fancy.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 3705b9c41583..261766b08190 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -444,7 +444,7 @@ isEnabled :: HasSyntaxExtensions a => Extension -> a -> Bool isEnabled ext opts = ext `extensionEnabled` getExtensions opts defaultMathJaxURL :: Text -defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" +defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@4/tex-chtml.js" defaultWebTeXURL :: Text defaultWebTeXURL = "https://latex.codecogs.com/png.latex?" diff --git a/test/s5-fancy.html b/test/s5-fancy.html index f2f54811d275..16107033d2c3 100644 --- a/test/s5-fancy.html +++ b/test/s5-fancy.html @@ -43,7 +43,7 @@ From 83bbb1ed2e74c85bb97d6f7b24061de85db39e6b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 27 May 2026 17:35:39 +0200 Subject: [PATCH 22/29] OpenDocument/ODT writer: use predefined styles. (#11672) Previously the OpenDocument writer emitted a fresh automatic style (L1..Ln, P1..Pn, T1..Tn) for nearly every list, list-item paragraph, block quote, preformatted block, and inline text style. This produced large ODT files, made `--reference-doc` customization ineffective (the user's predefined styles were never referenced), and gave each list its own indentation independent of any containing block quote. This commit teaches the writer to reference the predefined styles that LibreOffice ships and that pandoc's reference.odt now exports: - Bullet lists use `List_20_1`; ordered lists with default start and decimal format use `Numbering_20_1`. 
Non-default ordered lists generate a single named override style (`Pandoc_Numbering_N`) memoised by (ListNumberStyle, ListNumberDelim); a non-default start value with the default format is expressed via `text:start-value` on the `text:list` element instead of a new style. - List-item paragraphs use `List_20_Bullet[_Tight]` and `List_20_Number[_Tight]`. The Tight variants are pandoc-specific (zero top/bottom margin) and are injected into the user's reference.odt if missing, just like the Skylighting token styles. - Block quotes use the predefined `Quotations` paragraph style directly. Nested block quotes use a single automatic style that inherits from Quotations and only adds extra margin-left, so a list inside a block quote now inherits its container's indent (#2747). - Preformatted blocks use `Preformatted_20_Text` directly. - Emphasis, Strong, Strikeout, Subscript, Superscript and Code spans use the predefined `Emphasis`, `Strong_20_Emphasis`, `Strikeout`, `Subscript`, `Superscript` and `Source_20_Text` text styles. - `paraStyle`/`paraStyleFromParent` no longer emit a wrapper automatic style when its only attribute would be `parent-style-name`; the parent name is returned directly. Closes #9136. Closes #5086. Closes #2747. Closes #3426. Closes #7336. Co-authored by: Claude Opus 4.7. 
--- data/odt/styles.xml | 18 + src/Text/Pandoc/Writers/ODT.hs | 55 +- src/Text/Pandoc/Writers/OpenDocument.hs | 336 ++--- test/command/10791.md | 7 +- test/command/2434.md | 35 +- test/command/8256.md | 3 +- test/command/9136.md | 89 ++ test/command/table-with-column-span.md | 4 +- test/writer.opendocument | 1715 +++++------------------ 9 files changed, 746 insertions(+), 1516 deletions(-) create mode 100644 test/command/9136.md diff --git a/data/odt/styles.xml b/data/odt/styles.xml index 7f7e06d9cec2..b4dc2bde8aeb 100644 --- a/data/odt/styles.xml +++ b/data/odt/styles.xml @@ -136,6 +136,24 @@ xmlns:css3t="http://www.w3.org/TR/css3-text/" office:version="1.3"> style:parent-style-name="Text_20_body" style:class="list"> + + + + + + + + qName qn == "styles" && qPrefix qn == Just "office" ) - (case writerHighlightMethod opts of + (addListItemStyles . + (case writerHighlightMethod opts of Skylighting style -> addHlStyles style - _ -> id) + _ -> id)) $ d ) | otherwise = pure e entries <- mapM goEntry (zEntries arch) @@ -228,6 +229,56 @@ addHlStyles sty el = isHlStyle (Elem e) = "Tok" `T.isSuffixOf` (qName (elName e)) isHlStyle _ = False +-- | Ensure the office:styles element contains paragraph styles for +-- list items used by the OpenDocument writer. This injects only the +-- styles that are missing, so a user-supplied reference.odt may +-- override any of them. 
+addListItemStyles :: Element -> Element +addListItemStyles el = + el{ elContent = elContent el ++ map Elem missingStyles } + where + existing = [ T.concat [ attrVal a + | a <- elAttribs e + , qName (attrKey a) == "name" + , qPrefix (attrKey a) == Just "style" ] + | Elem e <- elContent el + , qName (elName e) == "style" + , qPrefix (elName e) == Just "style" ] + missingStyles = + [ s | s <- listItemStyleElements + , styleNameOf s `notElem` existing ] + styleNameOf e = T.concat [ attrVal a + | a <- elAttribs e + , qName (attrKey a) == "name" + , qPrefix (attrKey a) == Just "style" ] + +listItemStyleElements :: [Element] +listItemStyleElements = + [ mkParaStyle "List_20_Bullet" "List Bullet" Nothing + , mkParaStyle "List_20_Bullet_20_Tight" "List Bullet Tight" (Just tightProps) + , mkParaStyle "List_20_Number" "List Number" Nothing + , mkParaStyle "List_20_Number_20_Tight" "List Number Tight" (Just tightProps) + ] + where + styleQN n p = QName n Nothing (Just p) + mkAttr p n v = Attr (styleQN n p) v + tightProps = + Element (styleQN "paragraph-properties" "style") + [ mkAttr "fo" "margin-top" "0in" + , mkAttr "fo" "margin-bottom" "0in" + , mkAttr "style" "contextual-spacing" "false" + ] [] Nothing + mkParaStyle name display mbProps = + Element (styleQN "style" "style") + [ mkAttr "style" "name" name + , mkAttr "style" "display-name" display + , mkAttr "style" "family" "paragraph" + , mkAttr "style" "parent-style-name" "List" + , mkAttr "style" "class" "list" + ] + (maybe [] (\p -> [Elem p]) mbProps) + Nothing + -- top-down search transformElement :: (QName -> Bool) -> (Element -> Element) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 0d49d72e10a4..5ce96d68b4dc 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -51,6 +51,29 @@ plainToPara :: Block -> Block plainToPara (Plain x) = Para x plainToPara x = x +-- Names of predefined styles in the reference.odt; see 
data/odt/styles.xml. +defaultBulletListStyleName, defaultNumberedListStyleName :: Text +defaultBulletListStyleName = "List_20_1" +defaultNumberedListStyleName = "Numbering_20_1" + +bulletItemStyleName, bulletItemTightStyleName, + numberItemStyleName, numberItemTightStyleName :: Text +bulletItemStyleName = "List_20_Bullet" +bulletItemTightStyleName = "List_20_Bullet_20_Tight" +numberItemStyleName = "List_20_Number" +numberItemTightStyleName = "List_20_Number_20_Tight" + +-- | Predefined inline style names for single-style spans. +wellKnownTextStyle :: Set.Set TextStyle -> Maybe Text +wellKnownTextStyle s = case Set.toList s of + [Italic] -> Just "Emphasis" + [Bold] -> Just "Strong_20_Emphasis" + [Strike] -> Just "Strikeout" + [Sub] -> Just "Subscript" + [Sup] -> Just "Superscript" + [Pre] -> Just "Source_20_Text" + _ -> Nothing + -- -- OpenDocument writer -- @@ -66,7 +89,8 @@ data WriterState = WriterState { stNotes :: [Doc Text] , stTableStyles :: [Doc Text] , stParaStyles :: [Doc Text] - , stListStyles :: [(Int, [Doc Text])] + , stListOverrides :: Map.Map (ListNumberStyle,ListNumberDelim) + (Text, Doc Text) , stTextStyles :: Map.Map (Set.Set TextStyle) (Text, Doc Text) , stTextStyleAttr :: Set.Set TextStyle @@ -85,7 +109,7 @@ defaultWriterState = WriterState { stNotes = [] , stTableStyles = [] , stParaStyles = [] - , stListStyles = [] + , stListOverrides = Map.empty , stTextStyles = Map.empty , stTextStyleAttr = Set.empty , stIndentPara = 0 @@ -121,10 +145,6 @@ increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } resetIndent :: PandocMonad m => OD m () resetIndent = modify $ \s -> s { stIndentPara = stIndentPara s - 1 } -inTightList :: PandocMonad m => OD m a -> OD m a -inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> - modify (\s -> s { stTight = False }) >> return r - setInDefinitionList :: PandocMonad m => Bool -> OD m () setInDefinitionList b = modify $ \s -> s { stInDefinition = b } @@ -163,22 +183,25 @@ inTextStyle d = do 
at <- gets stTextStyleAttr if Set.null at then return d - else do - styles <- gets stTextStyles - case Map.lookup at styles of - Just (styleName, _) -> return $ - inTags False "text:span" [("text:style-name",styleName)] d - Nothing -> do - let styleName = "T" <> tshow (Map.size styles + 1) - addTextStyle at (styleName, - inTags False "style:style" - [("style:name", styleName) - ,("style:family", "text")] - $ selfClosingTag "style:text-properties" - (sortOn fst . Map.toList - $ L.foldl' textStyleAttr mempty (Set.toList at))) - return $ inTags False - "text:span" [("text:style-name",styleName)] d + else case wellKnownTextStyle at of + Just styleName -> return $ + inTags False "text:span" [("text:style-name", styleName)] d + Nothing -> do + styles <- gets stTextStyles + case Map.lookup at styles of + Just (styleName, _) -> return $ + inTags False "text:span" [("text:style-name",styleName)] d + Nothing -> do + let styleName = "T" <> tshow (Map.size styles + 1) + addTextStyle at (styleName, + inTags False "style:style" + [("style:name", styleName) + ,("style:family", "text")] + $ selfClosingTag "style:text-properties" + (sortOn fst . Map.toList + $ L.foldl' textStyleAttr mempty (Set.toList at))) + return $ inTags False + "text:span" [("text:style-name",styleName)] d formulaStyles :: [Doc Text] formulaStyles = [formulaStyle InlineMath, formulaStyle DisplayMath] @@ -263,10 +286,8 @@ writeOpenDocument opts (Pandoc meta blocks) = do let styles = stTableStyles s ++ stParaStyles s ++ formulaStyles ++ map snd (sortBy (comparing (Down . fst)) ( Map.elems (stTextStyles s))) - listStyle (n,l) = inTags True "text:list-style" - [("style:name", "L" <> tshow n)] (vcat l) - let listStyles = map listStyle (stListStyles s) - let automaticStyles = vcat $ reverse $ styles ++ listStyles + let listStyles = map snd (Map.elems (stListOverrides s)) + let automaticStyles = vcat $ reverse styles ++ listStyles let context = defField "body" body . defField "toc" (writerTableOfContents opts) . 
defField "toc-depth" (tshow $ writerTOCDepth opts) @@ -286,32 +307,86 @@ withParagraphStyle o s (b:bs) withParagraphStyle _ _ [] = return empty inPreformattedTags :: PandocMonad m => [Doc Text] -> OD m (Doc Text) -inPreformattedTags s = do - n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] - return $ inParagraphTagsWithStyle ("P" <> tshow n) $ hcat s +inPreformattedTags s = + return $ inParagraphTagsWithStyle "Preformatted_20_Text" $ hcat s + +-- | Get the list-style name to use for an ordered list with the given +-- numbering style and delimiter, registering an override automatic style +-- if needed. Lists with default style/delimiter use the predefined +-- @Numbering_20_1@ style and no override is generated. +orderedListStyleName :: PandocMonad m + => ListNumberStyle -> ListNumberDelim -> OD m Text +orderedListStyleName ns nd + | (ns == DefaultStyle || ns == Decimal) && + (nd == DefaultDelim || nd == Period) = + return defaultNumberedListStyleName + | otherwise = do + overrides <- gets stListOverrides + case Map.lookup (ns, nd) overrides of + Just (name, _) -> return name + Nothing -> do + let name = "Pandoc_5f_Numbering_5f_" <> + tshow (Map.size overrides + 1) + doc = inTags True "text:list-style" + [("style:name", name)] + (vcat (map (orderedListLevel ns nd) [1..10])) + modify $ \s -> s { stListOverrides = + Map.insert (ns, nd) (name, doc) + (stListOverrides s) } + return name + +orderedListLevel :: ListNumberStyle -> ListNumberDelim -> Int -> Doc Text +orderedListLevel ns nd lvl = + let suffix = case nd of + OneParen -> [("style:num-suffix", ")")] + TwoParens -> [("style:num-prefix", "(") + ,("style:num-suffix", ")")] + _ -> [("style:num-suffix", ".")] + format = case ns of + UpperAlpha -> "A" + LowerAlpha -> "a" + UpperRoman -> "I" + LowerRoman -> "i" + _ -> "1" + in inTags True "text:list-level-style-number" + ([ ("text:level" , tshow lvl) + , ("text:style-name" , "Numbering_20_Symbols") + , ("style:num-format", format) + ] ++ suffix) + 
(selfClosingTag "style:list-level-properties" + [ ("text:space-before", + T.pack (printf "%.4fin" (0.1972 * fromIntegral (lvl - 1) :: Double))) + , ("text:min-label-width", "0.1965in") + , ("text:min-label-distance", "0.1in") + ]) orderedListToOpenDocument :: PandocMonad m - => WriterOptions -> Int -> [[Block]] -> OD m (Doc Text) -orderedListToOpenDocument o pn bs = + => WriterOptions -> Text -> [[Block]] + -> OD m (Doc Text) +orderedListToOpenDocument o paraName bs = vcat . map (inTagsIndented "text:list-item") <$> - mapM (orderedItemToOpenDocument o pn . map plainToPara) bs + mapM (orderedItemToOpenDocument o paraName . map plainToPara) bs orderedItemToOpenDocument :: PandocMonad m - => WriterOptions -> Int -> [Block] -> OD m (Doc Text) -orderedItemToOpenDocument o n bs = vcat <$> mapM go bs - where go (OrderedList a l) = newLevel a l - go (Para l) = inParagraphTagsWithStyle ("P" <> tshow n) <$> + => WriterOptions -> Text -> [Block] + -> OD m (Doc Text) +orderedItemToOpenDocument o paraName bs = vcat <$> mapM go bs + where go (OrderedList a l) = orderedList a l + go (Para l) = inParagraphTagsWithStyle paraName <$> inlinesToOpenDocument o l go b = blockToOpenDocument o b - newLevel a l = do - nn <- length <$> gets stParaStyles - liststyles <- gets stListStyles - let ls = case liststyles of - [] -> (1,[]) -- should never happen - (s:_) -> s - modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : - drop 1 (stListStyles s) } - inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l + orderedList a@(_,ns,nd) l = do + lstName <- orderedListStyleName ns nd + let pn = if isTightList l then numberItemTightStyleName + else numberItemStyleName + let listAttrs = ("text:style-name", lstName) : startValueAttr a + inTags True "text:list" listAttrs <$> + orderedListToOpenDocument o pn l + +-- | Generate a @text:start-value@ attribute when the start value is not 1. 
+startValueAttr :: ListAttributes -> [(Text, Text)] +startValueAttr (s,_,_) | s /= 1 = [("text:start-value", tshow s)] +startValueAttr _ = [] isTightList :: [[Block]] -> Bool isTightList [] = False @@ -319,23 +394,14 @@ isTightList (b:_) | Plain {} : _ <- b = True | otherwise = False -newOrderedListStyle :: PandocMonad m - => Bool -> ListAttributes -> OD m (Int,Int) -newOrderedListStyle b a = do - ln <- (+) 1 . length <$> gets stListStyles - let nbs = orderedListLevelStyle a (ln, []) - pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln - modify $ \s -> s { stListStyles = nbs : stListStyles s } - return (ln,pn) - bulletListToOpenDocument :: PandocMonad m => WriterOptions -> [[Block]] -> OD m (Doc Text) bulletListToOpenDocument o b = do - ln <- (+) 1 . length <$> gets stListStyles - (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln - modify $ \s -> s { stListStyles = ns : stListStyles s } - is <- listItemsToOpenDocument ("P" <> tshow pn) o b - return $ inTags True "text:list" [("text:style-name", "L" <> tshow ln)] is + let pn = if isTightList b then bulletItemTightStyleName + else bulletItemStyleName + is <- listItemsToOpenDocument pn o b + return $ inTags True "text:list" + [("text:style-name", defaultBulletListStyleName)] is listItemsToOpenDocument :: PandocMonad m => Text -> WriterOptions -> [[Block]] -> OD m (Doc Text) @@ -354,16 +420,18 @@ deflistItemToOpenDocument o (t,d) = do return $ t' $$ d' inBlockQuote :: PandocMonad m - => WriterOptions -> Int -> [Block] -> OD m (Doc Text) -inBlockQuote o i (b:bs) + => WriterOptions -> Text -> [Block] -> OD m (Doc Text) +inBlockQuote o sty (b:bs) | BlockQuote l <- b = do increaseIndent ni <- paraStyle [("style:parent-style-name","Quotations")] - go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" <> tshow i) <$> inlinesToOpenDocument o l + inner <- inBlockQuote o ni (map plainToPara l) + resetIndent + go inner + | 
Para l <- b = go =<< inParagraphTagsWithStyle sty <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b - where go block = ($$) block <$> inBlockQuote o i bs -inBlockQuote _ _ [] = resetIndent >> return empty + where go block = ($$) block <$> inBlockQuote o sty bs +inBlockQuote _ _ [] = return empty -- | Convert a list of Pandoc blocks to OpenDocument. blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m (Doc Text) @@ -425,13 +493,16 @@ blockToOpenDocument o = \case if T.null ident then i else fmap mkBookmarkedDiv i - mkBlockQuote b = do increaseIndent - i <- paraStyle - [("style:parent-style-name","Quotations")] - inBlockQuote o i (map plainToPara b) - orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a - inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)] - <$> orderedListToOpenDocument o pn b + mkBlockQuote b = do sty <- paraStyle + [("style:parent-style-name","Quotations")] + inBlockQuote o sty (map plainToPara b) + orderedList a@(_,ns,nd) b = do + lstName <- orderedListStyleName ns nd + let pn = if isTightList b then numberItemTightStyleName + else numberItemStyleName + let listAttrs = ("text:style-name", lstName) : startValueAttr a + inTags True "text:list" listAttrs <$> + orderedListToOpenDocument o pn b table :: PandocMonad m => WriterOptions -> Ann.Table -> OD m (Doc Text) table opts (Ann.Table (ident, _, _) (Caption _ c) colspecs thead tbodies tfoot) = do @@ -591,14 +662,9 @@ tableItemToOpenDocument o s (n,c) = do aa = alignAttrib align a = [ ("table:style-name" , s ) , ("office:value-type", "string" ) ] ++ csa ++ rsa - itemParaStyle <- case aa of - [] -> return 0 - _ -> paraStyleFromParent n aa - let itemParaStyle' = case itemParaStyle of - 0 -> n - x -> "P" <> tshow x + itemParaStyle <- paraStyleFromParent n aa inTags True "table:table-cell" a <$> - withParagraphStyle o itemParaStyle' (map plainToPara i) + withParagraphStyle o itemParaStyle (map plainToPara i) -- | Convert a list of 
inline elements to OpenDocument. inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m (Doc Text) @@ -761,55 +827,6 @@ mkLink o identTypes s t d = then linkOrReference else link -bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text])) -bulletListStyle l = do - let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , tshow (i + 1)) - , ("text:style-name" , "Bullet_20_Symbols" ) - , ("style:num-suffix", "." ) - , ("text:bullet-char", T.singleton (bulletList !! i)) - ] (listLevelStyle (1 + i)) - bulletList = map chr $ cycle [8226,9702,9642] - listElStyle = map doStyles [0..9] - pn <- paraListStyle l - return (pn, (l, listElStyle)) - -orderedListLevelStyle :: ListAttributes -> (Int, [Doc Text]) -> (Int,[Doc Text]) -orderedListLevelStyle (s,n, d) (l,ls) = - let suffix = case d of - OneParen -> [("style:num-suffix", ")")] - TwoParens -> [("style:num-prefix", "(") - ,("style:num-suffix", ")")] - _ -> [("style:num-suffix", ".")] - format = case n of - UpperAlpha -> "A" - LowerAlpha -> "a" - UpperRoman -> "I" - LowerRoman -> "i" - _ -> "1" - listStyle = inTags True "text:list-level-style-number" - ([ ("text:level" , tshow $ 1 + length ls ) - , ("text:style-name" , "Numbering_20_Symbols") - , ("style:num-format", format ) - , ("text:start-value", tshow s ) - ] ++ suffix) (listLevelStyle (1 + length ls)) - in (l, ls ++ [listStyle]) - -listLevelStyle :: Int -> Doc Text -listLevelStyle i = - let indent = tshow (0.25 + (0.25 * fromIntegral i :: Double)) in - inTags True "style:list-level-properties" - [ ("text:list-level-position-and-space-mode", - "label-alignment") - , ("fo:text-align", "right") - ] $ - selfClosingTag "style:list-level-label-alignment" - [ ("text:label-followed-by", "listtab") - , ("text:list-tab-stop-position", indent <> "in") - , ("fo:text-indent", "-0.25in") - , ("fo:margin-left", indent <> "in") - ] - tableStyle :: Int -> Double -> [(Char,Double)] -> Doc Text tableStyle num textWidth wcs = let 
tableId = "Table" <> tshow (num + 1) @@ -847,15 +864,17 @@ tableStyle num textWidth wcs = columnStyles = map colStyle wcs in cellStyles $$ table $$ vcat columnStyles -paraStyle :: PandocMonad m => [(Text,Text)] -> OD m Int +-- | Generate (or reuse) a paragraph style with the given attributes. +-- When the only attribute is @style:parent-style-name@ and no +-- indent/tight overrides are active, the parent name is returned +-- directly (no @Pn@ automatic style is created), so that the +-- predefined style passes through unchanged. +paraStyle :: PandocMonad m => [(Text,Text)] -> OD m Text paraStyle attrs = do - pn <- (+) 1 . length <$> gets stParaStyles i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara b <- gets stInDefinition t <- gets stTight - let styleAttr = [ ("style:name" , "P" <> tshow pn) - , ("style:family" , "paragraph" )] - indentVal = flip (<>) "in" . tshow $ if b then max 0.5 i else i + let indentVal = flip (<>) "in" . tshow $ if b then max 0.5 i else i tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] @@ -866,31 +885,32 @@ paraStyle attrs = do , ("style:auto-text-indent" , "false" )] else [] attributes = indent <> tight - paraProps = if null attributes - then mempty - else selfClosingTag - "style:paragraph-properties" attributes - addParaStyle $ inTags True "style:style" (styleAttr <> attrs) paraProps - return pn - -paraStyleFromParent :: PandocMonad m => Text -> [(Text,Text)] -> OD m Int -paraStyleFromParent parent attrs = do - pn <- (+) 1 . 
length <$> gets stParaStyles - let styleAttr = [ ("style:name" , "P" <> tshow pn) - , ("style:family" , "paragraph") - , ("style:parent-style-name", parent)] - paraProps = if null attrs - then mempty - else selfClosingTag - "style:paragraph-properties" attrs - addParaStyle $ inTags True "style:style" styleAttr paraProps - return pn - - -paraListStyle :: PandocMonad m => Int -> OD m Int -paraListStyle l = paraStyle - [("style:parent-style-name","Text_20_body") - ,("style:list-style-name", "L" <> tshow l)] + case (attributes, attrs) of + ([], [("style:parent-style-name", parent)]) -> return parent + _ -> do + pn <- (+) 1 . length <$> gets stParaStyles + let name = "P" <> tshow pn + styleAttr = [ ("style:name" , name) + , ("style:family", "paragraph") ] + paraProps = if null attributes + then mempty + else selfClosingTag + "style:paragraph-properties" attributes + addParaStyle $ inTags True "style:style" (styleAttr <> attrs) paraProps + return name + +paraStyleFromParent :: PandocMonad m => Text -> [(Text,Text)] -> OD m Text +paraStyleFromParent parent attrs + | null attrs = return parent + | otherwise = do + pn <- (+) 1 . length <$> gets stParaStyles + let name = "P" <> tshow pn + styleAttr = [ ("style:name" , name) + , ("style:family" , "paragraph") + , ("style:parent-style-name", parent)] + paraProps = selfClosingTag "style:paragraph-properties" attrs + addParaStyle $ inTags True "style:style" styleAttr paraProps + return name paraTableStyles :: Text -> Int -> [Alignment] -> [(Text, Doc Text)] paraTableStyles _ _ [] = [] diff --git a/test/command/10791.md b/test/command/10791.md index e44878cb687b..4bf305cd6d63 100644 --- a/test/command/10791.md +++ b/test/command/10791.md @@ -5,10 +5,11 @@ Aboard **the luxury cruise ship Heart of the Ocean[^1] in the Atlantic Ocean**.. 
[^1]: **Heart of the Ocean** (海洋之心) – The Heart of the Ocean ^D Aboard -the luxury cruise ship Heart of the -Ocean1Heart +the luxury cruise ship +Heart of the +Ocean1Heart of the Ocean (海洋之心) – The Heart of the -Ocean +Ocean in the Atlantic Ocean ``` diff --git a/test/command/2434.md b/test/command/2434.md index 4f12b6f56b47..d5c9800e1f36 100644 --- a/test/command/2434.md +++ b/test/command/2434.md @@ -6,23 +6,23 @@ 2. beta * gamma ^D - + - a + a - b - + b + - alpha + alpha - beta + beta - + - gamma + gamma @@ -40,20 +40,23 @@ more text -- this line is missing in the odt output ^D - + - text - some text - + text + some text + - sub item 1 + sub item + 1 - sub item 2 + sub item + 2 - more text – this line is missing in the - odt output + more text – this line is + missing in the odt output ``` + diff --git a/test/command/8256.md b/test/command/8256.md index 76398f607ae6..2eeedef6608c 100644 --- a/test/command/8256.md +++ b/test/command/8256.md @@ -16,14 +16,13 @@ Testing. - This -isa +isa test Someone Testing. diff --git a/test/command/9136.md b/test/command/9136.md new file mode 100644 index 000000000000..6718a4064f57 --- /dev/null +++ b/test/command/9136.md @@ -0,0 +1,89 @@ +Verify that the OpenDocument writer uses predefined list, paragraph, and +text styles instead of generating per-instance automatic styles. + +``` +% pandoc -t opendocument +- one +- two + +1. ordered one +2. ordered two + +Some *italic*, **bold**, ~~strike~~, and `code`. + +> A block quote. + +Final paragraph. +^D + + + one + + + two + + + + + ordered + one + + + ordered + two + + +Some +italic, +bold, +strike, and +code. +A block quote. +Final paragraph. +``` + +A non-default ordered list (start value other than 1, or non-decimal +format) generates a single named override style, not one per instance. + +``` +% pandoc -t opendocument +A) upper-alpha one +B) upper-alpha two + +5. start at five +6. 
and continue +^D + + + upper-alpha + one + + + upper-alpha + two + + + + + start at + five + + + and + continue + + +``` + +A nested block quote uses the predefined `Quotations` style for the +outer paragraph, and inherits indent from it for the inner paragraph. + +``` +% pandoc -t opendocument +> First. +> +> > Nested. +^D +First. +Nested. +``` diff --git a/test/command/table-with-column-span.md b/test/command/table-with-column-span.md index d4e8bebd4968..a53b24be5ec7 100644 --- a/test/command/table-with-column-span.md +++ b/test/command/table-with-column-span.md @@ -315,11 +315,11 @@ - Octet + Octet no. 1 - Octet + Octet no. 2 diff --git a/test/writer.opendocument b/test/writer.opendocument index 27703db00514..e617fdd460a3 100644 --- a/test/writer.opendocument +++ b/test/writer.opendocument @@ -4,1172 +4,212 @@ - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - + + - - - - + + - - - - - - + + - - - - + + - - - - + + - - - - + + - - - - - - + + - - - - + + - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1187,7 +227,7 @@ of them are adapted from John Gruber’s markdown test suite. link Level 3 with -emphasis +emphasis Level 4 Level @@ -1196,7 +236,7 @@ link Level 2 with -emphasis +emphasis Level 3 with no blank line @@ -1218,23 +258,24 @@ breakhere. Block Quotes E-mail style: -This is a block quote. It is pretty short. -Code in a block quote: -sub status { -print "working"; -} -A list: - +This is a block quote. It is pretty +short. 
+Code in a block quote: +sub status { +print "working"; +} +A list: + - item one + item one - item two + item two -Nested block quotes: -nested -nested +Nested block quotes: +nested +nested This should not be a block quote: 2 > 1. And a following paragraph. @@ -1242,163 +283,163 @@ Quotes Code Blocks Code: ----- (should be four hyphens) - -sub status { -print "working"; -} - -this code block is indented by one tab +---- (should be four hyphens) + +sub status { +print "working"; +} + +this code block is indented by one tab And: -this code block is indented by two tabs - -These should not be escaped: \$ \\ \> \[ \{ +this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ Lists Unordered Asterisks tight: - + - asterisk 1 + asterisk 1 - asterisk 2 + asterisk 2 - asterisk 3 + asterisk 3 Asterisks loose: - + - asterisk 1 + asterisk 1 - asterisk 2 + asterisk 2 - asterisk 3 + asterisk 3 Pluses tight: - + - Plus 1 + Plus 1 - Plus 2 + Plus 2 - Plus 3 + Plus 3 Pluses loose: - + - Plus 1 + Plus 1 - Plus 2 + Plus 2 - Plus 3 + Plus 3 Minuses tight: - + - Minus 1 + Minus 1 - Minus 2 + Minus 2 - Minus 3 + Minus 3 Minuses loose: - + - Minus 1 + Minus 1 - Minus 2 + Minus 2 - Minus 3 + Minus 3 Ordered Tight: - + - First + First - Second + Second - Third + Third and: - + - One + One - Two + Two - Three + Three Loose using tabs: - + - First + First - Second + Second - Third + Third and using spaces: - + - One + One - Two + Two - Three + Three Multiple paragraphs: - + - Item 1, graf one. - Item 1. graf two. The quick brown fox jumped - over the lazy dog’s back. + Item 1, graf one. + Item 1. graf two. The quick brown + fox jumped over the lazy dog’s back. - Item 2. + Item 2. - Item 3. + Item 3. 
Nested - + - Tab + Tab - Tab + Tab - Tab + Tab @@ -1406,95 +447,97 @@ Blocks Here’s another: - + - First + First - Second: - + Second: + - Fee + Fee - Fie + Fie - Foe + Foe - Third + Third Same thing but with paragraphs: - + - First + First - Second: - + Second: + - Fee + Fee - Fie + Fie - Foe + Foe - Third + Third Tabs and spaces - + - this is a list item indented with + this is a list item indented with tabs - this is a list item indented with - spaces + this is a list item indented with + spaces - this is an example list item indented with - tabs + this is an example list item + indented with tabs - this is an example list item indented with - spaces + this is an example list item + indented with spaces Fancy list markers - + - begins with 2 + begins with 2 - and now 3 - with a continuation - + and now 3 + with a continuation + - sublist with roman numerals, starting with - 4 + sublist with roman + numerals, starting with 4 - more items - + more items + - a subsublist + a + subsublist - a subsublist + a + subsublist @@ -1502,18 +545,20 @@ list markers Nesting: - + - Upper Alpha - + Upper Alpha + - Upper Roman. - + Upper Roman. + - Decimal start with 6 - + Decimal start with + 6 + - Lower alpha with paren + Lower alpha + with paren @@ -1523,15 +568,15 @@ list markers Autonumbering: - + - Autonumber. + Autonumber. - More. - + More. + - Nested. + Nested. 
@@ -1569,13 +614,13 @@ fruit yellow fruit Multiple blocks with italics: -apple +apple red fruitcontains seeds, crisp, pleasant to taste -orange +orange orange -fruit{ orange code block }orange +fruit{ orange code block }orange block quote Multiple definitions, tight: @@ -1601,12 +646,12 @@ marker, alternate markers: computer orange orange -fruit +fruit - sublist + sublist - sublist + sublist HTML @@ -1618,66 +663,67 @@ Blocks bar Interpreted markdown in a table: This is -emphasized +emphasized And this is -strong +strong Here’s a simple block: foo This should be a code block, though: -<div> -foo -</div> +<div> +foo +</div> As should this: -<div>foo</div> +<div>foo</div> Now, nested: foo This should just be an HTML comment: Multiline: Code block: -<!-- Comment --> +<!-- Comment --> Just plain comment, with trailing spaces on the line: Code: -<hr /> +<hr /> Hr’s: Inline Markup This is -emphasized, and so -is this. +emphasized, and so +is this. This is -strong, and so -is this. +strong, and so +is this. An -emphasized +emphasized link. -This is +This is strong and em. So is -this word. -This is +this word. +This is strong and em. So is -this word. +this word. This is code: >, $, \, \$, <html>. -This is -strikeout. +This +is +strikeout. Superscripts: -abcd -ahello -ahello there. +abcd +ahello +ahello there. Subscripts: -H2O, -H23O, -Hmany of themO. +H2O, +H23O, +Hmany of themO. These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. @@ -1702,52 +748,55 @@ five. Ellipses…and…and…. LaTeX - + - + - 2 + 2 = 4 + 2 + 2 = 4 - x ∈ y + x ∈ y - α ∧ ω + α ∧ ω - 223 + 223 - p-Tree + p-Tree - Here’s some display math: + Here’s some display math: $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ - Here’s one that has a line break in it: - α + ω × x2. + Here’s one that has a line + break in it: + α + ω × x2. These shouldn’t be math: - + - To get the famous equation, write + To get the famous + equation, write $e = mc^2$. 
- $22,000 is a - lot of money. So is $34,000. (It - worked if “lot” is emphasized.) + $22,000 is a + lot of money. So is + $34,000. (It worked if “lot” is emphasized.) - Shoes ($20) and socks ($5). + Shoes ($20) and socks + ($5). - Escaped + Escaped $: $73 - this should be emphasized + this should be emphasized 23$. @@ -1756,21 +805,21 @@ five. Special Characters Here is some unicode: - + - I hat: Î + I hat: Î - o umlaut: ö + o umlaut: ö - section: § + section: § - set membership: ∈ + set membership: ∈ - copyright: © + copyright: © AT&T has an ampersand in their @@ -1830,7 +879,7 @@ by itself should be a link. Indented thrice. This should [not][] be a link. -[not]: /url +[not]: /url Foo bar. Foo @@ -1852,24 +901,24 @@ link in pointy braces. Autolinks With an ampersand: http://example.com/?foo=1&bar=2 - + - In a list? + In a list? - http://example.com/ + http://example.com/ - It should. + It should. An e-mail address: nobody@nowhere.net -Blockquoted: +Blockquoted: http://example.com/ Auto-links should not occur here: <http://example.com/> -or here: <http://example.com/> +or here: <http://example.com/> Images From “Voyage dans la Lune” by @@ -1889,24 +938,24 @@ another.Subsequent blocks are indented to show that they belong to the footnote (as with list -items).{ <code> }If +items).{ <code> }If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. This should -not be a footnote reference, because -it contains a space.[^my note] Here is an inline +not be a footnote reference, +because it contains a space.[^my note] Here is an inline note.3This -is easier to type. Inline notes may -contain +is easier to type. Inline +notes may contain links and ] verbatim characters, as well as [bracketed text]. -Notes can go in +Notes can go in quotes.4In quote. - + - And in list + And in list items.5In list. 
From 80b70b76b3261949b4e2c409080a3a1a14ea2013 Mon Sep 17 00:00:00 2001 From: Johan Larsson <13087841+jolars@users.noreply.github.com> Date: Thu, 28 May 2026 00:07:29 +0200 Subject: [PATCH 23/29] Markdown reader: allow grid tables to be indented. (#11671) Like the other table syntaxes (pipe, simple, and multiline tables) and block-level constructs generally, a grid table may now be indented by up to three spaces and still be recognized as a table. Previously the grid-table parser required the table to begin at the left margin, so an indented grid table was parsed as a paragraph. The leading indentation is stripped uniformly from each line before the table is parsed, so an indented grid table produces the same AST as its non-indented equivalent. Adds a command test. --- src/Text/Pandoc/Readers/Markdown.hs | 15 ++++++- test/command/grid-table-indented.md | 65 +++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 test/command/grid-table-indented.md diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f0f6744c6000..a5237219b347 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1403,7 +1403,20 @@ multilineTableHeader headless = try $ do -- ending with a footer (dashed line followed by blank line). gridTable :: PandocMonad m => MarkdownParser m (F TableComponents) -gridTable = gridTableWith' NormalizeHeader parseBlocks +gridTable = try $ do + -- Like other block-level constructs, a grid table may be indented by + -- up to three spaces. The underlying grid-table parser expects the + -- table to begin at the left margin, so strip a uniform indentation + -- from every line before handing it off. 
+ indent <- T.length <$> lookAhead nonindentSpaces + if indent == 0 + then gridTableWith' NormalizeHeader parseBlocks + else do + let gridLine = try $ count indent (char ' ') + *> lookAhead (oneOf "+|") + *> anyLineNewline + rawTable <- T.concat <$> many1 gridLine + parseFromString' (gridTableWith' NormalizeHeader parseBlocks) rawTable pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do diff --git a/test/command/grid-table-indented.md b/test/command/grid-table-indented.md new file mode 100644 index 000000000000..cf6f24c07610 --- /dev/null +++ b/test/command/grid-table-indented.md @@ -0,0 +1,65 @@ +Like other block-level constructs, grid tables may be indented by up to +three spaces and are still recognized as tables. + +``` +% pandoc -f markdown -t html + +---+---+ + | a | b | + +===+===+ + | 1 | 2 | + +---+---+ +^D + ++++ + + + + + + + + + + + + +
ab
12
+``` + +A headerless indented grid table is recognized too. + +``` +% pandoc -f markdown -t html + +------+------+ + | foo | bar | + +------+------+ +^D + ++++ + + + + + + +
foobar
+``` + +A grid table indented four spaces is a code block, not a table. + +``` +% pandoc -f markdown -t html + +---+---+ + | a | b | + +---+---+ +^D +
+---+---+
+| a | b |
++---+---+
+``` From 1ed849f162ab99c03bf46bdd9d8f27d9721fb27d Mon Sep 17 00:00:00 2001 From: smc181002 Date: Fri, 29 May 2026 16:05:03 +0530 Subject: [PATCH 24/29] added auto_identifiers to existing tests --- test/command/11635.md | 2 +- test/command/11668.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/command/11635.md b/test/command/11635.md index 1639c82f68ca..cf067493514e 100644 --- a/test/command/11635.md +++ b/test/command/11635.md @@ -11,7 +11,7 @@ another \ one;123456 .TE ^D -

HEADING

+

HEADING

diff --git a/test/command/11668.md b/test/command/11668.md index 664d992f3d3e..6fbb1d134612 100644 --- a/test/command/11668.md +++ b/test/command/11668.md @@ -16,7 +16,7 @@ This should still be indented, until some other macro is called to change it. Like this. ^D

TH "TEST" "1" "2026-05-25" "test v1.0.0" "test manual"

-

HEADING

+

HEADING

This is the unindented tag.
From 06524d8f9fd28ce35648a2afb136ea5331279a6d Mon Sep 17 00:00:00 2001 From: smc181002 Date: Fri, 29 May 2026 16:27:37 +0530 Subject: [PATCH 25/29] Moved tests for GFM and Ascii from Old tests to the command based tests --- test/Tests/Readers/Man.hs | 45 --------------------------------------- test/command/8852.md | 23 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 45 deletions(-) create mode 100644 test/command/8852.md diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 0030264c5818..0de03213be85 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -25,20 +25,6 @@ man :: Text -> Pandoc man = purely $ readMan def { readerExtensions = disableExtension Ext_auto_identifiers pandocExtensions } -manAutoIds :: Text -> Pandoc -manAutoIds = purely $ readMan def { readerExtensions = - enableExtension Ext_auto_identifiers pandocExtensions } - -manGfmIds :: Text -> Pandoc -manGfmIds = purely $ readMan def { readerExtensions = - enableExtension Ext_gfm_auto_identifiers $ - enableExtension Ext_auto_identifiers pandocExtensions } - -manAsciiIds :: Text -> Pandoc -manAsciiIds = purely $ readMan def { readerExtensions = - enableExtension Ext_ascii_identifiers $ - enableExtension Ext_auto_identifiers pandocExtensions } - infix 4 =: (=:) :: (ToString c, HasCallStack) => String -> (Text, c) -> TestTree @@ -155,36 +141,5 @@ tests = [ (TableHead nullAttr []) [TableBody nullAttr 0 [] $ map toRow [[plain $ text "a b c d"], [plain $ str "f"]]] (TableFoot nullAttr []) - ], - testGroup "AutoIdentifiers" [ - test manAutoIds "H1 with auto id" - (".SH The header\n" - =?> headerWith ("the-header",[],[]) 1 (text "The header")) - , test manAutoIds "H2 with auto id" - (".SS \"The header 2\"" - =?> headerWith ("the-header-2",[],[]) 2 (text "The header 2")) - , test manAutoIds "Multiple headers with auto ids" - (".SH First\n.SH Second\n.SH 3rd Header" - =?> headerWith ("first",[],[]) 1 (text "First") <> - headerWith ("second",[],[]) 1 (text 
"Second") <> - headerWith ("rd-header",[],[]) 1 (text "3rd Header")) - ], - testGroup "GFMAutoIdentifiers" [ - test manGfmIds "H1 with auto id" - (".SH The header\n" - =?> headerWith ("the-header",[],[]) 1 (text "The header")) - , test manGfmIds "H2 with auto id" - (".SS \"The header 2\"" - =?> headerWith ("the-header-2",[],[]) 2 (text "The header 2")) - , test manGfmIds "Multiple headers with auto ids" - (".SH First\n.SH Second\n.SH 3rd Header" - =?> headerWith ("first",[],[]) 1 (text "First") <> - headerWith ("second",[],[]) 1 (text "Second") <> - headerWith ("3rd-header",[],[]) 1 (text "3rd Header")) - ], - testGroup "ParseAsciiIdentifiers" [ - test manAsciiIds "H1 for autoid and non ascii chars in header" - (".SH Über den Flüssen\n" - =?> headerWith ("uber-den-flussen",[],[]) 1 (text "Über den Flüssen")) ] ] diff --git a/test/command/8852.md b/test/command/8852.md new file mode 100644 index 000000000000..05bd4a358e0e --- /dev/null +++ b/test/command/8852.md @@ -0,0 +1,23 @@ +Github Formatted Markdown Identifiers +``` +% pandoc -f man+gfm_auto_identifiers -t html +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SH 1st HEADING +.SH C++ & Rust +.SH HEADING +.SH HEADING +^D +

1st HEADING

+

C++ & Rust

+

HEADING

+

HEADING

+``` + +Ascii Identifiers test +``` +% pandoc -f man+ascii_identifiers -t html +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SH Über den Flüssen +^D +

Über den Flüssen

+``` From 387234e60a6dfdee29764a5a6a1f4630d219414d Mon Sep 17 00:00:00 2001 From: smc181002 Date: Fri, 29 May 2026 22:18:05 +0530 Subject: [PATCH 26/29] Updated old tests for man to use default auto_identifiers --- test/Tests/Old.hs | 2 +- test/Tests/Readers/Man.hs | 3 +-- test/command/8852.md | 2 +- test/man-reader.native | 36 ++++++++++++++++++++---------------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 174cbd1cbee7..ed5a3ef938bf 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -224,7 +224,7 @@ tests pandocPath = "creole-reader.txt" "creole-reader.native" ] , testGroup "man" - [ test' "reader" ["-r", "man-auto_identifiers", "-w", "native", "-s"] + [ test' "reader" ["-r", "man", "-w", "native", "-s"] "man-reader.man" "man-reader.native" ] , testGroup "org" diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 0de03213be85..cd8b33358c7d 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -22,8 +22,7 @@ import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder man :: Text -> Pandoc -man = purely $ readMan def { readerExtensions = - disableExtension Ext_auto_identifiers pandocExtensions } +man = purely $ readMan def infix 4 =: (=:) :: (ToString c, HasCallStack) diff --git a/test/command/8852.md b/test/command/8852.md index 05bd4a358e0e..3a0048b49ab9 100644 --- a/test/command/8852.md +++ b/test/command/8852.md @@ -4,7 +4,7 @@ Github Formatted Markdown Identifiers .TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" .SH 1st HEADING .SH C++ & Rust -.SH HEADING +.SH HEADING .SH HEADING ^D

1st HEADING

diff --git a/test/man-reader.native b/test/man-reader.native index e0d14ca792ee..117793fcd07a 100644 --- a/test/man-reader.native +++ b/test/man-reader.native @@ -42,11 +42,11 @@ Pandoc , Space , Str "*" ] - , Header 1 ( "" , [] , [] ) [ Str "Headers" ] + , Header 1 ( "headers" , [] , [] ) [ Str "Headers" ] , Header - 1 ( "" , [] , [] ) [ Str "Level" , Space , Str "1" ] + 1 ( "level-1" , [] , [] ) [ Str "Level" , Space , Str "1" ] , Header - 2 ( "" , [] , [] ) [ Str "Level" , Space , Str "2" ] + 2 ( "level-2" , [] , [] ) [ Str "Level" , Space , Str "2" ] , Para [ Str "*" , Space @@ -58,7 +58,7 @@ Pandoc , Space , Str "*" ] - , Header 1 ( "" , [] , [] ) [ Str "Paragraphs" ] + , Header 1 ( "paragraphs" , [] , [] ) [ Str "Paragraphs" ] , Para [ Str "Here's" , Space @@ -159,7 +159,9 @@ Pandoc , Str "*" ] , Header - 1 ( "" , [] , [] ) [ Str "Block" , Space , Str "Quotes" ] + 1 + ( "block-quotes" , [] , [] ) + [ Str "Block" , Space , Str "Quotes" ] , Para [ Str "Code" , Space @@ -182,7 +184,9 @@ Pandoc , [ Para [ Str "item" , Space , Str "two" ] ] ] , Header - 1 ( "" , [] , [] ) [ Str "Code" , Space , Str "Blocks" ] + 1 + ( "code-blocks" , [] , [] ) + [ Str "Code" , Space , Str "Blocks" ] , Para [ Str "Code:" ] , CodeBlock ( "" , [] , [] ) @@ -201,22 +205,22 @@ Pandoc , Space , Str "*" ] - , Header 1 ( "" , [] , [] ) [ Str "Lists" ] - , Header 2 ( "" , [] , [] ) [ Str "Unordered" ] + , Header 1 ( "lists" , [] , [] ) [ Str "Lists" ] + , Header 2 ( "unordered" , [] , [] ) [ Str "Unordered" ] , Para [ Str "Asterisks:" ] , BulletList [ [ Para [ Str "asterisk" , Space , Str "1" ] ] , [ Para [ Str "asterisk" , Space , Str "2" ] ] , [ Para [ Str "asterisk" , Space , Str "3" ] ] ] - , Header 2 ( "" , [] , [] ) [ Str "Ordered" ] + , Header 2 ( "ordered" , [] , [] ) [ Str "Ordered" ] , OrderedList ( 1 , Decimal , Period ) [ [ Para [ Str "First" ] ] , [ Para [ Str "Second" ] ] , [ Para [ Str "Third" ] ] ] - , Header 2 ( "" , [] , [] ) [ Str "Nested" ] + , Header 2 ( "nested" , [] , 
[] ) [ Str "Nested" ] , BulletList [ [ Para [ Str "Tab" ] , BulletList @@ -254,7 +258,7 @@ Pandoc ] , Header 2 - ( "" , [] , [] ) + ( "different-styles" , [] , [] ) [ Str "different" , Space , Str "styles:" ] , OrderedList ( 1 , UpperAlpha , Period ) @@ -292,7 +296,7 @@ Pandoc ] ] ] - , Header 2 ( "" , [] , [] ) [ Str "Ordered" ] + , Header 2 ( "ordered-1" , [] , [] ) [ Str "Ordered" ] , Para [ Str "Definition" , Space , Str "lists" ] , DefinitionList [ ( [ Strong [ Str "term1" ] ] @@ -323,7 +327,7 @@ Pandoc ] , Header 1 - ( "" , [] , [] ) + ( "special-characters" , [] , [] ) [ Str "Special" , Space , Str "Characters" ] , Para [ Str "AT&T" @@ -364,7 +368,7 @@ Pandoc , Para [ Str "Bang:" , Space , Str "!" ] , Para [ Str "Plus:" , Space , Str "+" ] , Para [ Str "Minus:" , Space , Str "-" ] - , Header 1 ( "" , [] , [] ) [ Str "Links" ] + , Header 1 ( "links" , [] , [] ) [ Str "Links" ] , Para [ Link ( "" , [] , [] ) @@ -379,7 +383,7 @@ Pandoc ( "mailto:me@example.com" , "" ) , Str "." ] - , Header 1 ( "" , [] , [] ) [ Str "Macros" ] + , Header 1 ( "macros" , [] , [] ) [ Str "Macros" ] , Para [ Strong [ Str "Me" , Space , Str "Myself" ] , Space @@ -406,7 +410,7 @@ Pandoc , Strong [ Str "Author" ] , Str "." 
] - , Header 1 ( "" , [] , [] ) [ Str "Tables" ] + , Header 1 ( "tables" , [] , [] ) [ Str "Tables" ] , Table ( "" , [] , [] ) (Caption Nothing []) From a9c2c02833aa63188a525e1309179a3a4a1e1b9d Mon Sep 17 00:00:00 2001 From: smc181002 Date: Fri, 29 May 2026 22:22:08 +0530 Subject: [PATCH 27/29] Added reportLogMessages for logs from registerHeader --- src/Text/Pandoc/Readers/Man.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 28a72275bfcf..0b5d58d69b04 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -91,6 +91,7 @@ parseMan = do bs <- many parseBlock <* eof meta <- metadata <$> getState let (Pandoc _ blocks) = doc $ mconcat bs + reportLogMessages return $ Pandoc meta blocks parseBlock :: PandocMonad m => ManParser m Blocks From fe0a68d1760b32bfb0abb6bf0b0144148d764e35 Mon Sep 17 00:00:00 2001 From: smc181002 Date: Fri, 29 May 2026 23:03:45 +0530 Subject: [PATCH 28/29] Replaced headerWith with header in tests without auto_identifiers --- test/Tests/Readers/Man.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index cd8b33358c7d..bb5316b5c497 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -47,10 +47,10 @@ tests = [ =?> para (strong (str "foo") <> emph (str "bar")) , "H1" =: ".SH The header\n" - =?> headerWith ("",[],[]) 1 (text "The header") + =?> header 1 (text "The header") , "H2" =: ".SS \"The header 2\"" - =?> headerWith ("",[],[]) 2 (text "The header 2") + =?> header 2 (text "The header 2") , "Macro args" =: ".B \"single arg with \"\"Q\"\"\"" =?>para (strong $ text "single arg with \"Q\"") From 55a55816b8f7f153f33ade2ead5bc4b5f717494b Mon Sep 17 00:00:00 2001 From: smc181002 Date: Sat, 30 May 2026 00:06:57 +0530 Subject: [PATCH 29/29] Added tests for headers without auto_identifiers --- test/command/8852.md | 20 ++++++++++++++++++++ 1 file changed, 20
insertions(+) diff --git a/test/command/8852.md b/test/command/8852.md index 3a0048b49ab9..e9e9b7522289 100644 --- a/test/command/8852.md +++ b/test/command/8852.md @@ -21,3 +21,23 @@ Ascii Identifiers test ^D

Über den Flüssen

``` + +Headers without Auto Identifiers test +``` +% pandoc -f man-auto_identifiers -t native +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SS Level 2 +.SS different styles: +.SS Ordered +.SS Ordered +^D +[ Header + 2 ( "" , [] , [] ) [ Str "Level" , Space , Str "2" ] +, Header + 2 + ( "" , [] , [] ) + [ Str "different" , Space , Str "styles:" ] +, Header 2 ( "" , [] , [] ) [ Str "Ordered" ] +, Header 2 ( "" , [] , [] ) [ Str "Ordered" ] +] +```