diff --git a/MANUAL.txt b/MANUAL.txt index e2a3e90d76d1..64884e69a6f4 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3668,13 +3668,13 @@ automatically assigned a unique identifier based on the heading text. This extension can be enabled/disabled for the following formats: input formats -: `markdown`, `latex`, `rst`, `mediawiki`, `textile` +: `markdown`, `latex`, `rst`, `mediawiki`, `textile`, `man` output formats : `markdown`, `muse` enabled by default in -: `markdown`, `muse` +: `markdown`, `muse`, `man` The default algorithm used to derive the identifier from the heading text is: diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 7fcb214cdbf4..71b5da799d51 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -664,4 +664,5 @@ getAllExtensions f = universalExtensions <> getAll f [ Ext_smart ] getAll "typst" = extensionsFromList [Ext_citations, Ext_smart] getAll "djot" = extensionsFromList [Ext_sourcepos] + getAll "man" = autoIdExtensions getAll _ = mempty diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 50586daf87c0..0b5d58d69b04 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -26,23 +26,40 @@ import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options +import Text.Pandoc.Parsing.Capabilities import Text.Pandoc.Parsing import Text.Pandoc.Walk (query) import Text.Pandoc.Readers.Roff -- TODO explicit imports import qualified Text.Pandoc.Parsing as P import qualified Data.Foldable as Foldable +import qualified Data.Set as Set import Text.Pandoc.Shared (extractSpaces) data ManState = ManState { readerOptions :: ReaderOptions + , manLogMessages :: [LogMessage] + , manIdentifiers :: Set.Set T.Text , metadata :: Meta , tableCellsPlain :: Bool } deriving Show instance Default ManState where def = ManState { readerOptions = def + , 
manLogMessages = [] + , manIdentifiers = Set.empty , metadata = nullMeta , tableCellsPlain = True } +instance HasReaderOptions ManState where + extractReaderOptions = readerOptions + +instance HasLogMessages ManState where + addLogMessage msg st = st{ manLogMessages = msg : manLogMessages st } + getLogMessages st = reverse $ manLogMessages st + +instance HasIdentifierList ManState where + extractIdentifierList = manIdentifiers + updateIdentifierList f st = st{ manIdentifiers = f $ manIdentifiers st } + type ManParser m = P.ParsecT [RoffToken] ManState m @@ -74,6 +91,7 @@ parseMan = do bs <- many parseBlock <* eof meta <- metadata <$> getState let (Pandoc _ blocks) = doc $ mconcat bs + reportLogMessages return $ Pandoc meta blocks parseBlock :: PandocMonad m => ManParser m Blocks @@ -417,7 +435,9 @@ parseHeader = do else return $ mconcat $ intersperse B.space $ map linePartsToInlines args let lvl = if name == "SH" then 1 else 2 - return $ header lvl contents + attr <- registerHeader nullAttr contents + + return $ B.headerWith attr lvl contents parseBlockQuote :: PandocMonad m => ManParser m Blocks parseBlockQuote = blockQuote <$> diff --git a/test/command/11635.md b/test/command/11635.md index 1639c82f68ca..cf067493514e 100644 --- a/test/command/11635.md +++ b/test/command/11635.md @@ -11,7 +11,7 @@ another \ one;123456 .TE ^D -

HEADING

+

HEADING

diff --git a/test/command/11668.md b/test/command/11668.md index 664d992f3d3e..6fbb1d134612 100644 --- a/test/command/11668.md +++ b/test/command/11668.md @@ -16,7 +16,7 @@ This should still be indented, until some other macro is called to change it. Like this. ^D

TH "TEST" "1" "2026-05-25" "test v1.0.0" "test manual"

-

HEADING

+

HEADING

This is the unindented tag.
diff --git a/test/command/8852.md b/test/command/8852.md new file mode 100644 index 000000000000..e9e9b7522289 --- /dev/null +++ b/test/command/8852.md @@ -0,0 +1,43 @@ +GitHub Flavored Markdown Identifiers +``` +% pandoc -f man+gfm_auto_identifiers -t html +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SH 1st HEADING +.SH C++ & Rust +.SH HEADING +.SH HEADING +^D +

<h1 id="1st-heading">1st HEADING</h1>
+<h1 id="c--rust">C++ &amp; Rust</h1>
+<h1 id="heading">HEADING</h1>
+<h1 id="heading-1">HEADING</h1>

+``` + +ASCII Identifiers test +``` +% pandoc -f man+ascii_identifiers -t html +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SH Über den Flüssen +^D +

<h1 id="uber-den-flussen">Über den Flüssen</h1>

+``` + +Headers without Auto Identifiers test +``` +% pandoc -f man-auto_identifiers -t native +.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual" +.SS Level 2 +.SS different styles: +.SS Ordered +.SS Ordered +^D +[ Header + 2 ( "" , [] , [] ) [ Str "Level" , Space , Str "2" ] +, Header + 2 + ( "" , [] , [] ) + [ Str "different" , Space , Str "styles:" ] +, Header 2 ( "" , [] , [] ) [ Str "Ordered" ] +, Header 2 ( "" , [] , [] ) [ Str "Ordered" ] +] +``` diff --git a/test/man-reader.native b/test/man-reader.native index e0d14ca792ee..117793fcd07a 100644 --- a/test/man-reader.native +++ b/test/man-reader.native @@ -42,11 +42,11 @@ Pandoc , Space , Str "*" ] - , Header 1 ( "" , [] , [] ) [ Str "Headers" ] + , Header 1 ( "headers" , [] , [] ) [ Str "Headers" ] , Header - 1 ( "" , [] , [] ) [ Str "Level" , Space , Str "1" ] + 1 ( "level-1" , [] , [] ) [ Str "Level" , Space , Str "1" ] , Header - 2 ( "" , [] , [] ) [ Str "Level" , Space , Str "2" ] + 2 ( "level-2" , [] , [] ) [ Str "Level" , Space , Str "2" ] , Para [ Str "*" , Space @@ -58,7 +58,7 @@ Pandoc , Space , Str "*" ] - , Header 1 ( "" , [] , [] ) [ Str "Paragraphs" ] + , Header 1 ( "paragraphs" , [] , [] ) [ Str "Paragraphs" ] , Para [ Str "Here's" , Space @@ -159,7 +159,9 @@ Pandoc , Str "*" ] , Header - 1 ( "" , [] , [] ) [ Str "Block" , Space , Str "Quotes" ] + 1 + ( "block-quotes" , [] , [] ) + [ Str "Block" , Space , Str "Quotes" ] , Para [ Str "Code" , Space @@ -182,7 +184,9 @@ Pandoc , [ Para [ Str "item" , Space , Str "two" ] ] ] , Header - 1 ( "" , [] , [] ) [ Str "Code" , Space , Str "Blocks" ] + 1 + ( "code-blocks" , [] , [] ) + [ Str "Code" , Space , Str "Blocks" ] , Para [ Str "Code:" ] , CodeBlock ( "" , [] , [] ) @@ -201,22 +205,22 @@ Pandoc , Space , Str "*" ] - , Header 1 ( "" , [] , [] ) [ Str "Lists" ] - , Header 2 ( "" , [] , [] ) [ Str "Unordered" ] + , Header 1 ( "lists" , [] , [] ) [ Str "Lists" ] + , Header 2 ( "unordered" , [] , [] ) [ Str "Unordered" ] , Para [ Str "Asterisks:" 
] , BulletList [ [ Para [ Str "asterisk" , Space , Str "1" ] ] , [ Para [ Str "asterisk" , Space , Str "2" ] ] , [ Para [ Str "asterisk" , Space , Str "3" ] ] ] - , Header 2 ( "" , [] , [] ) [ Str "Ordered" ] + , Header 2 ( "ordered" , [] , [] ) [ Str "Ordered" ] , OrderedList ( 1 , Decimal , Period ) [ [ Para [ Str "First" ] ] , [ Para [ Str "Second" ] ] , [ Para [ Str "Third" ] ] ] - , Header 2 ( "" , [] , [] ) [ Str "Nested" ] + , Header 2 ( "nested" , [] , [] ) [ Str "Nested" ] , BulletList [ [ Para [ Str "Tab" ] , BulletList @@ -254,7 +258,7 @@ Pandoc ] , Header 2 - ( "" , [] , [] ) + ( "different-styles" , [] , [] ) [ Str "different" , Space , Str "styles:" ] , OrderedList ( 1 , UpperAlpha , Period ) @@ -292,7 +296,7 @@ Pandoc ] ] ] - , Header 2 ( "" , [] , [] ) [ Str "Ordered" ] + , Header 2 ( "ordered-1" , [] , [] ) [ Str "Ordered" ] , Para [ Str "Definition" , Space , Str "lists" ] , DefinitionList [ ( [ Strong [ Str "term1" ] ] @@ -323,7 +327,7 @@ Pandoc ] , Header 1 - ( "" , [] , [] ) + ( "special-characters" , [] , [] ) [ Str "Special" , Space , Str "Characters" ] , Para [ Str "AT&T" @@ -364,7 +368,7 @@ Pandoc , Para [ Str "Bang:" , Space , Str "!" ] , Para [ Str "Plus:" , Space , Str "+" ] , Para [ Str "Minus:" , Space , Str "-" ] - , Header 1 ( "" , [] , [] ) [ Str "Links" ] + , Header 1 ( "links" , [] , [] ) [ Str "Links" ] , Para [ Link ( "" , [] , [] ) @@ -379,7 +383,7 @@ Pandoc ( "mailto:me@example.com" , "" ) , Str "." ] - , Header 1 ( "" , [] , [] ) [ Str "Macros" ] + , Header 1 ( "macros" , [] , [] ) [ Str "Macros" ] , Para [ Strong [ Str "Me" , Space , Str "Myself" ] , Space @@ -406,7 +410,7 @@ Pandoc , Strong [ Str "Author" ] , Str "." ] - , Header 1 ( "" , [] , [] ) [ Str "Tables" ] + , Header 1 ( "tables" , [] , [] ) [ Str "Tables" ] , Table ( "" , [] , [] ) (Caption Nothing [])