Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7d4ed31
Man reader: add auto_identifiers support
smc181002 May 10, 2026
e06bf5c
Add tests for Man Reader with auto identifiers
smc181002 May 17, 2026
71fbfd3
Updated MANUAL.txt to include Man under auto_identifiers
smc181002 May 17, 2026
db13ea0
fixed spacing in Extensions.hs
smc181002 May 17, 2026
3b3bc70
Add NAME heading to pandoc-lua, pandoc-server man pages.
jgm May 11, 2026
92aa560
Roff reader: handle `\` line continuation in table cells.
jgm May 11, 2026
0b935b7
Translations: find lang-script type translations.
jgm May 12, 2026
ccf272b
Docx reader: improve treatment of tblHeader element.
jgm May 12, 2026
428be28
Docx reader: fix bug in bitmask checking.
jgm May 12, 2026
91a47d2
Error messages: use single quotes around paths and format names.
jgm May 13, 2026
a02b3bd
MANUAL: improve description of reference links.
jgm May 13, 2026
fa0cf76
HTML reader: parse aside as a Div.
jgm May 14, 2026
ae93b4f
Use latest citeproc.
jgm May 14, 2026
c94a324
Typst writer: add zero-width space before a Span label...
jgm May 15, 2026
039663e
EPUB writer: support multiple EPUB versions for raw content (#11628).
vreoo May 17, 2026
1137a2c
gridTable: fix calculation of column widths for default columns.
jgm May 23, 2026
9e029d2
Use latest dev commonmark-hs.
jgm May 25, 2026
2b93ecc
Fix cabal.project, stack.yaml (specify subdir).
jgm May 25, 2026
28a6130
Docx writer: fix empty keywords in core document properties (#11666)
SAY-5 May 25, 2026
b7bf083
Man reader: better handling of .TP macro.
jgm May 26, 2026
18f5c0b
Use MathJax v4 in default HTML templates.
jgm May 26, 2026
83bbb1e
OpenDocument/ODT writer: use predefined styles. (#11672)
jgm May 27, 2026
80b70b7
Markdown reader: allow grid tables to be indented. (#11671)
jolars May 27, 2026
1ed849f
added auto_identifiers to existing tests
smc181002 May 29, 2026
06524d8
Moved tests for GFM and Ascii from Old tests to the command based tests
smc181002 May 29, 2026
4826003
Merge branch 'main' into issue-8852
smc181002 May 29, 2026
387234e
Updated old tests for man to use default auto_identifiers
smc181002 May 29, 2026
a9c2c02
Added reportLogMessages for logs from registerHeader
smc181002 May 29, 2026
fe0a68d
Replaced headerWith to header in tests without auto_identifiers
smc181002 May 29, 2026
55a5581
added tests for headers without auto_identifiers
smc181002 May 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions MANUAL.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3668,13 +3668,13 @@ automatically assigned a unique identifier based on the heading text.
This extension can be enabled/disabled for the following formats:

input formats
: `markdown`, `latex`, `rst`, `mediawiki`, `textile`
: `markdown`, `latex`, `rst`, `mediawiki`, `textile`, `man`

output formats
: `markdown`, `muse`

enabled by default in
: `markdown`, `muse`
: `markdown`, `muse`, `man`

The default algorithm used to derive the identifier from the
heading text is:
Expand Down
1 change: 1 addition & 0 deletions src/Text/Pandoc/Extensions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -664,4 +664,5 @@ getAllExtensions f = universalExtensions <> getAll f
[ Ext_smart ]
getAll "typst" = extensionsFromList [Ext_citations, Ext_smart]
getAll "djot" = extensionsFromList [Ext_sourcepos]
getAll "man" = autoIdExtensions
getAll _ = mempty
22 changes: 21 additions & 1 deletion src/Text/Pandoc/Readers/Man.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,40 @@ import Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing.Capabilities
import Text.Pandoc.Parsing
import Text.Pandoc.Walk (query)
import Text.Pandoc.Readers.Roff -- TODO explicit imports
import qualified Text.Pandoc.Parsing as P
import qualified Data.Foldable as Foldable
import qualified Data.Set as Set
import Text.Pandoc.Shared (extractSpaces)

-- | Parser state carried through the man reader.
data ManState = ManState
  { readerOptions   :: ReaderOptions
    -- ^ Reader options; returned by 'extractReaderOptions'.
  , manLogMessages  :: [LogMessage]
    -- ^ Accumulated log messages, most recently added first
    -- ('getLogMessages' reverses the list before returning it).
  , manIdentifiers  :: Set.Set T.Text
    -- ^ Identifier set read and updated through 'HasIdentifierList'.
  , metadata        :: Meta
    -- ^ Document metadata; @parseMan@ reads it to build the final 'Pandoc'.
  , tableCellsPlain :: Bool
    -- ^ NOTE(review): presumably controls whether table cells are emitted
    -- as plain blocks; confirm against the table parser.
  } deriving Show

-- | Initial parser state: default reader options, no log messages, no
-- recorded identifiers, empty metadata, and 'tableCellsPlain' set to 'True'.
instance Default ManState where
  def =
    ManState
      { readerOptions   = def
      , manLogMessages  = mempty
      , manIdentifiers  = mempty
      , metadata        = nullMeta
      , tableCellsPlain = True
      }

-- | Reader options are read straight from the 'readerOptions' field.
instance HasReaderOptions ManState where
  extractReaderOptions ManState{ readerOptions = opts } = opts

-- | Log messages are consed onto 'manLogMessages' (newest first) and
-- reversed on retrieval, so 'getLogMessages' yields them in the order
-- they were added.
instance HasLogMessages ManState where
  addLogMessage newMsg st =
    st { manLogMessages = newMsg : manLogMessages st }
  getLogMessages = reverse . manLogMessages

-- | The identifier set lives in the 'manIdentifiers' field; updates apply
-- the supplied function to the current set and store the result.
instance HasIdentifierList ManState where
  extractIdentifierList ManState{ manIdentifiers = ids } = ids
  updateIdentifierList updateIds st =
    st { manIdentifiers = updateIds (manIdentifiers st) }

-- | Parsec parser over a stream of 'RoffToken's with 'ManState' as user
-- state, running in the underlying monad @m@.
type ManParser m = P.ParsecT [RoffToken] ManState m


Expand Down Expand Up @@ -74,6 +91,7 @@ parseMan = do
bs <- many parseBlock <* eof
meta <- metadata <$> getState
let (Pandoc _ blocks) = doc $ mconcat bs
reportLogMessages
return $ Pandoc meta blocks

parseBlock :: PandocMonad m => ManParser m Blocks
Expand Down Expand Up @@ -417,7 +435,9 @@ parseHeader = do
else return $ mconcat $ intersperse B.space
$ map linePartsToInlines args
let lvl = if name == "SH" then 1 else 2
return $ header lvl contents
attr <- registerHeader nullAttr contents

Comment on lines +438 to +439
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that registerHeader doesn't emit log messages directly with report; it adds them to a list of log messages in state (using addLogMessage); to make sure that items in this list are actually output, you need to call reportLogMessages after parsing is finished. (I actually no longer remember why we had to do this indirect thing in the markdown reader, rather than using report directly, but there was some reason registerHeader was designed this way.)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

registerHeader would not write any logs for ManState, because registerHeader only generates log messages when there are duplicate identifiers.

In the Markdown reader, log messages are generated when duplicate identifiers are explicitly defined in the Markdown source. Man, however, has no way to define identifiers explicitly.

I can still add reportLogMessages to the parseMan function if required.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true. OK.

return $ B.headerWith attr lvl contents

parseBlockQuote :: PandocMonad m => ManParser m Blocks
parseBlockQuote = blockQuote <$>
Expand Down
2 changes: 1 addition & 1 deletion test/command/11635.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ another \
one;123456
.TE
^D
<h1>HEADING</h1>
<h1 id="heading">HEADING</h1>
<table>
<tbody>
<tr>
Expand Down
2 changes: 1 addition & 1 deletion test/command/11668.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ This should still be indented, until some other macro is called to change it.
Like this.
^D
<p>TH "TEST" "1" "2026-05-25" "test v1.0.0" "test manual"</p>
<h1>HEADING</h1>
<h1 id="heading">HEADING</h1>
<dl>
<dt>This is the unindented tag.</dt>
<dd>
Expand Down
43 changes: 43 additions & 0 deletions test/command/8852.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
GitHub-Flavored Markdown (GFM) identifiers test
```
% pandoc -f man+gfm_auto_identifiers -t html
.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual"
.SH 1st HEADING
.SH C++ & Rust
.SH HEADING
.SH HEADING
^D
<h1 id="1st-heading">1st HEADING</h1>
<h1 id="c--rust">C++ &amp; Rust</h1>
<h1 id="heading">HEADING</h1>
<h1 id="heading-1">HEADING</h1>
```

ASCII identifiers test
```
% pandoc -f man+ascii_identifiers -t html
.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual"
.SH Über den Flüssen
^D
<h1 id="uber-den-flussen">Über den Flüssen</h1>
```

Headers without Auto Identifiers test
```
% pandoc -f man-auto_identifiers -t native
.TH "TEST" "1" "2026-05-08" "test v1.0.0" "test manual"
.SS Level 2
.SS different styles:
.SS Ordered
.SS Ordered
^D
[ Header
2 ( "" , [] , [] ) [ Str "Level" , Space , Str "2" ]
, Header
2
( "" , [] , [] )
[ Str "different" , Space , Str "styles:" ]
, Header 2 ( "" , [] , [] ) [ Str "Ordered" ]
, Header 2 ( "" , [] , [] ) [ Str "Ordered" ]
]
```
36 changes: 20 additions & 16 deletions test/man-reader.native
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ Pandoc
, Space
, Str "*"
]
, Header 1 ( "" , [] , [] ) [ Str "Headers" ]
, Header 1 ( "headers" , [] , [] ) [ Str "Headers" ]
, Header
1 ( "" , [] , [] ) [ Str "Level" , Space , Str "1" ]
1 ( "level-1" , [] , [] ) [ Str "Level" , Space , Str "1" ]
, Header
2 ( "" , [] , [] ) [ Str "Level" , Space , Str "2" ]
2 ( "level-2" , [] , [] ) [ Str "Level" , Space , Str "2" ]
, Para
[ Str "*"
, Space
Expand All @@ -58,7 +58,7 @@ Pandoc
, Space
, Str "*"
]
, Header 1 ( "" , [] , [] ) [ Str "Paragraphs" ]
, Header 1 ( "paragraphs" , [] , [] ) [ Str "Paragraphs" ]
, Para
[ Str "Here's"
, Space
Expand Down Expand Up @@ -159,7 +159,9 @@ Pandoc
, Str "*"
]
, Header
1 ( "" , [] , [] ) [ Str "Block" , Space , Str "Quotes" ]
1
( "block-quotes" , [] , [] )
[ Str "Block" , Space , Str "Quotes" ]
, Para
[ Str "Code"
, Space
Expand All @@ -182,7 +184,9 @@ Pandoc
, [ Para [ Str "item" , Space , Str "two" ] ]
]
, Header
1 ( "" , [] , [] ) [ Str "Code" , Space , Str "Blocks" ]
1
( "code-blocks" , [] , [] )
[ Str "Code" , Space , Str "Blocks" ]
, Para [ Str "Code:" ]
, CodeBlock
( "" , [] , [] )
Expand All @@ -201,22 +205,22 @@ Pandoc
, Space
, Str "*"
]
, Header 1 ( "" , [] , [] ) [ Str "Lists" ]
, Header 2 ( "" , [] , [] ) [ Str "Unordered" ]
, Header 1 ( "lists" , [] , [] ) [ Str "Lists" ]
, Header 2 ( "unordered" , [] , [] ) [ Str "Unordered" ]
, Para [ Str "Asterisks:" ]
, BulletList
[ [ Para [ Str "asterisk" , Space , Str "1" ] ]
, [ Para [ Str "asterisk" , Space , Str "2" ] ]
, [ Para [ Str "asterisk" , Space , Str "3" ] ]
]
, Header 2 ( "" , [] , [] ) [ Str "Ordered" ]
, Header 2 ( "ordered" , [] , [] ) [ Str "Ordered" ]
, OrderedList
( 1 , Decimal , Period )
[ [ Para [ Str "First" ] ]
, [ Para [ Str "Second" ] ]
, [ Para [ Str "Third" ] ]
]
, Header 2 ( "" , [] , [] ) [ Str "Nested" ]
, Header 2 ( "nested" , [] , [] ) [ Str "Nested" ]
, BulletList
[ [ Para [ Str "Tab" ]
, BulletList
Expand Down Expand Up @@ -254,7 +258,7 @@ Pandoc
]
, Header
2
( "" , [] , [] )
( "different-styles" , [] , [] )
[ Str "different" , Space , Str "styles:" ]
, OrderedList
( 1 , UpperAlpha , Period )
Expand Down Expand Up @@ -292,7 +296,7 @@ Pandoc
]
]
]
, Header 2 ( "" , [] , [] ) [ Str "Ordered" ]
, Header 2 ( "ordered-1" , [] , [] ) [ Str "Ordered" ]
, Para [ Str "Definition" , Space , Str "lists" ]
, DefinitionList
[ ( [ Strong [ Str "term1" ] ]
Expand Down Expand Up @@ -323,7 +327,7 @@ Pandoc
]
, Header
1
( "" , [] , [] )
( "special-characters" , [] , [] )
[ Str "Special" , Space , Str "Characters" ]
, Para
[ Str "AT&T"
Expand Down Expand Up @@ -364,7 +368,7 @@ Pandoc
, Para [ Str "Bang:" , Space , Str "!" ]
, Para [ Str "Plus:" , Space , Str "+" ]
, Para [ Str "Minus:" , Space , Str "-" ]
, Header 1 ( "" , [] , [] ) [ Str "Links" ]
, Header 1 ( "links" , [] , [] ) [ Str "Links" ]
, Para
[ Link
( "" , [] , [] )
Expand All @@ -379,7 +383,7 @@ Pandoc
( "mailto:me@example.com" , "" )
, Str "."
]
, Header 1 ( "" , [] , [] ) [ Str "Macros" ]
, Header 1 ( "macros" , [] , [] ) [ Str "Macros" ]
, Para
[ Strong [ Str "Me" , Space , Str "Myself" ]
, Space
Expand All @@ -406,7 +410,7 @@ Pandoc
, Strong [ Str "Author" ]
, Str "."
]
, Header 1 ( "" , [] , [] ) [ Str "Tables" ]
, Header 1 ( "tables" , [] , [] ) [ Str "Tables" ]
, Table
( "" , [] , [] )
(Caption Nothing [])
Expand Down
Loading