From 5ce91f6730f4d47699f4eeb6f39249f82c1517c9 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 3 Feb 2026 21:33:46 +0000 Subject: [PATCH 01/76] Initial stub MoinMoin Reader --- pandoc.cabal | 3 ++- src/Text/Pandoc/Readers.hs | 2 ++ src/Text/Pandoc/Readers/MoinMoin.hs | 36 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/Readers/MoinMoin.hs diff --git a/pandoc.cabal b/pandoc.cabal index 30afcacb004d..d6eb53f3396c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -33,7 +33,7 @@ description: Pandoc is a Haskell library for converting from one markup - Interactive notebook formats (Jupyter notebook ipynb) - Page layout formats (InDesign ICML) - Wiki markup formats (MediaWiki, DokuWiki, TikiWiki, TWiki, - Vimwiki, XWiki, ZimWiki, Jira wiki, Creole) + Vimwiki, XWiki, ZimWiki, Jira wiki, Creole, MoinMoin) - Slide show formats (LaTeX Beamer, PowerPoint, Slidy, reveal.js, Slideous, S5, DZSlides) - Data formats (CSV and TSV tables, Excel spreadsheets) @@ -616,6 +616,7 @@ library Text.Pandoc.Readers.RIS, Text.Pandoc.Readers.CslJson, Text.Pandoc.Readers.MediaWiki, + Text.Pandoc.Readers.MoinMoin, Text.Pandoc.Readers.Vimwiki, Text.Pandoc.Readers.RST, Text.Pandoc.Readers.Org, diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 5fa0ebeb7b07..5955edc64b1e 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -69,6 +69,7 @@ module Text.Pandoc.Readers , readDjot , readPod , readXML + , readMoinMoin -- * Miscellaneous , getReader , getDefaultExtensions @@ -103,6 +104,7 @@ import Text.Pandoc.Readers.JATS (readJATS) import Text.Pandoc.Readers.Jira (readJira) import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.MoinMoin import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.ODT diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs new file mode 100644 index 
000000000000..b49aa574d23e --- /dev/null +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -0,0 +1,36 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.MoinMoin + Copyright : Copyright (C) 2026 Jonathan Dowland + License : GNU GPL, version 2 or above + + Maintainer : Jonathan Dowland + Stability : alpha + Portability : portable + +Conversion of MoinMoin text to 'Pandoc' document. +-} + +module Text.Pandoc.Readers.MoinMoin( readMoinMoin ) where + +import qualified Data.Map.Strict as Map +import Text.Pandoc.Definition +import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Parsing (ToSources, toSources) + +-- | Read MoinMoin from an input string and return a Pandoc document. +readMoinMoin :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readMoinMoin opts s = let + sources = toSources s + meta = Meta $ Map.fromList + [ ("Title", MetaString "Hello world") + , ("Author", MetaString "Jon") + ] + in return $ Pandoc meta + [ Header 1 nullAttr [Str "Hello World"] + , Plain [Str "Hello world"] + ] From 0265d040d30f84995981bdaf19369783c80a8f33 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 4 Feb 2026 18:00:33 +0000 Subject: [PATCH 02/76] Readers.MoinMoin: processing instructions and headers --- src/Text/Pandoc/Readers/MoinMoin.hs | 106 +++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 16 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index b49aa574d23e..f7114ad31b61 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -13,24 +13,98 @@ Conversion of MoinMoin text to 'Pandoc' document. 
module Text.Pandoc.Readers.MoinMoin( readMoinMoin ) where -import qualified Data.Map.Strict as Map +import Control.Monad (guard) +import Control.Monad.Except (throwError) import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Options (ReaderOptions) -import Text.Pandoc.Parsing (ToSources, toSources) +import Text.Pandoc.Parsing +import qualified Text.Pandoc.Builder as B -- | Read MoinMoin from an input string and return a Pandoc document. -readMoinMoin :: (PandocMonad m, ToSources a) - => ReaderOptions - -> a - -> m Pandoc -readMoinMoin opts s = let - sources = toSources s - meta = Meta $ Map.fromList - [ ("Title", MetaString "Hello world") - , ("Author", MetaString "Jon") - ] - in return $ Pandoc meta - [ Header 1 nullAttr [Str "Hello World"] - , Plain [Str "Hello world"] - ] +readMoinMoin :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc +readMoinMoin opts s = do + let sources = toSources s + parsed <- readWithM parseMoinMoin MoinState sources + case parsed of + Left err -> throwError err + Right res -> return res + +data MoinState = MoinState -- context that needs to be passed around the parser +type MoinParser m = ParsecT Sources MoinState m + +parseMoinMoin :: PandocMonad m => MoinParser m Pandoc +parseMoinMoin = do + many processingInstruction + blocks <- mconcat <$> many block + spaces -- optional space? 
+ eof + + let meta = nullMeta + + -- reportLogMessages -- could not deduce 'HasLogMessages MoinState' + return $ Pandoc meta (B.toList blocks) + +{- + - we may wish to handle: + - #format creole|plain|python|rst| + - #REDIRECT | #refresh Xs + - #pragma + - section-numbers (headings) + - keywords => meta keywords + - description => meta description + - #DEPRECATED + - #language (iso-639-1 code) + - + - -} +processingInstruction :: PandocMonad m => MoinParser m () +processingInstruction = do + char '#' + manyUntil anyChar newline + return () + +block :: PandocMonad m => MoinParser m B.Blocks +block = do + res <- mempty <$ skipMany1 blankline + <|> header + <|> para + return res + +-- from Readers.Mediawiki +header :: PandocMonad m => MoinParser m B.Blocks +header = try $ do + guardColumnOne + lev <- length <$> many1 (char '=') + guard $ lev <= 5 + contents <- B.trimInlines . mconcat <$> manyTill inline (count lev $ char '=') + return $ B.header lev contents + +-- from Readers.Mediawiki +guardColumnOne :: PandocMonad m => MoinParser m () +guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) + +-- from Readers.Mediawiki +para :: PandocMonad m => MoinParser m B.Blocks +para = do + contents <- B.trimInlines . 
mconcat <$> many1 inline + return $ B.para contents + +inline :: PandocMonad m => MoinParser m B.Inlines +inline = whitespace + <|> str + +-- from Readers.Mediawiki +whitespace :: PandocMonad m => MoinParser m B.Inlines +whitespace = B.space <$ skipMany1 spaceChar + +-- from Readers.Mediawiki +str :: PandocMonad m => MoinParser m B.Inlines +str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) + +-- from Readers.Mediawiki +specialChars :: [Char] +specialChars = "'[]<=&*{}|\":\\_" + +-- from Readers.Mediawiki +spaceChars :: [Char] +spaceChars = " \n\t" From f61bb1258b0a12a7905ca857c882d2de1801851d Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 4 Feb 2026 22:15:10 +0000 Subject: [PATCH 03/76] basic bulletList --- src/Text/Pandoc/Readers/MoinMoin.hs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index f7114ad31b61..da181f1a2cd2 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -67,6 +67,7 @@ block :: PandocMonad m => MoinParser m B.Blocks block = do res <- mempty <$ skipMany1 blankline <|> header + <|> bulletList <|> para return res @@ -89,6 +90,15 @@ para = do contents <- B.trimInlines . mconcat <$> many1 inline return $ B.para contents +-- XXX only handles one line/item +bulletList :: PandocMonad m => MoinParser m B.Blocks +bulletList = do + lev <- length <$> many1 space + char '*' + spaces + contents <- B.plain . B.trimInlines . 
mconcat <$> manyTill inline newline + return $ B.bulletList [contents] + inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> str From d1da1fde689e4ef77fe7552b4c1075dfc004d01f Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 4 Feb 2026 22:29:30 +0000 Subject: [PATCH 04/76] initial externalLink (quite broken) --- src/Text/Pandoc/Readers/MoinMoin.hs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index da181f1a2cd2..2518789fc460 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -37,7 +37,7 @@ parseMoinMoin :: PandocMonad m => MoinParser m Pandoc parseMoinMoin = do many processingInstruction blocks <- mconcat <$> many block - spaces -- optional space? + spaces eof let meta = nullMeta @@ -102,6 +102,7 @@ bulletList = do inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> str + <|> externalLink -- from Readers.Mediawiki whitespace :: PandocMonad m => MoinParser m B.Inlines @@ -111,6 +112,17 @@ whitespace = B.space <$ skipMany1 spaceChar str :: PandocMonad m => MoinParser m B.Inlines str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) +externalLink :: PandocMonad m => MoinParser m B.Inlines +externalLink = do + string "[[" + (src,_) <- uri -- XXX is eating '|' + title <- option src $ do + char '|' + many1TillChar anyChar (try (char ']')) + string "]]" + let label = B.fromList [] -- XXX convert title to B.Inlines + return $ B.link src "" label + -- from Readers.Mediawiki specialChars :: [Char] specialChars = "'[]<=&*{}|\":\\_" From 4b5ab88b4a884c3fae4be70052ae3244a6f73a51 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 5 Feb 2026 17:51:21 +0000 Subject: [PATCH 05/76] fixed externalLink --- src/Text/Pandoc/Readers/MoinMoin.hs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs 
b/src/Text/Pandoc/Readers/MoinMoin.hs index 2518789fc460..196e2c390b49 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -115,13 +115,18 @@ str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) externalLink :: PandocMonad m => MoinParser m B.Inlines externalLink = do string "[[" - (src,_) <- uri -- XXX is eating '|' - title <- option src $ do - char '|' - many1TillChar anyChar (try (char ']')) - string "]]" - let label = B.fromList [] -- XXX convert title to B.Inlines - return $ B.link src "" label + (src,label) <- try labelledLink <|> unlabelledLink + --string "]]" + return $ B.link src "" $ B.str label + where + labelledLink = do + src <- manyTillChar (noneOf "|") (try (char '|')) + lbl <- manyTillChar (noneOf "]") (string "]]") + return (src,lbl) + + unlabelledLink = do + src <- manyTillChar (noneOf "|") (string "]]") + return (src,src) -- from Readers.Mediawiki specialChars :: [Char] From 7171fb2465e6c648b3f4779cf900fa7d97a3f09f Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 5 Feb 2026 21:25:34 +0000 Subject: [PATCH 06/76] Fix multiple-item bullet lists --- src/Text/Pandoc/Readers/MoinMoin.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 196e2c390b49..cda72376f1ac 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -90,14 +90,15 @@ para = do contents <- B.trimInlines . mconcat <$> many1 inline return $ B.para contents --- XXX only handles one line/item bulletList :: PandocMonad m => MoinParser m B.Blocks -bulletList = do +bulletList = many1 bulletListItem >>= return . B.bulletList + +bulletListItem :: PandocMonad m => MoinParser m B.Blocks +bulletListItem = try $ do lev <- length <$> many1 space char '*' spaces - contents <- B.plain . B.trimInlines . mconcat <$> manyTill inline newline - return $ B.bulletList [contents] + B.plain . B.trimInlines . 
mconcat <$> manyTill inline newline inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace From 71f41928a35b9db97a1ea03153b73e08df55e4f5 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Fri, 6 Feb 2026 22:20:19 +0000 Subject: [PATCH 07/76] handle italics --- src/Text/Pandoc/Readers/MoinMoin.hs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index cda72376f1ac..4f9ce85c7211 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -103,6 +103,7 @@ bulletListItem = try $ do inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> str + <|> italic <|> externalLink -- from Readers.Mediawiki @@ -113,6 +114,13 @@ whitespace = B.space <$ skipMany1 spaceChar str :: PandocMonad m => MoinParser m B.Inlines str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) +italic :: PandocMonad m => MoinParser m B.Inlines +italic = + enclosed doubleApostrophe doubleApostrophe inline >>= + return . B.singleton . B.Emph . B.toList . 
mconcat + where + doubleApostrophe = char '\'' >> char '\'' + externalLink :: PandocMonad m => MoinParser m B.Inlines externalLink = do string "[[" From 86d40852958673eb2aa11e64aaf984baa41b1b0e Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 7 Feb 2026 21:58:44 +0000 Subject: [PATCH 08/76] Add (most of) the inline formatters In particular, the approach of adding comma to specialChars really shows the problem with the current approach (comma can't occur naturally at this point) --- src/Text/Pandoc/Readers/MoinMoin.hs | 40 ++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 4f9ce85c7211..f5d36e46c253 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -104,6 +104,11 @@ inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> str <|> italic + <|> bold + <|> underline + <|> superscript + <|> subscript +-- <|> stroke <|> externalLink -- from Readers.Mediawiki @@ -114,12 +119,33 @@ whitespace = B.space <$ skipMany1 spaceChar str :: PandocMonad m => MoinParser m B.Inlines str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) -italic :: PandocMonad m => MoinParser m B.Inlines -italic = - enclosed doubleApostrophe doubleApostrophe inline >>= - return . B.singleton . B.Emph . B.toList . mconcat - where - doubleApostrophe = char '\'' >> char '\'' +-- utility fn for most of the inline text formatters +formatter :: PandocMonad m + => String + -> ([Inline] -> B.Inline) + -> MoinParser m B.Inlines +formatter delim inliner = + enclosed delim' delim' inline >>= + return . B.singleton . inliner . B.toList . 
mconcat + where delim' = string delim + +italic :: PandocMonad m => MoinParser m B.Inlines +italic = formatter ("''") B.Emph +bold :: PandocMonad m => MoinParser m B.Inlines +bold = formatter ("'''") B.Strong +-- monospace: B.code (Code Attr Text) needs different handling +-- code: as monospace +underline :: PandocMonad m => MoinParser m B.Inlines +underline = formatter ("__") B.Underline +superscript :: PandocMonad m => MoinParser m B.Inlines +superscript = formatter "^" B.Superscript +subscript :: PandocMonad m => MoinParser m B.Inlines +subscript = formatter ",," B.Subscript +-- smaller/larger: needs some thought +stroke :: PandocMonad m => MoinParser m B.Inlines +stroke = enclosed (string "--(") (string ")--") inline >>= + return . B.singleton . B.Strikeout . B.toList . mconcat + externalLink :: PandocMonad m => MoinParser m B.Inlines externalLink = do @@ -139,7 +165,7 @@ externalLink = do -- from Readers.Mediawiki specialChars :: [Char] -specialChars = "'[]<=&*{}|\":\\_" +specialChars = "'[]<=&*{}|\":\\_^," -- from Readers.Mediawiki spaceChars :: [Char] From 4265dac171b17c04aecc7fdaff4db2814a0be1ca Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 7 Feb 2026 22:03:55 +0000 Subject: [PATCH 09/76] Add my test file This will eventually metamorphose into proper test file(s) --- testMoin.hs | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 testMoin.hs diff --git a/testMoin.hs b/testMoin.hs new file mode 100644 index 000000000000..ee953c4fb32a --- /dev/null +++ b/testMoin.hs @@ -0,0 +1,64 @@ +module Main where + +import qualified Data.Text as T +import qualified Data.Text.IO as TIO +import Text.Pandoc +import Text.Pandoc.Parsing +import Data.Either (fromRight) + +sampleMW = T.pack "\ +\= sample mediawiki doc =\n\ +\\n\ +\This is a ''sample'' CamelCase document.\n\ +\[https://jmtd.net jon's homepage]\n\ +\\n\ +\: indented reply\n\ +\\n\ +\[[Category:Foo]]" + +sampleMM = T.pack "\ +\#format wiki\n\ 
+\#language en\n\ +\#pragma supplementation-page on\n\ +\\n\ +\== Jon Dowland ==\n\ +\\n\ +\ * [[http://jmtd.net|jmtd.net]]\n\ +\ * another bullet\n\ +\\n\ +\ ''mostly'' meta '''indeed'''. This is __useful__.\n\ +\\n\ +\what does `monospace` look like? different to ?\n\ +\\n\ +\what goes ^up^ must come ,,down,,.\n\ +\\n\ +\what --(happened in)-- there?\n\ +\\n\ +\----\n\ +\\n\ +\CategoryHomepage" + +-- this exposes bugs in the Mediawiki reader (level 2 does not get captured as a nested DL) +tinyMW = T.pack "level 0\n:level 1\n::level 2" + +parsedMW = (fromRight (error "") . runPure . readMediaWiki def) sampleMW + +parsedMM = (fromRight (error "?") . runPure . readMoinMoin def) sampleMM + +main = do + -- what structure do we get from a Mediawiki doc? + -- (putStrLn . show) parsedMW + -- putStrLn "##################################" + + -- what happens to definition list in markdown output? + ---- the definition list just goes away! + --(handleError . runPure . writeMediaWiki def) parsedMW >>= TIO.putStrLn + --putStrLn "##################################" + + (putStrLn . 
show) parsedMM + putStrLn "\n##################################\n" + + result <- runIO $ + readMoinMoin def sampleMM >>= writeMarkdown def + mdwn <- handleError result + TIO.putStrLn mdwn From ecb9b5b7369f200721816d0b8d6315d5ce5bb49d Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sun, 8 Feb 2026 19:55:54 +0000 Subject: [PATCH 10/76] simplify `formatter` via Text.Pandoc.Builder builders --- src/Text/Pandoc/Readers/MoinMoin.hs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index f5d36e46c253..dc53d0e44706 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -122,29 +122,29 @@ str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) -- utility fn for most of the inline text formatters formatter :: PandocMonad m => String - -> ([Inline] -> B.Inline) + -> (B.Inlines -> B.Inlines) -> MoinParser m B.Inlines formatter delim inliner = - enclosed delim' delim' inline >>= - return . B.singleton . inliner . B.toList . mconcat + enclosed delim' delim' inline >>= return . inliner . 
mconcat where delim' = string delim italic :: PandocMonad m => MoinParser m B.Inlines -italic = formatter ("''") B.Emph +italic = formatter ("''") B.emph bold :: PandocMonad m => MoinParser m B.Inlines -bold = formatter ("'''") B.Strong +bold = formatter ("'''") B.strong -- monospace: B.code (Code Attr Text) needs different handling -- code: as monospace +-- however, B.code :: Text -> Inlines underline :: PandocMonad m => MoinParser m B.Inlines -underline = formatter ("__") B.Underline +underline = formatter ("__") B.underline superscript :: PandocMonad m => MoinParser m B.Inlines -superscript = formatter "^" B.Superscript +superscript = formatter "^" B.superscript subscript :: PandocMonad m => MoinParser m B.Inlines -subscript = formatter ",," B.Subscript +subscript = formatter ",," B.subscript -- smaller/larger: needs some thought stroke :: PandocMonad m => MoinParser m B.Inlines stroke = enclosed (string "--(") (string ")--") inline >>= - return . B.singleton . B.Strikeout . B.toList . mconcat + return . B.strikeout . mconcat externalLink :: PandocMonad m => MoinParser m B.Inlines From 8d107f7a9379b96d00d82fc862bf1d1a31e1121a Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sun, 8 Feb 2026 21:20:27 +0000 Subject: [PATCH 11/76] handle 'special' characters after other inline parsers --- src/Text/Pandoc/Readers/MoinMoin.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index dc53d0e44706..4ba2a924f532 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -20,6 +20,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing import qualified Text.Pandoc.Builder as B +import qualified Data.Text as T -- | Read MoinMoin from an input string and return a Pandoc document. 
readMoinMoin :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc @@ -110,6 +111,7 @@ inline = whitespace <|> subscript -- <|> stroke <|> externalLink + <|> special -- from Readers.Mediawiki whitespace :: PandocMonad m => MoinParser m B.Inlines @@ -146,6 +148,8 @@ stroke :: PandocMonad m => MoinParser m B.Inlines stroke = enclosed (string "--(") (string ")--") inline >>= return . B.strikeout . mconcat +special :: PandocMonad m => MoinParser m B.Inlines +special = B.str . T.singleton <$> oneOf specialChars externalLink :: PandocMonad m => MoinParser m B.Inlines externalLink = do From 022d1bfb8563320c646725f7961e24dd3fa5f1a0 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sun, 8 Feb 2026 21:22:57 +0000 Subject: [PATCH 12/76] move test moinmoin document out of testMoin --- testMoin.hs | 27 ++++----------------------- testmoin.txt | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 23 deletions(-) create mode 100644 testmoin.txt diff --git a/testMoin.hs b/testMoin.hs index ee953c4fb32a..bb2fe48f6d85 100644 --- a/testMoin.hs +++ b/testMoin.hs @@ -16,34 +16,13 @@ sampleMW = T.pack "\ \\n\ \[[Category:Foo]]" -sampleMM = T.pack "\ -\#format wiki\n\ -\#language en\n\ -\#pragma supplementation-page on\n\ -\\n\ -\== Jon Dowland ==\n\ -\\n\ -\ * [[http://jmtd.net|jmtd.net]]\n\ -\ * another bullet\n\ -\\n\ -\ ''mostly'' meta '''indeed'''. This is __useful__.\n\ -\\n\ -\what does `monospace` look like? different to ?\n\ -\\n\ -\what goes ^up^ must come ,,down,,.\n\ -\\n\ -\what --(happened in)-- there?\n\ -\\n\ -\----\n\ -\\n\ -\CategoryHomepage" -- this exposes bugs in the Mediawiki reader (level 2 does not get captured as a nested DL) tinyMW = T.pack "level 0\n:level 1\n::level 2" parsedMW = (fromRight (error "") . runPure . readMediaWiki def) sampleMW -parsedMM = (fromRight (error "?") . runPure . readMoinMoin def) sampleMM +parseMM = fromRight (error "?") . runPure . readMoinMoin def main = do -- what structure do we get from a Mediawiki doc? 
@@ -55,7 +34,9 @@ main = do --(handleError . runPure . writeMediaWiki def) parsedMW >>= TIO.putStrLn --putStrLn "##################################" - (putStrLn . show) parsedMM + sampleMM <- TIO.readFile "testmoin.txt" + + (putStrLn . show . parseMM) sampleMM putStrLn "\n##################################\n" result <- runIO $ diff --git a/testmoin.txt b/testmoin.txt new file mode 100644 index 000000000000..03533f231ad9 --- /dev/null +++ b/testmoin.txt @@ -0,0 +1,20 @@ +#format wiki +#language en +#pragma supplementation-page on + +== Jon Dowland == + + * [[http://jmtd.net|jmtd.net]] + * another bullet + + ''mostly'' meta '''indeed'''. This is __useful__. + +what does `monospace` (look) like? different to ? + +what goes ^up^, must ] come ,,down,,. + +what --(happened in)-- there? + +---- + +CategoryHomepage From 25fdb185526697b60357aea3ec0010cc94d402e2 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sun, 8 Feb 2026 22:17:49 +0000 Subject: [PATCH 13/76] start work on identifying camelcase wikilinks --- testMoin.hs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/testMoin.hs b/testMoin.hs index bb2fe48f6d85..634200361ece 100644 --- a/testMoin.hs +++ b/testMoin.hs @@ -3,8 +3,12 @@ module Main where import qualified Data.Text as T import qualified Data.Text.IO as TIO import Text.Pandoc -import Text.Pandoc.Parsing +import qualified Text.Pandoc.Parsing as P import Data.Either (fromRight) +import Text.ParserCombinators.Parsec +import Text.Parsec +import Text.Parsec.Char +import Data.Char -- isUpper etc sampleMW = T.pack "\ \= sample mediawiki doc =\n\ @@ -43,3 +47,11 @@ main = do readMoinMoin def sampleMM >>= writeMarkdown def mdwn <- handleError result TIO.putStrLn mdwn + +-- parser tests +camelWord :: Stream s m Char + => ParsecT s () m String +camelWord = do + f <- upper + rest <- many1 (satisfy (\c -> isAlphaNum c && not (isUpper c))) + return (f:rest) From 757105c0454446e3ecb6064a0270fde13daf9460 Mon Sep 17 00:00:00 2001 From: 
Jonathan Dowland Date: Mon, 9 Feb 2026 21:46:04 +0000 Subject: [PATCH 14/76] parse camelCaseLinks --- src/Text/Pandoc/Readers/MoinMoin.hs | 28 ++++++++++++++++++++++++++++ testMoin.hs | 8 -------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 4ba2a924f532..4445e5201cde 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -15,8 +15,10 @@ module Text.Pandoc.Readers.MoinMoin( readMoinMoin ) where import Control.Monad (guard) import Control.Monad.Except (throwError) +import Data.Char -- isUpper, isAlphaNum import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) +import Text.Pandoc.Class (runPure) -- debug import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing import qualified Text.Pandoc.Builder as B @@ -103,6 +105,7 @@ bulletListItem = try $ do inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace + <|> camelCaseLink <|> str <|> italic <|> bold @@ -117,6 +120,26 @@ inline = whitespace whitespace :: PandocMonad m => MoinParser m B.Inlines whitespace = B.space <$ skipMany1 spaceChar +many2 :: PandocMonad m => MoinParser m a -> MoinParser m [a] +many2 p = do + first <- p + rest <- many1 p + return (first:rest) + +camelWord :: PandocMonad m => MoinParser m String +camelWord = do + f <- satisfy isUpper + rest <- many1 (satisfy (\c -> isAlphaNum c && not (isUpper c))) + return (f:rest) + +camelCaseLink :: PandocMonad m => MoinParser m B.Inlines +camelCaseLink = try $ do + src <- mconcat <$> many2 camelWord + let tsrc = T.pack src + let title = "" + let label = B.str tsrc + return $ B.link tsrc title label + -- from Readers.Mediawiki str :: PandocMonad m => MoinParser m B.Inlines str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) @@ -174,3 +197,8 @@ specialChars = "'[]<=&*{}|\":\\_^," -- from Readers.Mediawiki spaceChars :: [Char] spaceChars = " \n\t" + +-- debug 
function to run the inline parser in GHCi +debugParse :: PandocMonad m => T.Text -> m (Either ParseError B.Inlines) +debugParse t = + runParserT (mconcat <$> many inline) MoinState "srcname" (toSources t) diff --git a/testMoin.hs b/testMoin.hs index 634200361ece..39abce8459fd 100644 --- a/testMoin.hs +++ b/testMoin.hs @@ -47,11 +47,3 @@ main = do readMoinMoin def sampleMM >>= writeMarkdown def mdwn <- handleError result TIO.putStrLn mdwn - --- parser tests -camelWord :: Stream s m Char - => ParsecT s () m String -camelWord = do - f <- upper - rest <- many1 (satisfy (\c -> isAlphaNum c && not (isUpper c))) - return (f:rest) From 573cbd1fa20c6d4b8bb0a0729c93c61bb14c9b64 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Mon, 9 Feb 2026 22:06:47 +0000 Subject: [PATCH 15/76] fix stroke; add smaller and larger --- src/Text/Pandoc/Readers/MoinMoin.hs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 4445e5201cde..d1bacd63f507 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -112,7 +112,9 @@ inline = whitespace <|> underline <|> superscript <|> subscript --- <|> stroke + <|> smaller + <|> larger + <|> stroke <|> externalLink <|> special @@ -166,7 +168,15 @@ superscript :: PandocMonad m => MoinParser m B.Inlines superscript = formatter "^" B.superscript subscript :: PandocMonad m => MoinParser m B.Inlines subscript = formatter ",," B.subscript --- smaller/larger: needs some thought +-- smaller/larger: possibly mark the inlines with an attribute +smaller :: PandocMonad m => MoinParser m B.Inlines +smaller = enclosed (string "~-") (string "-~") inline >>= + return . mconcat + +larger :: PandocMonad m => MoinParser m B.Inlines +larger = enclosed (string "~+") (string "+~") inline >>= + return . 
mconcat + stroke :: PandocMonad m => MoinParser m B.Inlines stroke = enclosed (string "--(") (string ")--") inline >>= return . B.strikeout . mconcat @@ -192,7 +202,7 @@ externalLink = do -- from Readers.Mediawiki specialChars :: [Char] -specialChars = "'[]<=&*{}|\":\\_^," +specialChars = "'[]<=&*{}|\":\\_^,~-+()" -- from Readers.Mediawiki spaceChars :: [Char] From 6888ba7616f119551c768acb7e55434a90c4d64f Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 10:41:47 +0000 Subject: [PATCH 16/76] (DEV) Simon's MoinMoin as TODO Simon Michael worked on a MoinMoin plugin in 2009. I will take from it what I can; and it has a partial TODO list in it which will also be useful. --- SimonMoin.hs | 631 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 631 insertions(+) create mode 100644 SimonMoin.hs diff --git a/SimonMoin.hs b/SimonMoin.hs new file mode 100644 index 000000000000..3bbb93ba9e87 --- /dev/null +++ b/SimonMoin.hs @@ -0,0 +1,631 @@ +{- +Copyright (C) 2009 Simon Michael + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.MoinMoin + Copyright : Copyright (C) 2009-2011 Simon Michael, ranft, John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Partial conversion from MoinMoin-formatted text (plus some pandoc-isms +like smart punctuation) to Pandoc. Based on the Markdown reader. + +TODO: +[ ] subscript: ,,sub,, +[ ] strikeout: --(stroke)-- +[ ] larger: ~+larger+~ [ignore] +[ ] smaller: +-smaller-+ [ignore] +[ ] table of contents: <> or <> +[ ] moin 1.6 double bracket links: [[FrontPage]], [[FrontPage|named link]], + [[#anchorname]], [[#anchorname|description]], [[PageName#anchorname]], + [[PageName#anchorname|description]], [[attachment:filename.txt]] +[ ] HelpOnEditing/SubPages should be a single link +[ ] /SubPage should be a link +[ ] Wiki''''''Name should result in plain string WikiName +[ ] Same with !WikiName +[ ] WikiName''''''s - the s should not be in the link +[ ] WikiName``s - the s should not be in the link +[ ] {{http://static.moinmo.in/logos/moinmoin.png}} should be an image +[ ] camel-case links assume ascii letters +[ ] indented blockquotes +[ ] definition lists +[ ] nested/multiply-indented lists, blocks, code blocks +[ ] tables +[ ] images +[ ] <> inserts a link anchor +[ ] <
> a hard break +[ ] <> a note +[ ] <> - just ignore this +[ ] <> treat as email link; pandoc has obfuscation + options +[ ] smileys and icons - use unicode char or just parse literal text +[ ] wiki parser with css classes: + {{{#!wiki red/solid + blah blah + }}} +[ ] admonitions + {{{#!wiki caution + '''Don't overuse''' + + blah blah + }}} +[ ] comments + {{{#!wiki comment/dotted + '''Don't overuse''' + + blah blah + }}} +[ ] test suite - best approach would be src/Tests/Readers/MoinMoin.hs, + see other reader tests for how to do this. + or, old style: tests/moinmoin-reader.native tests/moinmoin-reader.moinmoin + + +cf: +http://johnmacfarlane.net/pandoc/doc/pandoc/index.html +http://moinmo.in/HelpOnFormatting +http://moinmo.in/HelpOnMoinWikiSyntax + +-} + +module Text.Pandoc.Readers.MoinMoin ( readMoinMoin ) where +import Control.Monad ( when ) +import Data.Char ( isUpper ) +import Text.Pandoc.Definition +import Text.Pandoc.Parsing +import Text.Pandoc.Shared +import Text.ParserCombinators.Parsec hiding ( label ) + +-- | Parse MoinMoin string and return Pandoc document. 
+readMoinMoin :: ParserState -> String -> Pandoc +readMoinMoin state s = readWith parseMoinMoin state (s ++ "\n\n") + +-- +-- Constants and data structure definitions +-- + +spaceChars :: [Char] +spaceChars = " \t" + +bulletListMarkers :: [Char] +bulletListMarkers = ".*" + +hruleChars :: [Char] +hruleChars = "-" + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" + +-- +-- document structure +-- + +parseMoinMoin :: GenParser Char ParserState Pandoc +parseMoinMoin = do + processingInstructions + blocks <- parseBlocks + return $ Pandoc (Meta [] [] [] {-title author date-}) $ filter (/= Null) blocks + +processingInstructions :: GenParser Char a () +processingInstructions = many (char '#' >> manyTill anyChar newline) >> return () + +comment :: GenParser Char a () +comment = try $ do + pos <- getPosition + when (sourceColumn pos /= 0) $ fail "" + string "##" + manyTill anyChar newline + return () + +-- +-- parsing blocks +-- + +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = manyTill block eof + +block :: GenParser Char ParserState Block +block = do + choice ([ header + , codeBlock + , codeBlockIndented + -- , blockQuote + , hrule + , bulletList + , orderedList + , para + , plain + , nullBlock + ]) "block" + +-- +-- header blocks +-- + +header :: GenParser Char ParserState Block +header = try $ do + level <- many1 (char '=') >>= return . length + skipSpaces + text <- manyTill inline headerEnd >>= return . 
normalizeSpaces + return (Header level text) "header" + +headerEnd :: GenParser Char st [Char] +headerEnd = try $ skipSpaces >> skipMany (char '=') >> blanklines + +-- +-- hrule block +-- + +hrule :: GenParser Char st Block +hrule = try $ do + skipSpaces + start <- oneOf hruleChars + count 3 (char start) + skipMany (char start) + skipSpaces + newline + optional blanklines + return HorizontalRule + +-- +-- code blocks +-- + +codeBlockStart :: GenParser Char st [String] +codeBlockStart = try $ do + string "{{{" + classes <- option [] codeBlockClasses + optional newline + return classes + +codeBlockClasses :: GenParser Char st [String] +codeBlockClasses = try $ do + string "#!" + skipMany spaceChar + sepEndBy (many1 alphaNum) (many1 spaceChar) + +codeBlockEnd :: GenParser Char st () +codeBlockEnd = try $ string "}}}" >> skipSpaces >> optional newline >> return () + +codeBlock :: GenParser Char st Block +codeBlock = try $ do + classes <- codeBlockStart + contents <- manyTill anyChar codeBlockEnd + return $ CodeBlock ("",classes,[]) contents + +codeInline :: GenParser Char st Inline +codeInline = try $ do + codeBlockStart' + contents <- manyTill anyChar codeBlockEnd' + return $ Code nullAttr contents + where + codeBlockStart' = string "{{{" >> return () + codeBlockEnd' = try $ string "}}}" >> return () + + +codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented = try $ do + many1 whitespace >> codeBlockStart + contents <- manyTill anyChar codeBlockEnd + return $ BlockQuote [CodeBlock ([],[],[]) contents] + +-- +-- list blocks +-- + +-- these are just the markdown list parsers + +bulletListStart :: GenParser Char ParserState () +bulletListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces + +anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) 
+anyOrderedListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy $ string "p." >> spaceChar >> digit -- page number + state <- getState + if stateStrict state + then do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim) + else do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper)) + else spaceChar + skipSpaces + return (num, style, delim) + +listStart :: GenParser Char ParserState () +listStart = bulletListStart <|> (anyOrderedListStart >> return ()) + +-- parse a line of a list item (start = parser for beginning of list item) +listLine :: GenParser Char ParserState [Char] +listLine = try $ do + notFollowedBy' listStart + notFollowedBy blankline + notFollowedBy' (do indentSpaces + many (spaceChar) + listStart) + notFollowedBy' header + line <- manyTill anyChar newline + return $ line ++ "\n" + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: GenParser Char ParserState [Char] +rawListItem = try $ do + listStart + result <- many1 listLine + blanks <- many blankline + return $ concat result ++ blanks + +-- continuation of a list item - indented and separated by blankline +-- or (in compact lists) endline. 
+-- note: nested lists are parsed as continuations +listContinuation :: GenParser Char ParserState [Char] +listContinuation = try $ do + lookAhead indentSpaces + result <- many1 listContinuationLine + blanks <- many blankline + return $ concat result ++ blanks + +listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine = try $ do + notFollowedBy blankline + notFollowedBy' listStart + optional indentSpaces + result <- manyTill anyChar newline + return $ result ++ "\n" + +listItem :: GenParser Char ParserState [Block] +listItem = try $ do + first <- rawListItem + continuations <- many listContinuation + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let oldContext = stateParserContext state + setState $ state {stateParserContext = ListItemState} + -- parse the extracted block, which may contain various block elements: + let raw = concat (first:continuations) + contents <- parseFromString parseBlocks raw + updateState (\st -> st {stateParserContext = oldContext}) + return contents + +orderedList :: GenParser Char ParserState Block +orderedList = try $ do + (start, style, delim) <- lookAhead anyOrderedListStart + items <- many1 listItem + return $ OrderedList (start, style, delim) $ compactify items + +bulletList :: GenParser Char ParserState Block +bulletList = try $ do + lookAhead bulletListStart + many1 listItem >>= return . BulletList . compactify + +-- +-- paragraph block +-- + +para :: GenParser Char ParserState Block +para = try $ do + result <- many1 inline + newline + blanklines <|> do lookAhead ((codeBlockStart >> return "") <|> {- blockQuote <|> -} (header >> return "")) + return $ Para $ normalizeSpaces result + +plain :: GenParser Char ParserState Block +plain = many1 inline >>~ spaces >>= return . Plain . 
normalizeSpaces + +-- +-- inline +-- + +inline :: GenParser Char ParserState Inline +inline = choice inlineParsers "inline" + +inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers = [ + link + , codeInline + , str + , smartPunctuation inline + , whitespace + , endline + , code + , charRef + , strong + , emph + , strikeout + , superscript + , subscript +-- , escapedChar + , symbol + ] + +symbol :: GenParser Char ParserState Inline +symbol = do + result <- oneOf specialChars + return $ Str [result] + +-- parses inline code, between n `s and n `s +code :: GenParser Char ParserState Inline +code = try $ do + starts <- many1 (char '`') + skipSpaces + result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (char '\n' >> return " ")) + (try (skipSpaces >> count (length starts) (char '`') >> + notFollowedBy (char '`'))) + return $ Code nullAttr $ removeLeadingTrailingSpace $ concat result + +emph :: GenParser Char ParserState Inline +emph = (enclosed (string "''") (string "''") inline) >>= return . Emph . normalizeSpaces + +strong :: GenParser Char ParserState Inline +strong = enclosed (string "'''") (string "'''") inline >>= return . Strong . normalizeSpaces + + +strikeout :: GenParser Char ParserState Inline +strikeout = failIfStrict >> enclosed (string "--(") (try $ string ")--") inline >>= + return . Strikeout . normalizeSpaces + +superscript :: GenParser Char ParserState Inline +superscript = failIfStrict >> enclosed (char '^') (char '^') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Superscript + +subscript :: GenParser Char ParserState Inline +subscript = failIfStrict >> enclosed (string ",,") (string ",,") + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . 
Subscript + +whitespace :: GenParser Char ParserState Inline +whitespace = do + sps <- many1 (oneOf spaceChars) + if length sps >= 2 + then option Space (endline >> return LineBreak) + else return Space "whitespace" + +strChar :: GenParser Char st Char +strChar = noneOf (specialChars ++ spaceChars ++ "\n") + +str :: GenParser Char st Inline +str = notFollowedBy' comment >> many1 strChar >>= return . Str + +-- an endline character that can be treated as a space, not a structural break +endline :: GenParser Char ParserState Inline +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' codeBlockStart + notFollowedBy' listStart + notFollowedBy (char '=') + -- st <- getState + -- if stateStrict st + -- then do notFollowedBy (char '=') -- header + -- else return () + -- parse potential list-starts differently if in a list: + -- if stateParserContext st == ListItemState + -- then notFollowedBy' (bulletListStart <|> + -- (anyOrderedListStart >> return ())) + -- else return () + return Space + +-- +-- links +-- + +-- inlineNonLink :: GenParser Char ParserState Inline +-- inlineNonLink = (choice $ +-- map (\parser -> try (parser >>= failIfLink)) inlineParsers) +-- "inline (non-link)" + +-- failIfLink :: Inline -> GenParser tok st Inline +-- failIfLink (Link _ _) = pzero +-- failIfLink elt = return elt + +-- -- a reference label for a link +-- reference :: GenParser Char ParserState [Inline] +-- reference = do notFollowedBy' (string "[^") -- footnote reference +-- result <- inlinesInBalancedBrackets inlineNonLink +-- return $ normalizeSpaces result + +-- -- source for a link, with optional title +-- source :: GenParser Char st (String, [Char]) +-- source = +-- (try $ charsInBalanced '(' ')' >>= parseFromString source') <|> +-- -- the following is needed for cases like: [ref](/url(a). 
+-- (enclosed (char '(') (char ')') anyChar >>= +-- parseFromString source') + +-- -- auxiliary function for source +-- source' :: GenParser Char st (String, [Char]) +-- source' = do +-- skipSpaces +-- let sourceURL excludes = many $ +-- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) +-- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" +-- tit <- option "" linkTitle +-- skipSpaces +-- eof +-- return (intercalate "%20" $ words $ removeTrailingSpace src, tit) + +-- linkTitle :: GenParser Char st String +-- linkTitle = try $ do +-- (many1 spaceChar >> option '\n' newline) <|> newline +-- skipSpaces +-- delim <- oneOf "'\"" +-- tit <- manyTill (optional (char '\\') >> anyChar) +-- (try (char delim >> skipSpaces >> eof)) +-- return $ decodeCharacterReferences tit + +link :: GenParser Char ParserState Inline +link = choice [uriLink + ,emailAddressLink + ,localPageCamelCaseLink + ,moin15BracketLink + ,moin16BracketLink + ] + +uriLink :: GenParser Char ParserState Inline +uriLink = try $ do + (u, uri_escaped) <- uri + return $ Link [Code nullAttr u] (uri_escaped, "") + +emailAddressLink :: GenParser Char ParserState Inline +emailAddressLink = try $ do + (e, escaped_mailto_uri) <- emailAddress + return $ Link [Str e] (escaped_mailto_uri, "") + +localPageCamelCaseLink :: GenParser Char ParserState Inline +localPageCamelCaseLink = try $ do + (p,_) <- localPageCamelCase + return $ Link [Str p] (p, "") + +moin15BracketLink :: GenParser Char ParserState Inline +moin15BracketLink = try $ do + (target,label) <- singleBracketed $ choice [ + uriSpaceLabel + ,uriNoLabel + ,localPageInQuotes + ,localPageWithColonLabel + ,localPageCamelCase + ] + return $ Link [Str label] (target, "") + +moin16BracketLink :: GenParser Char ParserState Inline +moin16BracketLink = try $ do + (target,label) <- doubleBracketed $ choice [ + uriPipeLabel + ,uriNoLabel + -- ,localPageInQuotes + -- ,localPageWithColonLabel + -- 
,localPageCamelCase + ] + return $ Link [Str label] (target, "") + +uriSpaceLabel :: GenParser Char ParserState (String,String) +uriSpaceLabel = try $ do + (_, uri_escaped) <- uri + many1 space + label <- many1 $ noneOf "]" + return (uri_escaped, label) + +uriPipeLabel :: GenParser Char ParserState (String,String) +uriPipeLabel = try $ do + (_, uri_escaped) <- uri + char '|' + label <- many1 $ noneOf "]" + return (uri_escaped, label) + +uriNoLabel :: GenParser Char ParserState (String,String) +uriNoLabel = try $ do + skipSpaces + s <- many1 $ noneOf "]" + skipSpaces + -- work around uri failing when there is a trailing ] + state <- getState + either (const $ fail "") (const $ return (s,s)) $ runParser uri state "" s + +localPageInQuotes :: GenParser Char ParserState (String,String) +localPageInQuotes = try $ do + char '"' + p <- many1 $ noneOf "\"" + char '"' + return (p,p) + +-- I didn't find this in any moin docs, but it's in the darcs wiki +-- and moin was handling it +localPageWithColonLabel :: GenParser Char ParserState (String,String) +localPageWithColonLabel = try $ do + char ':' + p <- many1 $ noneOf ":" + char ':' + label <- many1 $ noneOf "]" + return (p,label) + +localPageCamelCase :: GenParser Char ParserState (String,String) +localPageCamelCase = try $ do + w <- initialCapWord + ws <- many1 initialCapWord + let p = concat $ [w]++ws + return (p,p) + +initialCapWord :: GenParser Char ParserState String +initialCapWord = try $ do + c <- upperChar + cs <- many1 lowerChar + return $ [c]++cs + +upperChar, lowerChar :: GenParser Char ParserState Char +upperChar = oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +lowerChar = oneOf "abcdefghijklmnopqrstuvwxyz" + +-- image :: GenParser Char ParserState Inline +-- image = try $ do +-- char '!' 
+-- (Link lab src) <- link +-- return $ Image lab src + +singleBracketed :: (GenParser Char st a) -> GenParser Char st a +singleBracketed parser = do + string "[" + contents <- parser + string "]" + return contents + +doubleBracketed :: (GenParser Char st a) -> GenParser Char st a +doubleBracketed parser = do + string "[[" + contents <- parser + string "]]" + return contents + +-- +-- auxiliary functions +-- + +indentSpaces :: GenParser Char ParserState [Char] +indentSpaces = try $ do + state <- getState + let tabStop = stateTabStop state + try (count tabStop (char ' ')) <|> + (many (char ' ') >> string "\t") "indentation" + +nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces = do + state <- getState + let tabStop = stateTabStop state + sps <- many (char ' ') + if length sps < tabStop + then return sps + else unexpected "indented line" + From 435352eaff290187255cfefee05c2f775e9d9ac7 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 10:50:23 +0000 Subject: [PATCH 17/76] (DEV) some test text from Simon's plugin --- testmoin.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/testmoin.txt b/testmoin.txt index 03533f231ad9..8e9b3097edc7 100644 --- a/testmoin.txt +++ b/testmoin.txt @@ -7,7 +7,7 @@ * [[http://jmtd.net|jmtd.net]] * another bullet - ''mostly'' meta '''indeed'''. This is __useful__. +''mostly'' meta '''indeed'''. This is __useful__. what does `monospace` (look) like? different to ? @@ -15,6 +15,13 @@ what goes ^up^, must ] come ,,down,,. what --(happened in)-- there? +This is ~+larger+~ text, and this is ~-smaller-~. + +HelpOnEditing/SubPages should be a single link. +/SubPage should be a link. +Wiki''''''Name should result in plain string WikiName. +Same with !WikiName. 
+ ---- CategoryHomepage From a3ab7a9e47543f09db903176fb9e8672683155f0 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 10:55:33 +0000 Subject: [PATCH 18/76] Swap order of bold/italic --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index d1bacd63f507..29249005abd0 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -107,8 +107,8 @@ inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> camelCaseLink <|> str - <|> italic <|> bold + <|> italic <|> underline <|> superscript <|> subscript From 01cd03965f79b002534545a7650af4058c503ba9 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 11:26:38 +0000 Subject: [PATCH 19/76] (DEV) reformat TODO and tick stuff off --- SimonMoin.hs | 81 ++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/SimonMoin.hs b/SimonMoin.hs index 3bbb93ba9e87..1193fa442d3a 100644 --- a/SimonMoin.hs +++ b/SimonMoin.hs @@ -1,4 +1,4 @@ -{- +{- Copyright (C) 2009 Simon Michael This program is free software; you can redistribute it and/or modify @@ -19,32 +19,39 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MoinMoin Copyright : Copyright (C) 2009-2011 Simon Michael, ranft, John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Partial conversion from MoinMoin-formatted text (plus some pandoc-isms -like smart punctuation) to Pandoc. Based on the Markdown reader. +like smart punctuation) to Pandoc. Based on the Markdown reader. 
TODO: -[ ] subscript: ,,sub,, -[ ] strikeout: --(stroke)-- -[ ] larger: ~+larger+~ [ignore] -[ ] smaller: +-smaller-+ [ignore] +[X] subscript: ,,sub,, +[X] strikeout: --(stroke)-- +[X] larger: ~+larger+~ [ignore] + * [ ] add attribute +[X] smaller: +-smaller-+ [ignore] + * [ ] add attribute [ ] table of contents: <> or <> -[ ] moin 1.6 double bracket links: [[FrontPage]], [[FrontPage|named link]], - [[#anchorname]], [[#anchorname|description]], [[PageName#anchorname]], - [[PageName#anchorname|description]], [[attachment:filename.txt]] -[ ] HelpOnEditing/SubPages should be a single link -[ ] /SubPage should be a link +[ ] moin 1.6 double bracket links: + [X] [[FrontPage]], + [ ] [[FrontPage|named link]], + [ ] [[#anchorname]], + [ ] [[#anchorname|description]], + [ ] [[PageName#anchorname]], + [ ] [[PageName#anchorname|description]], + [ ] [[attachment:filename.txt]] +[X] HelpOnEditing/SubPages should be a single link +[X] /SubPage should be a link [ ] Wiki''''''Name should result in plain string WikiName -[ ] Same with !WikiName +[X] Same with !WikiName [ ] WikiName''''''s - the s should not be in the link [ ] WikiName``s - the s should not be in the link [ ] {{http://static.moinmo.in/logos/moinmoin.png}} should be an image -[ ] camel-case links assume ascii letters +[X] camel-case links assume ascii letters [ ] indented blockquotes [ ] definition lists [ ] nested/multiply-indented lists, blocks, code blocks @@ -90,7 +97,7 @@ import Control.Monad ( when ) import Data.Char ( isUpper ) import Text.Pandoc.Definition import Text.Pandoc.Parsing -import Text.Pandoc.Shared +import Text.Pandoc.Shared import Text.ParserCombinators.Parsec hiding ( label ) -- | Parse MoinMoin string and return Pandoc document. 
@@ -121,7 +128,7 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" parseMoinMoin :: GenParser Char ParserState Pandoc parseMoinMoin = do processingInstructions - blocks <- parseBlocks + blocks <- parseBlocks return $ Pandoc (Meta [] [] [] {-title author date-}) $ filter (/= Null) blocks processingInstructions :: GenParser Char a () @@ -132,7 +139,7 @@ comment = try $ do pos <- getPosition when (sourceColumn pos /= 0) $ fail "" string "##" - manyTill anyChar newline + manyTill anyChar newline return () -- @@ -189,12 +196,12 @@ hrule = try $ do -- code blocks -- -codeBlockStart :: GenParser Char st [String] +codeBlockStart :: GenParser Char st [String] codeBlockStart = try $ do string "{{{" classes <- option [] codeBlockClasses optional newline - return classes + return classes codeBlockClasses :: GenParser Char st [String] codeBlockClasses = try $ do @@ -242,7 +249,7 @@ bulletListStart = try $ do spaceChar skipSpaces -anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context nonindentSpaces @@ -285,7 +292,7 @@ rawListItem = try $ do blanks <- many blankline return $ concat result ++ blanks --- continuation of a list item - indented and separated by blankline +-- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. 
-- note: nested lists are parsed as continuations listContinuation :: GenParser Char ParserState [Char] @@ -304,7 +311,7 @@ listContinuationLine = try $ do return $ result ++ "\n" listItem :: GenParser Char ParserState [Block] -listItem = try $ do +listItem = try $ do first <- rawListItem continuations <- many listContinuation -- parsing with ListItemState forces markers at beginning of lines to @@ -335,7 +342,7 @@ bulletList = try $ do -- para :: GenParser Char ParserState Block -para = try $ do +para = try $ do result <- many1 inline newline blanklines <|> do lookAhead ((codeBlockStart >> return "") <|> {- blockQuote <|> -} (header >> return "")) @@ -344,7 +351,7 @@ para = try $ do plain :: GenParser Char ParserState Block plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces --- +-- -- inline -- @@ -371,18 +378,18 @@ inlineParsers = [ ] symbol :: GenParser Char ParserState Inline -symbol = do +symbol = do result <- oneOf specialChars return $ Str [result] -- parses inline code, between n `s and n `s code :: GenParser Char ParserState Inline -code = try $ do +code = try $ do starts <- many1 (char '`') skipSpaces result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> + (char '\n' >> return " ")) + (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) return $ Code nullAttr $ removeLeadingTrailingSpace $ concat result @@ -391,21 +398,21 @@ emph = (enclosed (string "''") (string "''") inline) >>= return . Emph . normali strong :: GenParser Char ParserState Inline strong = enclosed (string "'''") (string "'''") inline >>= return . Strong . normalizeSpaces - + strikeout :: GenParser Char ParserState Inline strikeout = failIfStrict >> enclosed (string "--(") (try $ string ")--") inline >>= return . Strikeout . 
normalizeSpaces superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> enclosed (char '^') (char '^') +superscript = failIfStrict >> enclosed (char '^') (char '^') (notFollowedBy' whitespace >> inline) >>= -- may not contain Space return . Superscript subscript :: GenParser Char ParserState Inline subscript = failIfStrict >> enclosed (string ",,") (string ",,") (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Subscript + return . Subscript whitespace :: GenParser Char ParserState Inline whitespace = do @@ -429,12 +436,12 @@ endline = try $ do notFollowedBy' listStart notFollowedBy (char '=') -- st <- getState - -- if stateStrict st + -- if stateStrict st -- then do notFollowedBy (char '=') -- header - -- else return () + -- else return () -- parse potential list-starts differently if in a list: -- if stateParserContext st == ListItemState - -- then notFollowedBy' (bulletListStart <|> + -- then notFollowedBy' (bulletListStart <|> -- (anyOrderedListStart >> return ())) -- else return () return Space @@ -479,7 +486,7 @@ endline = try $ do -- return (intercalate "%20" $ words $ removeTrailingSpace src, tit) -- linkTitle :: GenParser Char st String --- linkTitle = try $ do +-- linkTitle = try $ do -- (many1 spaceChar >> option '\n' newline) <|> newline -- skipSpaces -- delim <- oneOf "'\"" @@ -617,7 +624,7 @@ indentSpaces :: GenParser Char ParserState [Char] indentSpaces = try $ do state <- getState let tabStop = stateTabStop state - try (count tabStop (char ' ')) <|> + try (count tabStop (char ' ')) <|> (many (char ' ') >> string "\t") "indentation" nonindentSpaces :: GenParser Char ParserState [Char] @@ -625,7 +632,7 @@ nonindentSpaces = do state <- getState let tabStop = stateTabStop state sps <- many (char ' ') - if length sps < tabStop + if length sps < tabStop then return sps else unexpected "indented line" From 5d30f94f63c93a88256af5391f37ca6b4d0d79ae Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 
17 Feb 2026 11:26:50 +0000 Subject: [PATCH 20/76] Handle WikiLink sub-pages --- src/Text/Pandoc/Readers/MoinMoin.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 29249005abd0..bd5f606ebc56 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -130,9 +130,12 @@ many2 p = do camelWord :: PandocMonad m => MoinParser m String camelWord = do - f <- satisfy isUpper - rest <- many1 (satisfy (\c -> isAlphaNum c && not (isUpper c))) - return (f:rest) + slash <- optionMaybe (char '/') + f <- satisfy isUpper + rest <- many1 (satisfy (\c -> isAlphaNum c && not (isUpper c))) + return $ case slash of + Nothing -> f:rest + Just s -> s:f:rest camelCaseLink :: PandocMonad m => MoinParser m B.Inlines camelCaseLink = try $ do @@ -202,7 +205,7 @@ externalLink = do -- from Readers.Mediawiki specialChars :: [Char] -specialChars = "'[]<=&*{}|\":\\_^,~-+()" +specialChars = "'[]<=&*{}|\":\\_^,~-+()/" -- from Readers.Mediawiki spaceChars :: [Char] From 4c5ff55576af79bf8be541585415e2ba2b39f6ba Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 14:38:39 +0000 Subject: [PATCH 21/76] Initial stub Tests.Readers.MoinMoin --- pandoc.cabal | 1 + test/Tests/Readers/MoinMoin.hs | 30 ++++++++++++++++++++++++++++++ test/test-pandoc.hs | 4 +++- 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 test/Tests/Readers/MoinMoin.hs diff --git a/pandoc.cabal b/pandoc.cabal index d6eb53f3396c..7c78f461ee30 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -868,6 +868,7 @@ test-suite test-pandoc Tests.Readers.JATS Tests.Readers.Jira Tests.Readers.Markdown + Tests.Readers.MoinMoin Tests.Readers.Org Tests.Readers.Org.Block Tests.Readers.Org.Block.CodeBlock diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs new file mode 100644 index 000000000000..af51feb21977 --- /dev/null +++ 
b/test/Tests/Readers/MoinMoin.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Tests.Readers.MoinMoin + Copyright : © 2026 Jonathan Dowland + License : GNU GPL, version 2 or above + + Maintainer : Jonathan Dowland + Stability : alpha + Portability : portable + +Tests for the MoinMoin reader. +-} +module Tests.Readers.MoinMoin (tests) where + +import Text.Pandoc -- readMoinMoin +import Text.Pandoc.Sources -- toSources +import Test.Tasty +import Test.Tasty.HUnit +import qualified Data.Text as T +import Data.Either (fromRight) + +-- @?= imposes Eq for which PandocError doesn't have an instance +-- so we remove the Either layer, replacing errors with nullDoc. +nullDoc = Pandoc nullMeta [] +runMM = fromRight nullDoc . runPure . readMoinMoin def . toSources . T.pack + +tests :: [TestTree] +tests = + [ testCase "zomg" $ runMM "hi" @?= Pandoc nullMeta [Para [Str "hi"]] + ] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 9ae97d9c0af9..b96e7462b8bf 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -22,6 +22,7 @@ import qualified Tests.Readers.JATS import qualified Tests.Readers.Jira import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown +import qualified Tests.Readers.MoinMoin import qualified Tests.Readers.Muse import qualified Tests.Readers.ODT import qualified Tests.Readers.Org @@ -90,6 +91,7 @@ tests pandocPath = testGroup "pandoc tests" , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup "Markdown" Tests.Readers.Markdown.tests + , testGroup "MoinMoin" Tests.Readers.MoinMoin.tests , testGroup "HTML" Tests.Readers.HTML.tests , testGroup "JATS" Tests.Readers.JATS.tests , testGroup "Jira" Tests.Readers.Jira.tests @@ -128,4 +130,4 @@ main = do _ -> inDirectory "test" $ do fp <- getExecutablePath -- putStrLn $ "Using pandoc executable at " ++ fp - defaultMain $ tests fp + defaultMain $ tests fp \ No newline at end of file From 20935b0bf2eab53fc0e96305b7b8c02936c9526b Mon Sep 17 
00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 14:52:11 +0000 Subject: [PATCH 22/76] basic inline tests for MoinMoin --- test/Tests/Readers/MoinMoin.hs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index af51feb21977..e1172833ccbe 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -21,10 +21,26 @@ import Data.Either (fromRight) -- @?= imposes Eq for which PandocError doesn't have an instance -- so we remove the Either layer, replacing errors with nullDoc. +nullDoc :: Pandoc nullDoc = Pandoc nullMeta [] +runMM :: String -> Pandoc runMM = fromRight nullDoc . runPure . readMoinMoin def . toSources . T.pack tests :: [TestTree] tests = - [ testCase "zomg" $ runMM "hi" @?= Pandoc nullMeta [Para [Str "hi"]] + [ testCase "basic" $ runMM "hi" @?= Pandoc nullMeta [Para [Str "hi"]] + , testCase "bold" $ runMM "'''hi'''" @?= Pandoc nullMeta [Para [Strong [Str "hi"]]] + , testCase "italic" $ runMM "''hi''" @?= Pandoc nullMeta [Para [Emph [Str "hi"]]] + , testCase "underline" $ runMM "__hi__" @?= Pandoc nullMeta [Para [Underline [Str "hi"]]] + + , testCase "italic then bold" + $ runMM "''hello'' '''world'''" @?= + Pandoc nullMeta [Para [Emph [Str "hello"], Space,Strong [Str "world"]]] + + , testCase "bold then italic" + $ runMM "'''hello''' ''world''" @?= + Pandoc nullMeta [Para [Strong [Str "hello"], Space,Emph [Str "world"]]] ] + +main :: IO () +main = defaultMain $ testGroup "." 
tests From ddf3b2b1333d5f8dc193e1c774c03d78a1ac0eb1 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 14:59:06 +0000 Subject: [PATCH 23/76] helper functions to reduce the verbosity of `tests` --- test/Tests/Readers/MoinMoin.hs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index e1172833ccbe..35565231b7f6 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -26,20 +26,23 @@ nullDoc = Pandoc nullMeta [] runMM :: String -> Pandoc runMM = fromRight nullDoc . runPure . readMoinMoin def . toSources . T.pack +readsTo :: String -> [Block] -> Assertion +readsTo s b = runMM s @?= Pandoc nullMeta b + tests :: [TestTree] tests = - [ testCase "basic" $ runMM "hi" @?= Pandoc nullMeta [Para [Str "hi"]] - , testCase "bold" $ runMM "'''hi'''" @?= Pandoc nullMeta [Para [Strong [Str "hi"]]] - , testCase "italic" $ runMM "''hi''" @?= Pandoc nullMeta [Para [Emph [Str "hi"]]] - , testCase "underline" $ runMM "__hi__" @?= Pandoc nullMeta [Para [Underline [Str "hi"]]] + [ testCase "basic" $ "hi" `readsTo` [Para [Str "hi"]] + , testCase "bold" $ "'''hi'''" `readsTo` [Para [Strong [Str "hi"]]] + , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] + , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] , testCase "italic then bold" - $ runMM "''hello'' '''world'''" @?= - Pandoc nullMeta [Para [Emph [Str "hello"], Space,Strong [Str "world"]]] + $ "''hello'' '''world'''" `readsTo` + [Para [Emph [Str "hello"], Space,Strong [Str "world"]]] , testCase "bold then italic" - $ runMM "'''hello''' ''world''" @?= - Pandoc nullMeta [Para [Strong [Str "hello"], Space,Emph [Str "world"]]] + $ "'''hello''' ''world''" `readsTo` + [Para [Strong [Str "hello"], Space,Emph [Str "world"]]] ] main :: IO () From be4c4cffc4cea539d07c6b03effb6ee78968ddf9 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 
15:03:19 +0000 Subject: [PATCH 24/76] headings tests --- test/Tests/Readers/MoinMoin.hs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 35565231b7f6..291839bb308d 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -43,6 +43,13 @@ tests = , testCase "bold then italic" $ "'''hello''' ''world''" `readsTo` [Para [Strong [Str "hello"], Space,Emph [Str "world"]]] + + , testCase "heading 1" $ "= 1 =" `readsTo` [Header 1 ("",[],[]) [Str "1"]] + , testCase "heading 2" $ "== 2 ==" `readsTo` [Header 2 ("",[],[]) [Str "2"]] + , testCase "heading 3" $ "=== 3 ===" `readsTo` [Header 3 ("",[],[]) [Str "3"]] + , testCase "heading 4" $ "==== 4 ====" `readsTo` [Header 4 ("",[],[]) [Str "4"]] + , testCase "heading 5" $ "===== 5 =====" `readsTo` [Header 5 ("",[],[]) [Str "5"]] + , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Para [Str "======",Space,Str "6",Space,Str "======"]] ] main :: IO () From 11627bf67ac057bfc23e088352d46a4fb5673ca1 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 17:14:20 +0000 Subject: [PATCH 25/76] testcase: italic and bold --- test/Tests/Readers/MoinMoin.hs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 291839bb308d..395ef5f644c0 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -36,13 +36,9 @@ tests = , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] - , testCase "italic then bold" - $ "''hello'' '''world'''" `readsTo` - [Para [Emph [Str "hello"], Space,Strong [Str "world"]]] - - , testCase "bold then italic" - $ "'''hello''' ''world''" `readsTo` - [Para [Strong [Str "hello"], Space,Emph [Str "world"]]] + , testCase "italic and bold" $ + "'''''hello world'''''" `readsTo` + [Para [Strong [Emph [Str
"hello", Space, Str "world"]]]] , testCase "heading 1" $ "= 1 =" `readsTo` [Header 1 ("",[],[]) [Str "1"]] , testCase "heading 2" $ "== 2 ==" `readsTo` [Header 2 ("",[],[]) [Str "2"]] From 1fcf8b5a913023aae8474f06713838363cbc5c1e Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 17:22:48 +0000 Subject: [PATCH 26/76] tests for superscript, subscript, strikeout --- test/Tests/Readers/MoinMoin.hs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 395ef5f644c0..70fab1b971e6 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -46,6 +46,13 @@ tests = , testCase "heading 4" $ "==== 4 ====" `readsTo` [Header 4 ("",[],[]) [Str "4"]] , testCase "heading 5" $ "===== 5 =====" `readsTo` [Header 5 ("",[],[]) [Str "5"]] , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Para [Str "======",Space,Str "6",Space,Str "======"]] + + , testCase "superscript" $ "^2^" `readsTo` [Para [Superscript [Str "2"]]] + , testCase "subscript" $ ",,low,," `readsTo` [Para [Subscript [Str "low"]]] + + -- XXX: add tests for annotations + , testCase "strikeout" $ "--(delete)--" `readsTo` [Para [Strikeout [Str "delete"]]] + ] main :: IO () From 02b0cf216e35c590efdef025fdf521fc22a7e413 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 19:02:49 +0000 Subject: [PATCH 27/76] tests for larger, smaller, some link types --- SimonMoin.hs | 14 -------------- test/Tests/Readers/MoinMoin.hs | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/SimonMoin.hs b/SimonMoin.hs index 1193fa442d3a..e652b66c76df 100644 --- a/SimonMoin.hs +++ b/SimonMoin.hs @@ -29,27 +29,13 @@ Partial conversion from MoinMoin-formatted text (plus some pandoc-isms like smart punctuation) to Pandoc. Based on the Markdown reader.
TODO: -[X] subscript: ,,sub,, -[X] strikeout: --(stroke)-- -[X] larger: ~+larger+~ [ignore] - * [ ] add attribute -[X] smaller: +-smaller-+ [ignore] - * [ ] add attribute [ ] table of contents: <> or <> [ ] moin 1.6 double bracket links: - [X] [[FrontPage]], - [ ] [[FrontPage|named link]], [ ] [[#anchorname]], [ ] [[#anchorname|description]], [ ] [[PageName#anchorname]], [ ] [[PageName#anchorname|description]], [ ] [[attachment:filename.txt]] -[X] HelpOnEditing/SubPages should be a single link -[X] /SubPage should be a link -[ ] Wiki''''''Name should result in plain string WikiName -[X] Same with !WikiName -[ ] WikiName''''''s - the s should not be in the link -[ ] WikiName``s - the s should not be in the link [ ] {{http://static.moinmo.in/logos/moinmoin.png}} should be an image [X] camel-case links assume ascii letters [ ] indented blockquotes diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 70fab1b971e6..ac323fc2a1fb 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -52,6 +52,20 @@ tests = -- XXX: add tests for annotations , testCase "strikeout" $ "--(delete)--" `readsTo` [Para [Strikeout [Str "delete"]]] + , testCase "larger" $ "~+larger+~" `readsTo` [Para [Str "larger"]] + , testCase "smaller" $ "~-smaller-~" `readsTo` [Para [Str "smaller"]] + + , testGroup "links" + [ testCase "CamelCase" $ "FooBar" `readsTo` [Para [Link ("",[],[]) [Str "FooBar"] ("FooBar","")]] + , testCase "/SubCase1" $ "/SubCase1" `readsTo` [Para [Link ("",[],[]) [Str "/SubCase1"] ("/SubCase1","")]] + , testCase "Sub/Case2" $ "Sub/Case2" `readsTo` [Para [Link ("",[],[]) [Str "Sub/Case2"] ("Sub/Case2","")]] + , testCase "bracket1" $ "[[foo]]" `readsTo` [Para [Link ("",[],[]) [Str "foo"] ("foo","")]] + , testCase "labelled" $ "[[foo|bar]]"`readsTo` [Para [Link ("",[],[]) [Str "bar"] ("foo","")]] + , testCase "banglink" $ "!NotLink" `readsTo` [Para [Str "!NotLink"]] + , testCase "notalink" $ "Not''''''Link" `readsTo` [Para 
[Str "NotLink"]] + , testCase "singular1" $ "SinGular''''''s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular","")]] + , testCase "singular2" $ "SinGular``s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular","")]] + ] ] From ff32f81b9783905802e1a1606baff55effddfe2d Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Tue, 17 Feb 2026 19:06:49 +0000 Subject: [PATCH 28/76] add Simon's copyright This is now partially derived from Simon's earlier work --- test/Tests/Readers/MoinMoin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index ac323fc2a1fb..5e8d23b14bdb 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.MoinMoin - Copyright : © 2026 Jonathan Dowland + Copyright : © 2026 Jonathan Dowland, © 2009 Simon Michael License : GNU GPL, version 2 or above Maintainer : Jonathan Dowland From 33890cf536efb3d1d0326cb9912b8589aeac93c1 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 18 Feb 2026 11:57:05 +0000 Subject: [PATCH 29/76] unicode copyright symbol --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index bd5f606ebc56..8122125880ce 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.MoinMoin - Copyright : Copyright (C) 2026 Jonathan Dowland + Copyright : Copyright © 2026 Jonathan Dowland License : GNU GPL, version 2 or above Maintainer : Jonathan Dowland From 133d691d1253a8a40bc84eafee68db9b0d95256f Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 18 Feb 2026 12:43:05 +0000 Subject: [PATCH 30/76] fix parse result of trailing 's' for two tests --- test/Tests/Readers/MoinMoin.hs | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 5e8d23b14bdb..6c77f7a1a0f4 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -63,8 +63,8 @@ tests = , testCase "labelled" $ "[[foo|bar]]"`readsTo` [Para [Link ("",[],[]) [Str "bar"] ("foo","")]] , testCase "banglink" $ "!NotLink" `readsTo` [Para [Str "!NotLink"]] , testCase "notalink" $ "Not''''''Link" `readsTo` [Para [Str "NotLink"]] - , testCase "singular1" $ "SinGular''''''s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular","")]] - , testCase "singular2" $ "SinGular``s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular","")]] + , testCase "singular1" $ "SinGular''''''s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] + , testCase "singular2" $ "SinGular``s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] ] ] From df4d45dfcfbfd3fc66945b31248fde2d01b207b7 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 18 Feb 2026 12:43:22 +0000 Subject: [PATCH 31/76] new test for space within bold delimiters Oddly, Pandoc's `enclosed` forbids spaces after the opening delimiter. As far as I know this is valid in MoinMoin. 
--- test/Tests/Readers/MoinMoin.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 6c77f7a1a0f4..4eb7101660fc 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -40,6 +40,9 @@ tests = "'''''hello world'''''" `readsTo` [Para [Strong [Emph [Str "hello", Space, Str "world"]]]] + -- Pandoc's "enclosed" fails this + , testCase "boldSpace" $ "''' hi'''" `readsTo` [Para [Strong [Space,Str "hi"]]] + , testCase "heading 1" $ "= 1 =" `readsTo` [Header 1 ("",[],[]) [Str "1"]] , testCase "heading 2" $ "== 2 ==" `readsTo` [Header 2 ("",[],[]) [Str "2"]] , testCase "heading 3" $ "=== 3 ===" `readsTo` [Header 3 ("",[],[]) [Str "3"]] From ffc990c65ad3e31d5d51c392f4f56d5b1a1d40f9 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 18 Feb 2026 11:57:17 +0000 Subject: [PATCH 32/76] WIP: rework 'bold' to have optional internal inlines --- src/Text/Pandoc/Readers/MoinMoin.hs | 14 +++++++++++++- test/Tests/Readers/MoinMoin.hs | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 8122125880ce..6f1480e939ac 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -1,4 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} +-- ^^ needed temp for enclosed' {- | Module : Text.Pandoc.Readers.MoinMoin Copyright : Copyright © 2026 Jonathan Dowland @@ -160,8 +162,18 @@ formatter delim inliner = italic :: PandocMonad m => MoinParser m B.Inlines italic = formatter ("''") B.emph + +-- this has broken "''''' hello world'''''" bold :: PandocMonad m => MoinParser m B.Inlines -bold = formatter ("'''") B.strong +bold = do + inner <- enclosed' delim delim inline + if null inner + then return (B.fromList []) + else (return . B.strong . 
mconcat) inner + where delim = string ("'''") + enclosed' start end parser = try $ + start >> manyTill parser end + -- monospace: B.code (Code Attr Text) needs different handling -- code: as monospace -- however, B.code :: Text -> Inlines diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 4eb7101660fc..3290cae965ac 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -36,6 +36,7 @@ tests = , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] + -- broken , testCase "italic and bold" $ "'''''hello world'''''" `readsTo` [Para [Strong [Emph [Str "hello", Space, Str "world"]]]] From 4737152abfabe687c08638436b6451608f61c5f8 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 18 Feb 2026 15:25:52 +0000 Subject: [PATCH 33/76] Initial monospace (-> Code) --- src/Text/Pandoc/Readers/MoinMoin.hs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 6f1480e939ac..52e58ccd6348 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -110,6 +110,7 @@ inline = whitespace <|> camelCaseLink <|> str <|> bold + <|> monospace <|> italic <|> underline <|> superscript @@ -165,7 +166,7 @@ italic = formatter ("''") B.emph -- this has broken "''''' hello world'''''" bold :: PandocMonad m => MoinParser m B.Inlines -bold = do +bold = try $ do inner <- enclosed' delim delim inline if null inner then return (B.fromList []) @@ -174,9 +175,14 @@ bold = do enclosed' start end parser = try $ start >> manyTill parser end --- monospace: B.code (Code Attr Text) needs different handling --- code: as monospace --- however, B.code :: Text -> Inlines +monospace :: PandocMonad m => MoinParser m B.Inlines +monospace = try $ do + char '`' + inner <- manyTill anyChar (char '`') + if null inner + then return 
(B.fromList []) + else (return . B.code . T.pack) inner + underline :: PandocMonad m => MoinParser m B.Inlines underline = formatter ("__") B.underline superscript :: PandocMonad m => MoinParser m B.Inlines @@ -217,7 +223,7 @@ externalLink = do -- from Readers.Mediawiki specialChars :: [Char] -specialChars = "'[]<=&*{}|\":\\_^,~-+()/" +specialChars = "'[]<=&*{}|\":\\_^,~-+()/`" -- from Readers.Mediawiki spaceChars :: [Char] From dddcd8c1d828a494b6b77aa0ac2efc4cd964300b Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 18 Feb 2026 20:05:19 +0000 Subject: [PATCH 34/76] use Text.Show.Pretty to pretty-print Pandoc type use Text.Show.Pretty (available due to Writers.Native) to pretty-print an instance of Pandoc to ease eyeballing. --- testMoin.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testMoin.hs b/testMoin.hs index 39abce8459fd..d3425a198034 100644 --- a/testMoin.hs +++ b/testMoin.hs @@ -10,6 +10,8 @@ import Text.Parsec import Text.Parsec.Char import Data.Char -- isUpper etc +import Text.Show.Pretty + sampleMW = T.pack "\ \= sample mediawiki doc =\n\ \\n\ @@ -40,7 +42,7 @@ main = do sampleMM <- TIO.readFile "testmoin.txt" - (putStrLn . show . parseMM) sampleMM + (putStrLn . ppShow . parseMM) sampleMM putStrLn "\n##################################\n" result <- runIO $ From 320cf1f4c00ca0e06caca13427425d4f55c491e7 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 09:50:47 +0000 Subject: [PATCH 35/76] Comment about pre-2007 MoinMoin syntax --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 52e58ccd6348..e8ed53544d10 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -205,6 +205,8 @@ stroke = enclosed (string "--(") (string ")--") inline >>= special :: PandocMonad m => MoinParser m B.Inlines special = B.str . 
T.singleton <$> oneOf specialChars +-- MoinMoin < 1.6.0 (~2007-12) supported a single-bracket +-- external link syntax. We don't attempt to support that. externalLink :: PandocMonad m => MoinParser m B.Inlines externalLink = do string "[[" From 7d5196762d7afec150e3a05a599178cf55cee969 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 09:50:57 +0000 Subject: [PATCH 36/76] improve debugging routines --- src/Text/Pandoc/Readers/MoinMoin.hs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index e8ed53544d10..67eb20d58f09 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -20,11 +20,12 @@ import Control.Monad.Except (throwError) import Data.Char -- isUpper, isAlphaNum import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) -import Text.Pandoc.Class (runPure) -- debug +import Text.Pandoc.Class (runPure, PandocPure (..)) -- debug import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing import qualified Text.Pandoc.Builder as B import qualified Data.Text as T +import Data.Either (fromRight) -- | Read MoinMoin from an input string and return a Pandoc document. readMoinMoin :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc @@ -231,7 +232,14 @@ specialChars = "'[]<=&*{}|\":\\_^,~-+()/`" spaceChars :: [Char] spaceChars = " \n\t" --- debug function to run the inline parser in GHCi -debugParse :: PandocMonad m => T.Text -> m (Either ParseError B.Inlines) -debugParse t = - runParserT (mconcat <$> many inline) MoinState "srcname" (toSources t) +------------------------------------------------------------------------------ +-- debug functions for use in GHCi + +p1 :: MoinParser PandocPure B.Inlines -> T.Text -> Either ParseError B.Inlines +p1 p' = fromRight (error "unhandled PandocError") + . runPure + . runParserT p' MoinState "?" + . 
toSources + +pp :: MoinParser PandocPure B.Inlines -> T.Text -> Either ParseError B.Inlines +pp = p1 . fmap mconcat . many From 2e0f21ee33f2a73c0c77a534fe506f8e2336ff11 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 13:43:41 +0000 Subject: [PATCH 37/76] para: stop throwing away leading spaces --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 67eb20d58f09..73a3acfc9dae 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -93,7 +93,7 @@ guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) -- from Readers.Mediawiki para :: PandocMonad m => MoinParser m B.Blocks para = do - contents <- B.trimInlines . mconcat <$> many1 inline + contents <- mconcat <$> many1 inline return $ B.para contents bulletList :: PandocMonad m => MoinParser m B.Blocks From 580fff3a4b2f07f4ad15b1615c7c71579a3474ae Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 13:44:09 +0000 Subject: [PATCH 38/76] generalise types of p1,pp so they can be used with blocks --- src/Text/Pandoc/Readers/MoinMoin.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 73a3acfc9dae..7bf24d39fb09 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -235,11 +235,13 @@ spaceChars = " \n\t" ------------------------------------------------------------------------------ -- debug functions for use in GHCi -p1 :: MoinParser PandocPure B.Inlines -> T.Text -> Either ParseError B.Inlines +p1 :: Monoid a + => MoinParser PandocPure a -> T.Text -> Either ParseError a p1 p' = fromRight (error "unhandled PandocError") . runPure . runParserT p' MoinState "?" . 
toSources -pp :: MoinParser PandocPure B.Inlines -> T.Text -> Either ParseError B.Inlines +pp :: Monoid a + => MoinParser PandocPure a -> T.Text -> Either ParseError a pp = p1 . fmap mconcat . many From 8c79c97c27d60c058e8406df82f7c6274013f685 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 13:44:52 +0000 Subject: [PATCH 39/76] parse comments --- src/Text/Pandoc/Readers/MoinMoin.hs | 7 +++++++ test/Tests/Readers/MoinMoin.hs | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 7bf24d39fb09..7d0b82f288e0 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -69,10 +69,17 @@ processingInstruction = do manyUntil anyChar newline return () +comment :: PandocMonad m => MoinParser m B.Blocks +comment = do + string "##" + manyUntil anyChar newline + return mempty + block :: PandocMonad m => MoinParser m B.Blocks block = do res <- mempty <$ skipMany1 blankline <|> header + <|> comment <|> bulletList <|> para return res diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 3290cae965ac..cf29c21308a7 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -71,6 +71,10 @@ tests = , testCase "singular2" $ "SinGular``s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] ] + , testGroup "blocks" + [ testCase "comment1" $ "#hi" `readsTo` [] + , testCase "notcomment1" $ " #hi" `readsTo` [Para [Space,Str "#hi"]] + ] ] main :: IO () From b60526d53faba421e4fac681a7c646d7f4cdb1a5 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 13:45:14 +0000 Subject: [PATCH 40/76] note existence of writeNative --- testMoin.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/testMoin.hs b/testMoin.hs index d3425a198034..1f5c5d314864 100644 --- a/testMoin.hs +++ b/testMoin.hs @@ -42,6 +42,7 @@ main = do sampleMM <- TIO.readFile "testmoin.txt" + -- or use 
writeNative (putStrLn . ppShow . parseMM) sampleMM putStrLn "\n##################################\n" From 8fc9c56c7675df1c82f81eeed88a4dd601f9043c Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 14:38:12 +0000 Subject: [PATCH 41/76] Improve processing instruction comments --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 ++ test/Tests/Readers/MoinMoin.hs | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 7d0b82f288e0..ad78be54c4f3 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -69,6 +69,8 @@ processingInstruction = do manyUntil anyChar newline return () +-- technically a processing instruction but can occur anywhere +-- in a page comment :: PandocMonad m => MoinParser m B.Blocks comment = do string "##" diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index cf29c21308a7..20af7fe1c50c 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -72,8 +72,9 @@ tests = ] , testGroup "blocks" - [ testCase "comment1" $ "#hi" `readsTo` [] - , testCase "notcomment1" $ " #hi" `readsTo` [Para [Space,Str "#hi"]] + [ testCase "comment1" $ "##hi" `readsTo` [] + , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"], Para [Str "world"]] + , testCase "notcomment1" $ " ##hi" `readsTo` [Para [Space,Str "##hi"]] ] ] From 489da22883d479bebd149fd83dbe1b0be3e33b29 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 14:49:29 +0000 Subject: [PATCH 42/76] Inline (C-style) comments --- src/Text/Pandoc/Readers/MoinMoin.hs | 7 +++++++ test/Tests/Readers/MoinMoin.hs | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index ad78be54c4f3..4558720bfc70 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -129,6 +129,7 @@ inline 
= whitespace <|> larger <|> stroke <|> externalLink + <|> inlineComment <|> special -- from Readers.Mediawiki @@ -212,6 +213,12 @@ stroke :: PandocMonad m => MoinParser m B.Inlines stroke = enclosed (string "--(") (string ")--") inline >>= return . B.strikeout . mconcat +inlineComment :: PandocMonad m => MoinParser m B.Inlines +inlineComment = do + string "/*" + manyTill anyChar (string "*/") + return mempty + special :: PandocMonad m => MoinParser m B.Inlines special = B.str . T.singleton <$> oneOf specialChars diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 20af7fe1c50c..e5fb3236b8d0 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -59,6 +59,10 @@ tests = , testCase "larger" $ "~+larger+~" `readsTo` [Para [Str "larger"]] , testCase "smaller" $ "~-smaller-~" `readsTo` [Para [Str "smaller"]] + , testCase "inlineComment" $ "hello/*comment*/world" `readsTo` [Para [Str "helloworld"]] + , testCase "inlineCommentNewlines" $ + "hello/*comment\nmore\n*/world" `readsTo` [Para [Str "helloworld"]] + , testGroup "links" [ testCase "CamelCase" $ "FooBar" `readsTo` [Para [Link ("",[],[]) [Str "FooBar"] ("FooBar","")]] , testCase "/SubCase1" $ "/SubCase1" `readsTo` [Para [Link ("",[],[]) [Str "/SubCase1"] ("/SubCase1","")]] From bac4efc7466588dcdbfb91b71f1579063c351c9b Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 14:50:36 +0000 Subject: [PATCH 43/76] test for newlines in basic text, adjust comment2 accordingly both newline and comment2 tests fail. I think this might be provoked by 2e0f21ee ("para: stop throwing away leading spaces"). 
--- test/Tests/Readers/MoinMoin.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index e5fb3236b8d0..21b8af9b897a 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -32,6 +32,7 @@ readsTo s b = runMM s @?= Pandoc nullMeta b tests :: [TestTree] tests = [ testCase "basic" $ "hi" `readsTo` [Para [Str "hi"]] + , testCase "newline" $ "hi\nthere"`readsTo` [Para [Str "hi",Space,Str "there"]]: , testCase "bold" $ "'''hi'''" `readsTo` [Para [Strong [Str "hi"]]] , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] @@ -77,7 +78,7 @@ tests = , testGroup "blocks" [ testCase "comment1" $ "##hi" `readsTo` [] - , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"], Para [Str "world"]] + , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello", Str "world"]] , testCase "notcomment1" $ " ##hi" `readsTo` [Para [Space,Str "##hi"]] ] ] From 90a4c252d63dff32bf1cd55535bff1b0e688fa29 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 14:53:23 +0000 Subject: [PATCH 44/76] Revert "para: stop throwing away leading spaces" This reverts commit 2e0f21ee33f2a73c0c77a534fe506f8e2336ff11. --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 4558720bfc70..d379a9714755 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -102,7 +102,7 @@ guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) -- from Readers.Mediawiki para :: PandocMonad m => MoinParser m B.Blocks para = do - contents <- mconcat <$> many1 inline + contents <- B.trimInlines . 
mconcat <$> many1 inline return $ B.para contents bulletList :: PandocMonad m => MoinParser m B.Blocks From 1cafba12babec9460ae8b86bd0d1c8d3a5a51205 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 15:05:16 +0000 Subject: [PATCH 45/76] (DEV) remove some moin 1.5 stuff we don't care about --- SimonMoin.hs | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/SimonMoin.hs b/SimonMoin.hs index e652b66c76df..66c67d91ab1c 100644 --- a/SimonMoin.hs +++ b/SimonMoin.hs @@ -484,7 +484,6 @@ link :: GenParser Char ParserState Inline link = choice [uriLink ,emailAddressLink ,localPageCamelCaseLink - ,moin15BracketLink ,moin16BracketLink ] @@ -503,17 +502,6 @@ localPageCamelCaseLink = try $ do (p,_) <- localPageCamelCase return $ Link [Str p] (p, "") -moin15BracketLink :: GenParser Char ParserState Inline -moin15BracketLink = try $ do - (target,label) <- singleBracketed $ choice [ - uriSpaceLabel - ,uriNoLabel - ,localPageInQuotes - ,localPageWithColonLabel - ,localPageCamelCase - ] - return $ Link [Str label] (target, "") - moin16BracketLink :: GenParser Char ParserState Inline moin16BracketLink = try $ do (target,label) <- doubleBracketed $ choice [ @@ -525,13 +513,6 @@ moin16BracketLink = try $ do ] return $ Link [Str label] (target, "") -uriSpaceLabel :: GenParser Char ParserState (String,String) -uriSpaceLabel = try $ do - (_, uri_escaped) <- uri - many1 space - label <- many1 $ noneOf "]" - return (uri_escaped, label) - uriPipeLabel :: GenParser Char ParserState (String,String) uriPipeLabel = try $ do (_, uri_escaped) <- uri @@ -588,13 +569,6 @@ lowerChar = oneOf "abcdefghijklmnopqrstuvwxyz" -- (Link lab src) <- link -- return $ Image lab src -singleBracketed :: (GenParser Char st a) -> GenParser Char st a -singleBracketed parser = do - string "[" - contents <- parser - string "]" - return contents - doubleBracketed :: (GenParser Char st a) -> GenParser Char st a doubleBracketed parser = do string "[[" From 
0eb4b1a45bcf8aa3dd6a7ea75267c5026f5125ec Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 15:16:04 +0000 Subject: [PATCH 46/76] fix PI comment --- src/Text/Pandoc/Readers/MoinMoin.hs | 4 ++-- test/Tests/Readers/MoinMoin.hs | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index d379a9714755..afcfdda67cee 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -72,7 +72,7 @@ processingInstruction = do -- technically a processing instruction but can occur anywhere -- in a page comment :: PandocMonad m => MoinParser m B.Blocks -comment = do +comment = try $ do string "##" manyUntil anyChar newline return mempty @@ -242,7 +242,7 @@ externalLink = do -- from Readers.Mediawiki specialChars :: [Char] -specialChars = "'[]<=&*{}|\":\\_^,~-+()/`" +specialChars = "'[]<=&*{}|\":\\_^,~-+()/`#" -- from Readers.Mediawiki spaceChars :: [Char] diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 21b8af9b897a..0bd9a278b656 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -78,8 +78,10 @@ tests = , testGroup "blocks" [ testCase "comment1" $ "##hi" `readsTo` [] - , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello", Str "world"]] - , testCase "notcomment1" $ " ##hi" `readsTo` [Para [Space,Str "##hi"]] + -- processing instruction comment lines cause paragraph breaks + , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"], Para [Str "world"]] + , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] + , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] ] ] From 2d94e58d3d5d9371f1550025e904b66d6ef055b9 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 19 Feb 2026 15:16:14 +0000 Subject: [PATCH 47/76] test newline, to demonstrate faulty newline handling --- test/Tests/Readers/MoinMoin.hs | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 0bd9a278b656..e0559fae60d1 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -32,7 +32,7 @@ readsTo s b = runMM s @?= Pandoc nullMeta b tests :: [TestTree] tests = [ testCase "basic" $ "hi" `readsTo` [Para [Str "hi"]] - , testCase "newline" $ "hi\nthere"`readsTo` [Para [Str "hi",Space,Str "there"]]: + , testCase "newline" $ "hi\nthere"`readsTo` [Para [Str "hi",Space,Str "there"]] , testCase "bold" $ "'''hi'''" `readsTo` [Para [Strong [Str "hi"]]] , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] From 63cc71fd56f21c7ba6dc4c9edc8b52aa6e690d8d Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Fri, 20 Feb 2026 13:26:22 +0000 Subject: [PATCH 48/76] endline (initial): soft-breaks inside paras --- src/Text/Pandoc/Readers/MoinMoin.hs | 9 +++++++++ test/Tests/Readers/MoinMoin.hs | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index afcfdda67cee..ac00e1d7c290 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -130,6 +130,7 @@ inline = whitespace <|> stroke <|> externalLink <|> inlineComment + <|> endline <|> special -- from Readers.Mediawiki @@ -219,6 +220,14 @@ inlineComment = do manyTill anyChar (string "*/") return mempty +-- a newline that does not break a Para (etc) +endline :: PandocMonad m => MoinParser m B.Inlines +endline = try $ do + newline + notFollowedBy blankline + (eof >> return mempty) + <|> (skipMany spaceChar >> return B.softbreak) + special :: PandocMonad m => MoinParser m B.Inlines special = B.str . 
T.singleton <$> oneOf specialChars diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index e0559fae60d1..3f679db55a47 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -32,7 +32,7 @@ readsTo s b = runMM s @?= Pandoc nullMeta b tests :: [TestTree] tests = [ testCase "basic" $ "hi" `readsTo` [Para [Str "hi"]] - , testCase "newline" $ "hi\nthere"`readsTo` [Para [Str "hi",Space,Str "there"]] + , testCase "endline" $ "hi\nthere"`readsTo` [Para [Str "hi",SoftBreak,Str "there"]] , testCase "bold" $ "'''hi'''" `readsTo` [Para [Strong [Str "hi"]]] , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] @@ -79,7 +79,7 @@ tests = , testGroup "blocks" [ testCase "comment1" $ "##hi" `readsTo` [] -- processing instruction comment lines cause paragraph breaks - , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"], Para [Str "world"]] + , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello",SoftBreak,Str "world"]] , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] ] From 535cd5b1197380fd888f8a5ac2b3b09f77e5635c Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Fri, 20 Feb 2026 13:32:31 +0000 Subject: [PATCH 49/76] Add Simon Michael and John MacFarlane copyrights I've now borrowed enough from the Mediawiki and Markdown readers, as well as cribbing from Simon's earlier MoinMoin plugin. 
--- src/Text/Pandoc/Readers/MoinMoin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index ac00e1d7c290..a7317269a9a0 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -3,7 +3,7 @@ -- ^^ needed temp for enclosed' {- | Module : Text.Pandoc.Readers.MoinMoin - Copyright : Copyright © 2026 Jonathan Dowland + Copyright : Copyright © 2026 Jonathan Dowland, © 2009-2011 Simon Michael, © 2006-2024 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Jonathan Dowland From 89d904d4a138b25638b1a1bdecad8a387de05089 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Fri, 20 Feb 2026 14:50:59 +0000 Subject: [PATCH 50/76] fix handling of endline and PI comments --- src/Text/Pandoc/Readers/MoinMoin.hs | 1 + test/Tests/Readers/MoinMoin.hs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index a7317269a9a0..599209df7011 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -225,6 +225,7 @@ endline :: PandocMonad m => MoinParser m B.Inlines endline = try $ do newline notFollowedBy blankline + notFollowedBy (string "##") (eof >> return mempty) <|> (skipMany spaceChar >> return B.softbreak) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 3f679db55a47..b8d9024f5603 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -79,7 +79,7 @@ tests = , testGroup "blocks" [ testCase "comment1" $ "##hi" `readsTo` [] -- processing instruction comment lines cause paragraph breaks - , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello",SoftBreak,Str "world"]] + , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"],Para [Str "world"]] , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] , testCase
"notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] ] From 13c7b4d747be1c1cb5c409e98978fa5ffab12068 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Fri, 20 Feb 2026 14:54:41 +0000 Subject: [PATCH 51/76] tests for empty delimiters --- test/Tests/Readers/MoinMoin.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index b8d9024f5603..aaa7029e5775 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -83,6 +83,11 @@ tests = , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] ] + , testGroup "emptyDelims" + [ testCase "empty bold" $ "''''''" `readsTo` [Para []] + , testCase "empty italic" $ "''''" `readsTo` [Para []] + , testCase "empty code" $ "``" `readsTo` [Para []] + ] ] main :: IO () From cfca05486bab91a47c7103fa8302694e3696d4a3 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 11:51:21 +0000 Subject: [PATCH 52/76] Initial support for <> --- src/Text/Pandoc/Readers/MoinMoin.hs | 20 ++++++++++++++++---- test/Tests/Readers/MoinMoin.hs | 12 ++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 599209df7011..6d3734c3ac3f 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -31,12 +31,14 @@ import Data.Either (fromRight) readMoinMoin :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc readMoinMoin opts s = do let sources = toSources s - parsed <- readWithM parseMoinMoin MoinState sources + parsed <- readWithM parseMoinMoin defaultMoinState sources case parsed of Left err -> throwError err Right res -> return res -data MoinState = MoinState -- context that needs to be passed around the parser +data MoinState = MoinState { mmMeta :: Meta } deriving Show +defaultMoinState = MoinState nullMeta + type MoinParser m = 
ParsecT Sources MoinState m parseMoinMoin :: PandocMonad m => MoinParser m Pandoc @@ -46,7 +48,8 @@ parseMoinMoin = do spaces eof - let meta = nullMeta + st <- getState + let meta = mmMeta st -- reportLogMessages -- could not deduce 'HasLogMessages MoinState' return $ Pandoc meta (B.toList blocks) @@ -131,6 +134,7 @@ inline = whitespace <|> externalLink <|> inlineComment <|> endline + <|> tableOfContents <|> special -- from Readers.Mediawiki @@ -229,6 +233,14 @@ endline = try $ do (eof >> return mempty) <|> (skipMany spaceChar >> return B.softbreak) +-- MoinMoin behaviour: insert the TOC at the point of the token. +-- What we're doing here is not (yet) that. +tableOfContents :: PandocMonad m => MoinParser m B.Inlines +tableOfContents = try $ do + string "<>" + updateState $ \st -> st { mmMeta = B.setMeta "toc" True (mmMeta st) } + return mempty + special :: PandocMonad m => MoinParser m B.Inlines special = B.str . T.singleton <$> oneOf specialChars @@ -265,7 +277,7 @@ p1 :: Monoid a => MoinParser PandocPure a -> T.Text -> Either ParseError a p1 p' = fromRight (error "unhandled PandocError") . runPure - . runParserT p' MoinState "?" + . runParserT p' defaultMoinState "?" . toSources pp :: Monoid a diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index aaa7029e5775..f52ec26c47f4 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -17,6 +17,7 @@ import Text.Pandoc.Sources -- toSources import Test.Tasty import Test.Tasty.HUnit import qualified Data.Text as T +import qualified Data.Map.Strict as M import Data.Either (fromRight) -- @?= imposes Eq for which PandocError doesn't have an instance @@ -29,6 +30,11 @@ runMM = fromRight nullDoc . runPure . readMoinMoin def . toSources . 
T.pack readsTo :: String -> [Block] -> Assertion readsTo s b = runMM s @?= Pandoc nullMeta b +hasMeta :: String -> Meta -> Assertion +hasMeta s cmp = let + (Pandoc meta blocks) = runMM s + in meta @?= cmp + tests :: [TestTree] tests = [ testCase "basic" $ "hi" `readsTo` [Para [Str "hi"]] @@ -52,6 +58,12 @@ tests = , testCase "heading 5" $ "===== 5 =====" `readsTo` [Header 5 ("",[],[]) [Str "5"]] , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Para [Str "======",Space,Str "6",Space,Str "======"]] + , testGroup "toc" $ + [ testCase "tocPresent" $ "<>" `hasMeta` + (Meta (M.singleton "toc" (MetaBool True))) + --[("toc",MetaBool True)] + ] + , testCase "superscript" $ "^2^" `readsTo` [Para [Superscript [Str "2"]]] , testCase "subscript" $ ",,low,," `readsTo` [Para [Subscript [Str "low"]]] From e41b140f0daf032cb4f4c65edb5707fd98a89935 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 12:11:03 +0000 Subject: [PATCH 53/76] TableOfContents: handle optional level --- src/Text/Pandoc/Readers/MoinMoin.hs | 11 ++++++++++- test/Tests/Readers/MoinMoin.hs | 10 +++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 6d3734c3ac3f..c09d43908116 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -94,6 +94,8 @@ header :: PandocMonad m => MoinParser m B.Blocks header = try $ do guardColumnOne lev <- length <$> many1 (char '=') + -- XXX: MoinMoin flattens higher levels to 5, so + -- ====== 6 ====== =>
guard $ lev <= 5 contents <- B.trimInlines . mconcat <$> manyTill inline (count lev $ char '=') return $ B.header lev contents @@ -237,7 +239,14 @@ endline = try $ do -- What we're doing here is not (yet) that. tableOfContents :: PandocMonad m => MoinParser m B.Inlines tableOfContents = try $ do - string "<>" + string "<5 + lvl <- optionMaybe (oneOf "12345") + case lvl of + Nothing -> return () + Just l -> updateState $ \st -> st + { mmMeta = B.setMeta "toclevel" [l] (mmMeta st) } + string ")>>" updateState $ \st -> st { mmMeta = B.setMeta "toc" True (mmMeta st) } return mempty diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index f52ec26c47f4..6e7559896965 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -30,10 +30,10 @@ runMM = fromRight nullDoc . runPure . readMoinMoin def . toSources . T.pack readsTo :: String -> [Block] -> Assertion readsTo s b = runMM s @?= Pandoc nullMeta b -hasMeta :: String -> Meta -> Assertion +hasMeta :: String -> [(T.Text,MetaValue)] -> Assertion hasMeta s cmp = let (Pandoc meta blocks) = runMM s - in meta @?= cmp + in meta @?= (Meta $ M.fromList cmp) tests :: [TestTree] tests = @@ -59,9 +59,9 @@ tests = , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Para [Str "======",Space,Str "6",Space,Str "======"]] , testGroup "toc" $ - [ testCase "tocPresent" $ "<>" `hasMeta` - (Meta (M.singleton "toc" (MetaBool True))) - --[("toc",MetaBool True)] + [ testCase "tocPresent" $ "<>" `hasMeta` [("toc",MetaBool True)] + , testCase "tocAbsent" $ "<>" `hasMeta` [] + , testCase "tocLevel1" $ "<>" `hasMeta` [("toc",MetaBool True),("toclevel",MetaString "1")] ] , testCase "superscript" $ "^2^" `readsTo` [Para [Superscript [Str "2"]]] From 2e199513149d4bb1da67d1a66c5722d75250386f Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 12:12:14 +0000 Subject: [PATCH 54/76] (DEV) test file tweaks --- testMoin.hs | 4 ++-- testmoin.txt | 2 ++ 2 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/testMoin.hs b/testMoin.hs index 1f5c5d314864..8c222615eed2 100644 --- a/testMoin.hs +++ b/testMoin.hs @@ -14,7 +14,7 @@ import Text.Show.Pretty sampleMW = T.pack "\ \= sample mediawiki doc =\n\ -\\n\ +\__TOC__\n\ \This is a ''sample'' CamelCase document.\n\ \[https://jmtd.net jon's homepage]\n\ \\n\ @@ -32,7 +32,7 @@ parseMM = fromRight (error "?") . runPure . readMoinMoin def main = do -- what structure do we get from a Mediawiki doc? - -- (putStrLn . show) parsedMW + -- (putStrLn . ppShow) parsedMW -- putStrLn "##################################" -- what happens to definition list in markdown output? diff --git a/testmoin.txt b/testmoin.txt index 8e9b3097edc7..787fd70cad46 100644 --- a/testmoin.txt +++ b/testmoin.txt @@ -4,6 +4,8 @@ == Jon Dowland == +<> + * [[http://jmtd.net|jmtd.net]] * another bullet From 92a8d2337cb1190e5c0aa0ca25cc4e4ecf4cd966 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 12:19:49 +0000 Subject: [PATCH 55/76] fix header-level flattening >5 -> 5 --- src/Text/Pandoc/Readers/MoinMoin.hs | 5 +---- test/Tests/Readers/MoinMoin.hs | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index c09d43908116..8276eea53eb0 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -94,11 +94,8 @@ header :: PandocMonad m => MoinParser m B.Blocks header = try $ do guardColumnOne lev <- length <$> many1 (char '=') - -- XXX: MoinMoin flattens higher levels to 5, so - -- ====== 6 ====== =>
- guard $ lev <= 5 contents <- B.trimInlines . mconcat <$> manyTill inline (count lev $ char '=') - return $ B.header lev contents + return $ B.header (min 5 lev) contents -- from Readers.Mediawiki guardColumnOne :: PandocMonad m => MoinParser m () diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 6e7559896965..14bcd8f9fd7e 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -56,7 +56,7 @@ tests = , testCase "heading 3" $ "=== 3 ===" `readsTo` [Header 3 ("",[],[]) [Str "3"]] , testCase "heading 4" $ "==== 4 ====" `readsTo` [Header 4 ("",[],[]) [Str "4"]] , testCase "heading 5" $ "===== 5 =====" `readsTo` [Header 5 ("",[],[]) [Str "5"]] - , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Para [Str "======",Space,Str "6",Space,Str "======"]] + , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Header 5 ("",[],[]) [Str "6"]] , testGroup "toc" $ [ testCase "tocPresent" $ "<>" `hasMeta` [("toc",MetaBool True)] From 3481b27f0d287c3629dc302f872fb8b21f0d35f6 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 15:43:42 +0000 Subject: [PATCH 56/76] remove some done TODOs --- SimonMoin.hs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/SimonMoin.hs b/SimonMoin.hs index 66c67d91ab1c..0f49263a5fab 100644 --- a/SimonMoin.hs +++ b/SimonMoin.hs @@ -29,22 +29,15 @@ Partial conversion from MoinMoin-formatted text (plus some pandoc-isms like smart punctuation) to Pandoc. Based on the Markdown reader. 
TODO: -[ ] table of contents: <> or <> [ ] moin 1.6 double bracket links: - [ ] [[#anchorname]], - [ ] [[#anchorname|description]], - [ ] [[PageName#anchorname]], - [ ] [[PageName#anchorname|description]], [ ] [[attachment:filename.txt]] [ ] {{http://static.moinmo.in/logos/moinmoin.png}} should be an image -[X] camel-case links assume ascii letters [ ] indented blockquotes [ ] definition lists [ ] nested/multiply-indented lists, blocks, code blocks [ ] tables [ ] images [ ] <> inserts a link anchor -[ ] <
> a hard break [ ] <> a note [ ] <> - just ignore this [ ] <> treat as email link; pandoc has obfuscation From 0c2382b16739993233e6313b29c591d270b26bae Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 15:47:42 +0000 Subject: [PATCH 57/76] anchor tests --- test/Tests/Readers/MoinMoin.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 14bcd8f9fd7e..6bb27bfa742a 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -86,6 +86,10 @@ tests = , testCase "notalink" $ "Not''''''Link" `readsTo` [Para [Str "NotLink"]] , testCase "singular1" $ "SinGular''''''s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] , testCase "singular2" $ "SinGular``s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] + , testCase "anchor1" $ "[[#foo]]" `readsTo` [Para [Link ("",[],[]) [Str "#foo"] ("#foo","")]] + , testCase "anchor2" $ "[[#foo|bar]]"`readsTo` [Para [Link ("",[],[]) [Str "bar"] ("#foo","")]] + , testCase "anchor3" $ "[[foo#bar]]"`readsTo` [Para [Link ("",[],[]) [Str "foo#bar"] ("foo#bar","")]] + , testCase "anchor4" $ "[[foo#bar|baz]]"`readsTo` [Para [Link ("",[],[]) [Str "baz"] ("foo#bar","")]] ] , testGroup "blocks" From 59d0af372fe592f5d4c8ffc23d9db5584e81dace Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 15:47:49 +0000 Subject: [PATCH 58/76] lineBreak --- src/Text/Pandoc/Readers/MoinMoin.hs | 4 ++++ test/Tests/Readers/MoinMoin.hs | 1 + 2 files changed, 5 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 8276eea53eb0..de6b7b0e2d1e 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -134,6 +134,7 @@ inline = whitespace <|> inlineComment <|> endline <|> tableOfContents + <|> lineBreak <|> special -- from Readers.Mediawiki @@ -247,6 +248,9 @@ tableOfContents = try $ do updateState 
$ \st -> st { mmMeta = B.setMeta "toc" True (mmMeta st) } return mempty +lineBreak :: PandocMonad m => MoinParser m B.Inlines +lineBreak = string "<
>" >> return B.linebreak + special :: PandocMonad m => MoinParser m B.Inlines special = B.str . T.singleton <$> oneOf specialChars diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 6bb27bfa742a..b374f44bd752 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -64,6 +64,7 @@ tests = , testCase "tocLevel1" $ "<>" `hasMeta` [("toc",MetaBool True),("toclevel",MetaString "1")] ] + , testCase "LineBreak" $ "<
>" `readsTo` [Para [LineBreak]] , testCase "superscript" $ "^2^" `readsTo` [Para [Superscript [Str "2"]]] , testCase "subscript" $ ",,low,," `readsTo` [Para [Subscript [Str "low"]]] From 57ddff402fdb77a94ead3e57a95a390d397c1743 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 16:02:58 +0000 Subject: [PATCH 59/76] anchor done --- SimonMoin.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/SimonMoin.hs b/SimonMoin.hs index 0f49263a5fab..e09f5aa1c2c0 100644 --- a/SimonMoin.hs +++ b/SimonMoin.hs @@ -37,7 +37,6 @@ TODO: [ ] nested/multiply-indented lists, blocks, code blocks [ ] tables [ ] images -[ ] <> inserts a link anchor [ ] <> a note [ ] <> - just ignore this [ ] <> treat as email link; pandoc has obfuscation From 9349ad6dbba8ff7b6f86f42fe6d37edbc978f215 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sat, 21 Feb 2026 16:03:07 +0000 Subject: [PATCH 60/76] anchor --- src/Text/Pandoc/Readers/MoinMoin.hs | 9 ++++++++- test/Tests/Readers/MoinMoin.hs | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index de6b7b0e2d1e..80a35f16a8bc 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -135,6 +135,7 @@ inline = whitespace <|> endline <|> tableOfContents <|> lineBreak + <|> anchor <|> special -- from Readers.Mediawiki @@ -249,7 +250,13 @@ tableOfContents = try $ do return mempty lineBreak :: PandocMonad m => MoinParser m B.Inlines -lineBreak = string "<
>" >> return B.linebreak +lineBreak = try $ string "<
>" >> return B.linebreak + +anchor :: PandocMonad m => MoinParser m B.Inlines +anchor = try $ do + string "<>") + return $ B.spanWith (T.pack name,[],[]) (B.fromList []) special :: PandocMonad m => MoinParser m B.Inlines special = B.str . T.singleton <$> oneOf specialChars diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index b374f44bd752..cd887d04ea7c 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -65,6 +65,8 @@ tests = ] , testCase "LineBreak" $ "<
>" `readsTo` [Para [LineBreak]] + , testCase "anchor" $ "<>" `readsTo` [Para [Span ("foo",[],[]) []]] + , testCase "superscript" $ "^2^" `readsTo` [Para [Superscript [Str "2"]]] , testCase "subscript" $ ",,low,," `readsTo` [Para [Subscript [Str "low"]]] From 7f84ece0746e9ec235032326edcf099d5c57bc5c Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sun, 22 Feb 2026 14:37:00 +0000 Subject: [PATCH 61/76] inline {{{code}}} (not the multiline parser) --- src/Text/Pandoc/Readers/MoinMoin.hs | 8 ++++++++ test/Tests/Readers/MoinMoin.hs | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 80a35f16a8bc..96fa4e28227b 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -136,6 +136,7 @@ inline = whitespace <|> tableOfContents <|> lineBreak <|> anchor + <|> code <|> special -- from Readers.Mediawiki @@ -258,6 +259,13 @@ anchor = try $ do name <- manyTill anyChar (try $ string ")>>") return $ B.spanWith (T.pack name,[],[]) (B.fromList []) +-- NOTE: not to be confused with 'parser' (block) +code :: PandocMonad m => MoinParser m B.Inlines +code = try $ do + string "{{{" + pre <- manyTillChar (noneOf "\n") (try $ string "}}}") + return $ B.code pre + special :: PandocMonad m => MoinParser m B.Inlines special = B.str . 
T.singleton <$> oneOf specialChars diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index cd887d04ea7c..78df9a11168d 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -42,6 +42,11 @@ tests = , testCase "bold" $ "'''hi'''" `readsTo` [Para [Strong [Str "hi"]]] , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] + , testCase "monospace" $ "`hi`" `readsTo` [Para [Code nullAttr "hi"]] + + , testCase "code1" $ "{{{hi}}}" `readsTo` [Para [Code nullAttr "hi"]] + , testCase "code2" $ "{{{ hi }}}" `readsTo` [Para [Code nullAttr " hi "]] + , testCase "code3" $ "{{{''hi''}}}" `readsTo` [Para [Code nullAttr "''hi''"]] -- broken , testCase "italic and bold" $ From 9136624a7852b8d8d10c511f6e09c0ee0b7c025b Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Mon, 23 Feb 2026 21:05:01 +0000 Subject: [PATCH 62/76] Initial Parser block-handler deals with a parser section with no specified hashbang, and no custom delimiters. --- src/Text/Pandoc/Readers/MoinMoin.hs | 19 +++++++++++++++++++ test/Tests/Readers/MoinMoin.hs | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 96fa4e28227b..8b97ca760f64 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -86,6 +86,7 @@ block = do <|> header <|> comment <|> bulletList + <|> parser <|> para return res @@ -117,6 +118,24 @@ bulletListItem = try $ do spaces B.plain . B.trimInlines . mconcat <$> manyTill inline newline +-- block-level MoinMoin 'Parser'. +-- Not to be confused with 'code' (inline) +parser :: PandocMonad m => MoinParser m B.Blocks +parser = try $ do + notFollowedBy code -- if the inline 'code' parser could handle this, then it should. + string "{{{" -- TODO: >3 delimiters are accepted, as are {{{unique ... 
unique}}} + -- TODO: optional #!, name-of-parser, many1 spaceNotNL, parser-args, many spaceNotNL, newline + many spaceNotNL + char '\n' + pre <- manyTillChar anyChar closer + return $ B.codeBlock pre + where + spaceNotNL = satisfy (\c -> isSpace c && not (c == '\n')) + closer = try $ do + char '\n' + many spaceNotNL + string "}}}" + inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> camelCaseLink diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 78df9a11168d..901740606739 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -106,6 +106,12 @@ tests = , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"],Para [Str "world"]] , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] + , testGroup "parser" + [ testCase "parser1" $ "{{{\nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parser2" $ "{{{ \nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parser3" $ "{{{\nhi\n }}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parser4" $ "{{{\nhi\nthere\n}}}" `readsTo` [CodeBlock nullAttr "hi\nthere"] + ] ] , testGroup "emptyDelims" [ testCase "empty bold" $ "''''''" `readsTo` [Para []] From cb97b5a6236eeb012938a15bc94db5f565e8339a Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 25 Feb 2026 21:15:53 +0000 Subject: [PATCH 63/76] parser delim: >3 braces and embedded brace strings --- src/Text/Pandoc/Readers/MoinMoin.hs | 14 ++++++++++---- test/Tests/Readers/MoinMoin.hs | 10 ++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 8b97ca760f64..ae5305f16ec2 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -123,18 +123,24 @@ bulletListItem = try $ do parser :: PandocMonad m => MoinParser m B.Blocks parser = try $ do 
notFollowedBy code -- if the inline 'code' parser could handle this, then it should. - string "{{{" -- TODO: >3 delimiters are accepted, as are {{{unique ... unique}}} + + open <- many (char '{') -- TODO: delims can have pre/suffixes e.g. {{{unique ... unique}}} + let len = length open + guard (len >= 3) + -- TODO: optional #!, name-of-parser, many1 spaceNotNL, parser-args, many spaceNotNL, newline many spaceNotNL char '\n' - pre <- manyTillChar anyChar closer + pre <- manyTillChar anyChar (closer len) return $ B.codeBlock pre + where spaceNotNL = satisfy (\c -> isSpace c && not (c == '\n')) - closer = try $ do + closer n = try $ do char '\n' many spaceNotNL - string "}}}" + delim <- many (char '}') + guard (length delim == n) inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 901740606739..d8d16dbf64b6 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -107,10 +107,12 @@ tests = , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] , testGroup "parser" - [ testCase "parser1" $ "{{{\nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parser2" $ "{{{ \nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parser3" $ "{{{\nhi\n }}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parser4" $ "{{{\nhi\nthere\n}}}" `readsTo` [CodeBlock nullAttr "hi\nthere"] + [ testCase "parser1" $ "{{{\nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserChompHead" $ "{{{ \nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserChomptail" $ "{{{\nhi\n }}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserMultiLine" $ "{{{\nhi\nthere\n}}}" `readsTo` [CodeBlock nullAttr "hi\nthere"] + , testCase "parser4delims" $ "{{{{\nhi\n}}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserInnerDelim"$ "{{{{\n{{{hi}}}\n}}}}" `readsTo` 
[CodeBlock nullAttr "{{{hi}}}"] ] ] , testGroup "emptyDelims" From 2c4583071253ab1d5b937c85675b5033f9264ffa Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Wed, 25 Feb 2026 21:22:30 +0000 Subject: [PATCH 64/76] implement custom parser delimeters --- src/Text/Pandoc/Readers/MoinMoin.hs | 16 ++++++++-------- test/Tests/Readers/MoinMoin.hs | 13 +++++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index ae5305f16ec2..085858dcd06b 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -122,25 +122,25 @@ bulletListItem = try $ do -- Not to be confused with 'code' (inline) parser :: PandocMonad m => MoinParser m B.Blocks parser = try $ do - notFollowedBy code -- if the inline 'code' parser could handle this, then it should. - - open <- many (char '{') -- TODO: delims can have pre/suffixes e.g. {{{unique ... unique}}} + open <- many (char '{') + open2 <- many (oneOf ('_':['a'..'z']++['A'..'Z']++['0'..'9'])) let len = length open guard (len >= 3) -- TODO: optional #!, name-of-parser, many1 spaceNotNL, parser-args, many spaceNotNL, newline many spaceNotNL char '\n' - pre <- manyTillChar anyChar (closer len) + + let delim = open2 ++ (take len (repeat '}')) + pre <- manyTillChar anyChar (closer delim) return $ B.codeBlock pre - where + where -- could use Text.Pandoc.Parsing.spaceChar? 
spaceNotNL = satisfy (\c -> isSpace c && not (c == '\n')) - closer n = try $ do + closer delim = try $ do char '\n' many spaceNotNL - delim <- many (char '}') - guard (length delim == n) + string delim inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index d8d16dbf64b6..f981db12f357 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -107,12 +107,13 @@ tests = , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] , testGroup "parser" - [ testCase "parser1" $ "{{{\nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parserChompHead" $ "{{{ \nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parserChomptail" $ "{{{\nhi\n }}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parserMultiLine" $ "{{{\nhi\nthere\n}}}" `readsTo` [CodeBlock nullAttr "hi\nthere"] - , testCase "parser4delims" $ "{{{{\nhi\n}}}}" `readsTo` [CodeBlock nullAttr "hi"] - , testCase "parserInnerDelim"$ "{{{{\n{{{hi}}}\n}}}}" `readsTo` [CodeBlock nullAttr "{{{hi}}}"] + [ testCase "parser1" $ "{{{\nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserChompHead" $ "{{{ \nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserChomptail" $ "{{{\nhi\n }}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserMultiLine" $ "{{{\nhi\nthere\n}}}" `readsTo` [CodeBlock nullAttr "hi\nthere"] + , testCase "parser4delims" $ "{{{{\nhi\n}}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserInnerDelim" $ "{{{{\n{{{hi}}}\n}}}}" `readsTo` [CodeBlock nullAttr "{{{hi}}}"] + , testCase "parserCustomDelim"$"{{{badidea\nhi\nbadidea}}}" `readsTo` [CodeBlock nullAttr "hi"] ] ] , testGroup "emptyDelims" From 1d076562daac4f09a9d829b539ae990f9a6ce412 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 26 Feb 2026 19:54:27 +0000 Subject: [PATCH 65/76] bare email and uri 
detection (from Simon's work) --- src/Text/Pandoc/Readers/MoinMoin.hs | 12 ++++++++++++ test/Tests/Readers/MoinMoin.hs | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 085858dcd06b..1e0a63cd265b 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -145,6 +145,8 @@ parser = try $ do inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> camelCaseLink + <|> emailAddressLink + <|> uriLink <|> str <|> bold <|> monospace @@ -191,6 +193,16 @@ camelCaseLink = try $ do let label = B.str tsrc return $ B.link tsrc title label +emailAddressLink :: PandocMonad m => MoinParser m B.Inlines +emailAddressLink = try $ do + (e, escaped_mailto_uri) <- emailAddress + return $ B.link escaped_mailto_uri "" $ B.str e + +uriLink :: PandocMonad m => MoinParser m B.Inlines +uriLink = try $ do + (u, uri_escaped) <- uri + return $ B.link u "" $ B.str uri_escaped + -- from Readers.Mediawiki str :: PandocMonad m => MoinParser m B.Inlines str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index f981db12f357..2cf3cc356d2c 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -98,6 +98,8 @@ tests = , testCase "anchor2" $ "[[#foo|bar]]"`readsTo` [Para [Link ("",[],[]) [Str "bar"] ("#foo","")]] , testCase "anchor3" $ "[[foo#bar]]"`readsTo` [Para [Link ("",[],[]) [Str "foo#bar"] ("foo#bar","")]] , testCase "anchor4" $ "[[foo#bar|baz]]"`readsTo` [Para [Link ("",[],[]) [Str "baz"] ("foo#bar","")]] + , testCase "bareUri" $ "http://example.com" `readsTo` [Para [Link nullAttr [Str "http://example.com"] ("http://example.com","")]] + , testCase "bareEmail" $ "jon@example.com" `readsTo` [Para [Link nullAttr [Str "jon@example.com"] ("mailto:jon@example.com","")]] ] , testGroup "blocks" From f9b1cc1872779e3975ee46615c42b1f12edc4e22 Mon Sep 17 
00:00:00 2001 From: Jonathan Dowland Date: Thu, 26 Feb 2026 19:58:15 +0000 Subject: [PATCH 66/76] (DEV) rm some stuff taken or not needed --- SimonMoin.hs | 64 +--------------------------------------------------- 1 file changed, 1 insertion(+), 63 deletions(-) diff --git a/SimonMoin.hs b/SimonMoin.hs index e09f5aa1c2c0..a80ac2098df5 100644 --- a/SimonMoin.hs +++ b/SimonMoin.hs @@ -102,24 +102,6 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" -- -- document structure -- - -parseMoinMoin :: GenParser Char ParserState Pandoc -parseMoinMoin = do - processingInstructions - blocks <- parseBlocks - return $ Pandoc (Meta [] [] [] {-title author date-}) $ filter (/= Null) blocks - -processingInstructions :: GenParser Char a () -processingInstructions = many (char '#' >> manyTill anyChar newline) >> return () - -comment :: GenParser Char a () -comment = try $ do - pos <- getPosition - when (sourceColumn pos /= 0) $ fail "" - string "##" - manyTill anyChar newline - return () - -- -- parsing blocks -- @@ -141,19 +123,7 @@ block = do , nullBlock ]) "block" --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = try $ do - level <- many1 (char '=') >>= return . length - skipSpaces - text <- manyTill inline headerEnd >>= return . normalizeSpaces - return (Header level text) "header" -headerEnd :: GenParser Char st [Char] -headerEnd = try $ skipSpaces >> skipMany (char '=') >> blanklines -- -- hrule block @@ -371,27 +341,6 @@ code = try $ do notFollowedBy (char '`'))) return $ Code nullAttr $ removeLeadingTrailingSpace $ concat result -emph :: GenParser Char ParserState Inline -emph = (enclosed (string "''") (string "''") inline) >>= return . Emph . normalizeSpaces - -strong :: GenParser Char ParserState Inline -strong = enclosed (string "'''") (string "'''") inline >>= return . Strong . 
normalizeSpaces - - -strikeout :: GenParser Char ParserState Inline -strikeout = failIfStrict >> enclosed (string "--(") (try $ string ")--") inline >>= - return . Strikeout . normalizeSpaces - -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Superscript - -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> enclosed (string ",,") (string ",,") - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Subscript - whitespace :: GenParser Char ParserState Inline whitespace = do sps <- many1 (oneOf spaceChars) @@ -473,22 +422,11 @@ endline = try $ do -- return $ decodeCharacterReferences tit link :: GenParser Char ParserState Inline -link = choice [uriLink - ,emailAddressLink +link = choice [ ,localPageCamelCaseLink ,moin16BracketLink ] -uriLink :: GenParser Char ParserState Inline -uriLink = try $ do - (u, uri_escaped) <- uri - return $ Link [Code nullAttr u] (uri_escaped, "") - -emailAddressLink :: GenParser Char ParserState Inline -emailAddressLink = try $ do - (e, escaped_mailto_uri) <- emailAddress - return $ Link [Str e] (escaped_mailto_uri, "") - localPageCamelCaseLink :: GenParser Char ParserState Inline localPageCamelCaseLink = try $ do (p,_) <- localPageCamelCase From 1ab92b46e744b93d5fd145b270a78d1aa43582f9 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Thu, 26 Feb 2026 20:02:53 +0000 Subject: [PATCH 67/76] fix monospace inline not spanning lines --- src/Text/Pandoc/Readers/MoinMoin.hs | 2 +- test/Tests/Readers/MoinMoin.hs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 1e0a63cd265b..67ca6da69148 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -233,7 +233,7 @@ bold = try $ do monospace :: PandocMonad m => MoinParser m 
B.Inlines monospace = try $ do char '`' - inner <- manyTill anyChar (char '`') + inner <- manyTill (noneOf "`\n") (char '`') if null inner then return (B.fromList []) else (return . B.code . T.pack) inner diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs index 2cf3cc356d2c..e91807d9de07 100644 --- a/test/Tests/Readers/MoinMoin.hs +++ b/test/Tests/Readers/MoinMoin.hs @@ -43,6 +43,7 @@ tests = , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] , testCase "monospace" $ "`hi`" `readsTo` [Para [Code nullAttr "hi"]] + , testCase "notMono" $ "`h\ni`" `readsTo` [Para [Str "`h",SoftBreak,Str "i`"]] , testCase "code1" $ "{{{hi}}}" `readsTo` [Para [Code nullAttr "hi"]] , testCase "code2" $ "{{{ hi }}}" `readsTo` [Para [Code nullAttr " hi "]] From e0f82880ad0a1f1c3edd7977f7b6a1090357135a Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Fri, 27 Feb 2026 22:30:16 +0000 Subject: [PATCH 68/76] initial parserHashBang not hooked into the Parser parser (erm) yet --- src/Text/Pandoc/Readers/MoinMoin.hs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index 67ca6da69148..c3082beaceff 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -123,11 +123,10 @@ bulletListItem = try $ do parser :: PandocMonad m => MoinParser m B.Blocks parser = try $ do open <- many (char '{') - open2 <- many (oneOf ('_':['a'..'z']++['A'..'Z']++['0'..'9'])) + open2 <- many (satisfy isWordChar) let len = length open guard (len >= 3) - -- TODO: optional #!, name-of-parser, many1 spaceNotNL, parser-args, many spaceNotNL, newline many spaceNotNL char '\n' @@ -142,6 +141,19 @@ parser = try $ do many spaceNotNL string delim +-- intended to be equivalent to Python2 re '\w' with re.UNICODE set +isWordChar :: Char -> Bool +isWordChar c = isAlphaNum c || 
c == '_' + +parserHashBang :: PandocMonad m => MoinParser m (String, Maybe String) +parserHashBang = do + string "#!" + parserName <- many (satisfy isWordChar) -- can be empty + parserArgs <- optionMaybe $ try $ do + many space + many1 (satisfy (/='\n')) + return (parserName, parserArgs) + inline :: PandocMonad m => MoinParser m B.Inlines inline = whitespace <|> camelCaseLink From 604528c3c2bca215c3f769bd1440e61356e04596 Mon Sep 17 00:00:00 2001 From: Jonathan Dowland Date: Sun, 1 Mar 2026 12:13:37 +0000 Subject: [PATCH 69/76] expand parserHashBang, list supported dialects and handle class names for wiki text --- src/Text/Pandoc/Readers/MoinMoin.hs | 48 ++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs index c3082beaceff..ca4e496a16a3 100644 --- a/src/Text/Pandoc/Readers/MoinMoin.hs +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -145,14 +145,49 @@ parser = try $ do isWordChar :: Char -> Bool isWordChar c = isAlphaNum c || c == '_' -parserHashBang :: PandocMonad m => MoinParser m (String, Maybe String) -parserHashBang = do +-- Moin supports (at least) creole, csv (table builder), docbook, highlight, html, +-- wiki, rst, text (plain:
) and xslt. The following names are deprecated
+-- shorthand for highlight (the name is converted to the argument to highlight):
+-- C++, diff, IRC(irssi), java, pascal, python:
+--
+-- for now we only support wiki (moin native format) and plain text.
+data ParserSpec = ParserWiki [String]
+                | ParserText
+                | ParserUnsupported
+                deriving (Show)
+
+parserHashBang :: PandocMonad m => MoinParser m ParserSpec
+parserHashBang = try $ do
   string "#!"
-  parserName <- many (satisfy isWordChar) -- can be empty
+  parserName <- many (satisfy isWordChar)
   parserArgs <- optionMaybe $ try $ do
-    many space
+    many1 space
     many1 (satisfy (/='\n'))
-  return (parserName, parserArgs)
+
+  return $ case parserName of
+    "wiki" -> ParserWiki (unmangleWikiArgs parserArgs)
+    "text" -> ParserText
+    ""     -> ParserText
+    _      -> ParserUnsupported
+
+unmangleWikiArgs :: Maybe String -> [String]
+unmangleWikiArgs Nothing = []
+unmangleWikiArgs (Just x) = let
+  stripped = T.strip (T.pack x)
+  split = T.splitOn "/" stripped
+  in map T.unpack split
+
+test_unmangleWikiArgs_simple     =  unmangleWikiArgs (Just "foo/bar") == ["foo","bar"]
+test_unmangleWikiArgs_prespace   =  unmangleWikiArgs (Just "       foo/bar") == ["foo","bar"]
+test_unmangleWikiArgs_postspace  =  unmangleWikiArgs (Just "foo/bar   ") == ["foo","bar"]
+test_unmangleWikiArgs_nowt       =  unmangleWikiArgs Nothing == []
+
+tests = and
+ [ test_unmangleWikiArgs_simple
+ , test_unmangleWikiArgs_prespace
+ , test_unmangleWikiArgs_postspace
+ , test_unmangleWikiArgs_nowt
+ ]
 
 inline :: PandocMonad m => MoinParser m B.Inlines
 inline =  whitespace
@@ -347,8 +382,7 @@ spaceChars = " \n\t"
 ------------------------------------------------------------------------------
 -- debug functions for use in GHCi
 
-p1 :: Monoid a
-   => MoinParser PandocPure a -> T.Text -> Either ParseError a
+p1 :: MoinParser PandocPure a -> T.Text -> Either ParseError a
 p1 p' = fromRight (error "unhandled PandocError")
       . runPure
       . runParserT p' defaultMoinState "?"

From ae67d3cfb119304ba6f525cb903d919aebc23f65 Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Sun, 1 Mar 2026 21:22:31 +0000
Subject: [PATCH 70/76] hook parserHashBang into parser

handling ParserWiki doesn't work properly yet
---
 src/Text/Pandoc/Readers/MoinMoin.hs | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index ca4e496a16a3..6ab22d103dba 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -118,6 +118,10 @@ bulletListItem = try $ do
   spaces
   B.plain . B.trimInlines . mconcat <$> manyTill inline newline
 
+-- | One 'isSpace' character that is not '\n'. NOTE(review): could Text.Pandoc.Parsing.spaceChar do?
+spaceNotNL :: PandocMonad m => MoinParser m Char
+spaceNotNL = satisfy $ \ch -> ch /= '\n' && isSpace ch
+
 -- block-level MoinMoin 'Parser'.
 -- Not to be confused with 'code' (inline)
 parser :: PandocMonad m => MoinParser m B.Blocks
@@ -127,15 +131,20 @@ parser = try $ do
   let len = length open
   guard (len >= 3)
 
+  pspec <- optionMaybe (try parserHashBang)
   many spaceNotNL
   char '\n'
 
   let delim = open2 ++ (take len (repeat '}'))
-  pre <- manyTillChar anyChar (closer delim)
-  return $ B.codeBlock pre
 
-  where -- could use Text.Pandoc.Parsing.spaceChar?
-    spaceNotNL = satisfy (\c -> isSpace c && not (c == '\n'))
+  case pspec of
+    Just (ParserWiki args) -> do
+      inner <- manyTill block (closer delim)
+      (return . B.divWith nullAttr . mconcat) inner
+        -- Left "?" (line 1, column 3): unexpected end of input
+    _ -> manyTillChar anyChar (closer delim) >>= return . B.codeBlock
+
+  where
     closer delim = try $ do
       char '\n'
       many spaceNotNL
@@ -157,11 +166,11 @@ data ParserSpec = ParserWiki [String]
                 deriving (Show)
 
 parserHashBang :: PandocMonad m => MoinParser m ParserSpec
-parserHashBang = try $ do
+parserHashBang = do
   string "#!"
   parserName <- many (satisfy isWordChar)
   parserArgs <- optionMaybe $ try $ do
-    many1 space
+    many1 spaceNotNL
     many1 (satisfy (/='\n'))
 
   return $ case parserName of

From c9ee7a610c34831f4e051348490e550e6f5da0fb Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Mon, 2 Mar 2026 19:50:58 +0000
Subject: [PATCH 71/76] Add unit tests for parserHashBang and newlines in
 arguments

---
 src/Text/Pandoc/Readers/MoinMoin.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index 6ab22d103dba..e39ef00a7fd0 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -163,7 +163,7 @@ isWordChar c = isAlphaNum c || c  == '_'
 data ParserSpec = ParserWiki [String]
                 | ParserText
                 | ParserUnsupported
-                deriving (Show)
+                deriving (Show, Eq)
 
 parserHashBang :: PandocMonad m => MoinParser m ParserSpec
 parserHashBang = do
@@ -179,6 +179,11 @@ parserHashBang = do
     ""     -> ParserText
     _      -> ParserUnsupported
 
+test_parserHashBang_noNL_in_args1 = p1 parserHashBang "#!wiki\nremaining" == Right (ParserWiki [])
+
+test_parserHashBang_noNL_in_args2 = p1 (parserHashBang >> many anyChar) "#!wiki\nremaining"
+  == Right "\nremaining"
+
 unmangleWikiArgs :: Maybe String -> [String]
 unmangleWikiArgs Nothing = []
 unmangleWikiArgs (Just x) = let
@@ -196,6 +201,8 @@ tests = and
  , test_unmangleWikiArgs_prespace
  , test_unmangleWikiArgs_postspace
  , test_unmangleWikiArgs_nowt
+ , test_parserHashBang_noNL_in_args1
+ , test_parserHashBang_noNL_in_args2
  ]
 
 inline :: PandocMonad m => MoinParser m B.Inlines

From 360818791bd46601dbac39eeecc4e48c4eb4a009 Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Thu, 5 Mar 2026 20:07:03 +0000
Subject: [PATCH 72/76] fix parsing ParserWiki

---
 src/Text/Pandoc/Readers/MoinMoin.hs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index e39ef00a7fd0..29e553948026 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -142,6 +142,11 @@ parser = try $ do
       inner <- manyTill block (closer delim)
       (return . B.divWith nullAttr . mconcat) inner
         -- Left "?" (line 1, column 3): unexpected end of input
+
+    Just (ParserHighlight lang) -> do
+      let attr = ("", [lang], [])
+      manyTillChar anyChar (closer delim) >>= return . B.codeBlockWith attr
+
     _ -> manyTillChar anyChar (closer delim) >>= return . B.codeBlock
 
   where
@@ -332,6 +337,7 @@ endline = try $ do
   newline
   notFollowedBy blankline
   notFollowedBy (string "##")
+  notFollowedBy (string "}}}") -- to avoid breaking Parser
   (eof >> return mempty)
     <|> (skipMany spaceChar >> return B.softbreak)
 

From 7f659954b9f2ca7c28426c1d14fbfe56796ba666 Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Wed, 4 Mar 2026 22:43:10 +0000
Subject: [PATCH 73/76] begin extending parser to handle code highlighting

---
 src/Text/Pandoc/Readers/MoinMoin.hs | 17 +++++++++++++----
 test/Tests/Readers/MoinMoin.hs      |  6 +++++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index 29e553948026..043822fa395e 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -167,6 +167,7 @@ isWordChar c = isAlphaNum c || c  == '_'
 -- for now we only support wiki (moin native format) and plain text.
 data ParserSpec = ParserWiki [String]
                 | ParserText
+                | ParserHighlight T.Text
                 | ParserUnsupported
                 deriving (Show, Eq)
 
@@ -179,10 +180,18 @@ parserHashBang = do
     many1 (satisfy (/='\n'))
 
   return $ case parserName of
-    "wiki" -> ParserWiki (unmangleWikiArgs parserArgs)
-    "text" -> ParserText
-    ""     -> ParserText
-    _      -> ParserUnsupported
+    "wiki"   -> ParserWiki (unmangleWikiArgs parserArgs)
+    "text"   -> ParserText
+    "haskell"-> ParserHighlight (mmLangTopdLang "haskell")
+    ""       -> ParserText
+    _        -> ParserUnsupported
+
+-- map language names as recognised by MoinMoin to equivalents as
+-- recognised by Pandoc
+mmLangTopdLang :: String -> T.Text
+mmLangTopdLang s = case s of
+  "haskell" -> "haskell"
+  _         -> "unknown"
 
 test_parserHashBang_noNL_in_args1 = p1 parserHashBang "#!wiki\nremaining" == Right (ParserWiki [])
 
diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs
index e91807d9de07..e32a50945e23 100644
--- a/test/Tests/Readers/MoinMoin.hs
+++ b/test/Tests/Readers/MoinMoin.hs
@@ -116,7 +116,11 @@ tests =
       , testCase "parserMultiLine"  $ "{{{\nhi\nthere\n}}}"       `readsTo` [CodeBlock nullAttr "hi\nthere"]
       , testCase "parser4delims"    $ "{{{{\nhi\n}}}}"            `readsTo` [CodeBlock nullAttr "hi"]
       , testCase "parserInnerDelim" $ "{{{{\n{{{hi}}}\n}}}}"      `readsTo` [CodeBlock nullAttr "{{{hi}}}"]
-      , testCase "parserCustomDelim"$"{{{badidea\nhi\nbadidea}}}" `readsTo` [CodeBlock nullAttr "hi"]
+      , testCase "parserCustomDelim"$ "{{{badidea\nhi\nbadidea}}}"`readsTo` [CodeBlock nullAttr "hi"]
+      , testGroup "parserHighlight"
+        [ testCase "parserHaskell"  $ "{{{#!haskell\nx = 1\n}}}"  `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"]
+
+        ]
       ]
     ]
   , testGroup "emptyDelims"

From f0355c436a4dfc17dcbb498a44be8c9fd4779bf4 Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Thu, 5 Mar 2026 20:08:19 +0000
Subject: [PATCH 74/76] Implement parser-wiki class handling

---
 src/Text/Pandoc/Readers/MoinMoin.hs | 13 +++++--------
 test/Tests/Readers/MoinMoin.hs      |  4 +++-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index 043822fa395e..1ef6714d9fe3 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -140,8 +140,8 @@ parser = try $ do
   case pspec of
     Just (ParserWiki args) -> do
       inner <- manyTill block (closer delim)
-      (return . B.divWith nullAttr . mconcat) inner
-        -- Left "?" (line 1, column 3): unexpected end of input
+      let attr = ("", args, [])
+      (return . B.divWith attr . mconcat) inner
 
     Just (ParserHighlight lang) -> do
       let attr = ("", [lang], [])
@@ -165,7 +165,7 @@ isWordChar c = isAlphaNum c || c  == '_'
 -- C++, diff, IRC(irssi), java, pascal, python:
 --
 -- for now we only support wiki (moin native format) and plain text.
-data ParserSpec = ParserWiki [String]
+data ParserSpec = ParserWiki [T.Text]
                 | ParserText
                 | ParserHighlight T.Text
                 | ParserUnsupported
@@ -198,12 +198,9 @@ test_parserHashBang_noNL_in_args1 = p1 parserHashBang "#!wiki\nremaining" == Rig
 test_parserHashBang_noNL_in_args2 = p1 (parserHashBang >> many anyChar) "#!wiki\nremaining"
   == Right "\nremaining"
 
-unmangleWikiArgs :: Maybe String -> [String]
+unmangleWikiArgs :: Maybe String -> [T.Text]
 unmangleWikiArgs Nothing = []
-unmangleWikiArgs (Just x) = let
-  stripped = T.strip (T.pack x)
-  split = T.splitOn "/" stripped
-  in map T.unpack split
+unmangleWikiArgs (Just x) = (T.splitOn "/" . T.strip . T.pack) x
 
 test_unmangleWikiArgs_simple     =  unmangleWikiArgs (Just "foo/bar") == ["foo","bar"]
 test_unmangleWikiArgs_prespace   =  unmangleWikiArgs (Just "       foo/bar") == ["foo","bar"]
diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs
index e32a50945e23..7a8d5b7658d3 100644
--- a/test/Tests/Readers/MoinMoin.hs
+++ b/test/Tests/Readers/MoinMoin.hs
@@ -119,7 +119,9 @@ tests =
       , testCase "parserCustomDelim"$ "{{{badidea\nhi\nbadidea}}}"`readsTo` [CodeBlock nullAttr "hi"]
       , testGroup "parserHighlight"
         [ testCase "parserHaskell"  $ "{{{#!haskell\nx = 1\n}}}"  `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"]
-
+        ]
+      , testGroup "parserWiki"
+        [ testCase "wikiClass" $ "{{{#!wiki red\nfoo\n}}}" `readsTo` [Div ("",["red"],[]) [Para [Str "foo"] ] ]
         ]
       ]
     ]

From ac45d9ac538f39a3bf1beefe956e88ad96013622 Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Sat, 7 Mar 2026 22:27:34 +0000
Subject: [PATCH 75/76] Map MoinMoin highlight names to Pandoc

---
 pandoc.cabal                                  |   1 +
 src/Text/Pandoc/Readers/MoinMoin.hs           |  35 ++-
 src/Text/Pandoc/Readers/MoinMoin/Highlight.hs | 233 ++++++++++++++++++
 test/Tests/Readers/MoinMoin.hs                |   7 +-
 4 files changed, 263 insertions(+), 13 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/MoinMoin/Highlight.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index 7c78f461ee30..819f3ed3ada5 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -764,6 +764,7 @@ library
                    Text.Pandoc.Readers.Mdoc.Lex,
                    Text.Pandoc.Readers.Mdoc.Macros,
                    Text.Pandoc.Readers.Mdoc.Standards,
+                   Text.Pandoc.Readers.MoinMoin.Highlight,
                    Text.Pandoc.Readers.Typst.Parsing,
                    Text.Pandoc.Readers.Typst.Math,
                    Text.Pandoc.Readers.ODT.Base,
diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index 1ef6714d9fe3..f1a2c6731567 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -18,11 +18,13 @@ module Text.Pandoc.Readers.MoinMoin( readMoinMoin ) where
 import Control.Monad (guard)
 import Control.Monad.Except (throwError)
 import Data.Char -- isUpper, isAlphaNum
+import Data.Maybe (fromMaybe)
 import Text.Pandoc.Definition
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Class (runPure, PandocPure (..)) -- debug
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Parsing
+import Text.Pandoc.Readers.MoinMoin.Highlight
 import qualified Text.Pandoc.Builder as B
 import qualified Data.Text as T
 import Data.Either (fromRight)
@@ -179,19 +181,24 @@ parserHashBang = do
     many1 spaceNotNL
     many1 (satisfy (/='\n'))
 
+  let tParserName = T.pack parserName
+
   return $ case parserName of
-    "wiki"   -> ParserWiki (unmangleWikiArgs parserArgs)
-    "text"   -> ParserText
-    "haskell"-> ParserHighlight (mmLangTopdLang "haskell")
-    ""       -> ParserText
-    _        -> ParserUnsupported
-
--- map language names as recognised by MoinMoin to equivalents as
--- recognised by Pandoc
-mmLangTopdLang :: String -> T.Text
-mmLangTopdLang s = case s of
-  "haskell" -> "haskell"
-  _         -> "unknown"
+      "wiki"      -> ParserWiki (unmangleWikiArgs parserArgs)
+
+      "highlight" -> ParserHighlight (highlightArgs parserArgs)
+      -- these are pre Moin-1.9 shortcuts to highlight
+      "diff"      -> ParserHighlight "diff"
+      "cplusplus" -> ParserHighlight "cpp"
+      "python"    -> ParserHighlight "python"
+      "java"      -> ParserHighlight "java"
+      -- no sensible Pandoc highlighter to map these to
+      "pascal"    -> ParserHighlight "default"
+      "irc"       -> ParserHighlight "default"
+
+      "text"      -> ParserText
+      ""          -> ParserText
+      _           -> ParserUnsupported
 
 test_parserHashBang_noNL_in_args1 = p1 parserHashBang "#!wiki\nremaining" == Right (ParserWiki [])
 
@@ -202,6 +209,10 @@ unmangleWikiArgs :: Maybe String -> [T.Text]
 unmangleWikiArgs Nothing = []
 unmangleWikiArgs (Just x) = (T.splitOn "/" . T.strip . T.pack) x
 
+-- Pandoc language for "#!highlight" args: the first word, lower-cased, is looked up; further words are ignored; else "default".
+highlightArgs :: Maybe String -> T.Text
+highlightArgs args = fromMaybe "default" (args >>= mmLangTopdLang . T.toLower . T.concat . take 1 . T.words . T.pack)
+
 test_unmangleWikiArgs_simple     =  unmangleWikiArgs (Just "foo/bar") == ["foo","bar"]
 test_unmangleWikiArgs_prespace   =  unmangleWikiArgs (Just "       foo/bar") == ["foo","bar"]
 test_unmangleWikiArgs_postspace  =  unmangleWikiArgs (Just "foo/bar   ") == ["foo","bar"]
diff --git a/src/Text/Pandoc/Readers/MoinMoin/Highlight.hs b/src/Text/Pandoc/Readers/MoinMoin/Highlight.hs
new file mode 100644
index 000000000000..cf7cb3e6af2b
--- /dev/null
+++ b/src/Text/Pandoc/Readers/MoinMoin/Highlight.hs
@@ -0,0 +1,233 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Readers.MoinMoin.Highlight
+   Copyright   : Copyright © 2026 Jonathan Dowland
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jonathan Dowland 
+   Stability   : alpha
+   Portability : portable
+
+Mapping of MoinMoin highlight language names to Pandoc's.
+-}
+
+module Text.Pandoc.Readers.MoinMoin.Highlight ( mmLangTopdLang ) where
+
+import qualified Data.Map as M
+import qualified Data.Text as T
+
+-- Translate a language name as written for MoinMoin into the name Pandoc
+-- uses for the same language; Nothing when 'mapping' has no entry for it.
+mmLangTopdLang :: T.Text -> Maybe T.Text
+mmLangTopdLang = flip M.lookup mapping
+
+mapping :: M.Map T.Text T.Text -- key: MoinMoin highlight language name; value: Pandoc language name
+mapping = M.fromList
+  [ ("ada", "ada")
+  , ("ada95", "ada")
+  , ("ada2005", "ada")
+  , ("agda", "agda")
+  , ("apacheconf", "apache")
+  , ("aconf", "apache")
+  , ("apache", "apache")
+  , ("awk", "awk")
+  , ("gawk", "awk")
+  , ("mawk", "awk")
+  , ("nawk", "awk")
+  , ("basemake", "makefile")
+  , ("bash", "bash")
+  , ("sh", "bash")
+  , ("ksh", "bash")
+  , ("zsh", "bash")
+  , ("shell", "bash")
+  , ("bat", "dosbat")
+  , ("batch", "dosbat")
+  , ("dosbatch", "dosbat")
+  , ("winbatch", "dosbat")
+  , ("bib", "bibtex")
+  , ("bibtex", "bibtex")
+  , ("boo", "boo")
+  , ("c", "c")
+  , ("csharp", "cs")
+  , ("c#", "cs")
+  , ("cpp", "cpp")
+  , ("c++", "cpp")
+  , ("cplusplus", "cpp")
+  , ("clojure", "clojure")
+  , ("clj", "clojure")
+  , ("cmake", "cmake")
+  , ("coffee-script", "coffeescript")
+  , ("coffeescript", "coffeescript")
+  , ("coffee", "coffeescript")
+  , ("cfc", "coldfusion")
+  , ("common-lisp", "commonlisp")
+  , ("cl", "commonlisp")
+  , ("lisp", "commonlisp")
+  , ("css", "css")
+  , ("d", "d")
+  , ("dart", "dart")
+  , ("control", "debiancontrol")
+  , ("debcontrol", "debiancontrol")
+  , ("diff", "diff")
+  , ("udiff", "diff")
+  , ("django", "djangotemplate")
+  , ("jinja", "djangotemplate")
+  , ("docker", "dockerfile")
+  , ("dockerfile", "dockerfile")
+  , ("dtd", "dtd")
+  , ("eiffel", "eiffel")
+  , ("elixir", "elixir")
+  , ("ex", "elixir")
+  , ("exs", "elixir")
+  , ("elm", "elm")
+  , ("erlang", "erlang")
+  , ("fsharp", "fsharp")
+  , ("f#", "fsharp")
+  , ("gap", "gap")
+  , ("gas", "gnuassembler")
+  , ("asm", "gnuassembler")
+  , ("glsl", "glsl")
+  , ("go", "go")
+  , ("groovy", "groovy")
+  , ("haskell", "haskell")
+  , ("hs", "haskell")
+  , ("hx", "haxe")
+  , ("haxe", "haxe")
+  , ("hxsl", "haxe")
+  , ("html", "html")
+  , ("idris", "idris")
+  , ("idr", "idris")
+  , ("j", "j")
+  , ("java", "java")
+  , ("jsp", "jsp")
+  , ("js", "javascript")
+  , ("javascript", "javascript")
+  , ("json", "json")
+  , ("julia", "julia")
+  , ("jl", "julia")
+  , ("kotlin", "kotlin")
+  , ("lhs", "literatehaskell")
+  , ("literate-haskell", "literatehaskell")
+  , ("lhaskell", "literatehaskell")
+  , ("llvm", "llvm")
+  , ("lua", "lua")
+  , ("make", "makefile")
+  , ("makefile", "makefile")
+  , ("mf", "makefile")
+  , ("bsdmake", "makefile")
+  , ("md", "markdown")
+  , ("mathematica", "mathematica")
+  , ("mma", "mathematica")
+  , ("nb", "mathematica")
+  , ("matlab", "matlab")
+  , ("modula2", "modula2")
+  , ("m2", "modula2")
+  , ("nasm", "nasm")
+  , ("nim", "nim")
+  , ("nimrod", "nim")
+  , ("nixos", "nix")
+  , ("nix", "nix")
+  , ("objective-c", "objectivec")
+  , ("objectivec", "objectivec")
+  , ("obj-c", "objectivec")
+  , ("objc", "objectivec")
+  , ("objective-c++", "objectivecpp")
+  , ("objectivec++", "objectivecpp")
+  , ("obj-c++", "objectivecpp")
+  , ("objc++", "objectivecpp")
+  , ("ocaml", "ocaml")
+  , ("octave", "octave")
+  , ("perl", "perl")
+  , ("pl", "perl")
+  , ("perl6", "raku")
+  , ("pl6", "raku")
+  , ("php", "php")
+  , ("php3", "php")
+  , ("php4", "php")
+  , ("php5", "php")
+  , ("pike", "pike")
+  , ("postscript", "postscript")
+  , ("postscr", "postscript")
+  , ("pov", "povray")
+  , ("powershell", "powershell")
+  , ("posh", "powershell")
+  , ("ps1", "powershell")
+  , ("psm1", "powershell")
+  , ("prolog", "prolog")
+  , ("protobuf", "protobuf")
+  , ("proto", "protobuf")
+  , ("python", "python")
+  , ("py", "python")
+  , ("sage", "python")
+  , ("python3", "python")
+  , ("py3", "python")
+  , ("python2", "python")
+  , ("py2", "python")
+  , ("py2tb", "python")
+  , ("pycon", "python")
+  , ("pytb", "python")
+  , ("py3tb", "python")
+  , ("qml", "qml")
+  , ("qbs", "qml")
+  , ("rconsole", "r")
+  , ("rout", "r")
+  , ("rnc", "relaxngcompact")
+  , ("rng-compact", "relaxngcompact")
+  , ("rhtml", "rhtml")
+  , ("html+erb", "rhtml")
+  , ("html+ruby", "rhtml")
+  , ("rb", "ruby")
+  , ("ruby", "ruby")
+  , ("duby", "ruby")
+  , ("rbcon", "ruby")
+  , ("irb", "ruby")
+  , ("rust", "rust")
+  , ("rs", "rust")
+  , ("sass", "sass")
+  , ("scala", "scala")
+  , ("scheme", "scheme")
+  , ("scm", "scheme")
+  , ("scilab", "sci")
+  , ("scss", "scss")
+  , ("sql", "sql")
+  , ("sqlite3", "sql")
+  , ("sml", "sml")
+  , ("stata", "stata")
+  , ("do", "stata")
+  , ("swift", "swift")
+  , ("tcl", "tcl")
+  , ("tcsh", "tcsh")
+  , ("csh", "tcsh")
+  , ("tcshcon", "tcsh")
+  , ("terraform", "terraform")
+  , ("tf", "terraform")
+  , ("tex", "latex")
+  , ("latex", "latex")
+  , ("toml", "toml")
+  , ("ts", "typescript")
+  , ("typescript", "typescript")
+  , ("verilog", "verilog")
+  , ("v", "verilog")
+  , ("vhdl", "vhdl")
+  , ("xml", "xml")
+  , ("xml+cheetah", "xml")
+  , ("xml+spitfire", "xml")
+  , ("xml+django", "xml")
+  , ("xml+jinja", "xml")
+  , ("xml+evoque", "xml")
+  , ("xml+lasso", "xml")
+  , ("xml+mako", "xml")
+  , ("xml+myghty", "xml")
+  , ("xml+php", "xml")
+  , ("xml+erb", "xml")
+  , ("xml+ruby", "xml")
+  , ("xml+smarty", "xml")
+  , ("xml+velocity", "xml")
+  , ("xorg.conf", "xorg")
+  , ("xslt", "xslt")
+  , ("xul+mozpreproc", "xul")
+  , ("yaml", "yaml")
+  , ("yaml+jinja", "yaml")
+  , ("salt", "yaml")
+  , ("sls", "yaml")
+  ]
diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs
index 7a8d5b7658d3..b57b3ca4ff51 100644
--- a/test/Tests/Readers/MoinMoin.hs
+++ b/test/Tests/Readers/MoinMoin.hs
@@ -118,7 +118,12 @@ tests =
       , testCase "parserInnerDelim" $ "{{{{\n{{{hi}}}\n}}}}"      `readsTo` [CodeBlock nullAttr "{{{hi}}}"]
       , testCase "parserCustomDelim"$ "{{{badidea\nhi\nbadidea}}}"`readsTo` [CodeBlock nullAttr "hi"]
       , testGroup "parserHighlight"
-        [ testCase "parserHaskell"  $ "{{{#!haskell\nx = 1\n}}}"  `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"]
+        [ testCase "parserHaskell1"  $ "{{{#!highlight haskell\nx = 1\n}}}"  `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"]
+        , testCase "parserHaskell2"  $ "{{{#!highlight hs\nx = 1\n}}}"  `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"]
+        , testCase "hlightShortcut1" $ "{{{#!java\nfoo\n}}}" `readsTo` [CodeBlock ("",["java"],[]) "foo"]
+        , testCase "hlightShortcut2" $ "{{{#!irc\nfoo\n}}}" `readsTo` [CodeBlock ("",["default"],[]) "foo"]
+        , testCase "hlightEmpty"     $ "{{{#!highlight\nfoo\n}}}" `readsTo` [CodeBlock ("",["default"],[]) "foo"]
+
         ]
       , testGroup "parserWiki"
         [ testCase "wikiClass" $ "{{{#!wiki red\nfoo\n}}}" `readsTo` [Div ("",["red"],[]) [Para [Str "foo"] ] ]

From a69d5912547c4cab31012e4886cab86caf368776 Mon Sep 17 00:00:00 2001
From: Jonathan Dowland 
Date: Tue, 10 Mar 2026 21:35:25 +0000
Subject: [PATCH 76/76] inclusion macro

---
 src/Text/Pandoc/Readers/MoinMoin.hs | 11 +++++++++++
 test/Tests/Readers/MoinMoin.hs      |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs
index f1a2c6731567..79ea6faf1522 100644
--- a/src/Text/Pandoc/Readers/MoinMoin.hs
+++ b/src/Text/Pandoc/Readers/MoinMoin.hs
@@ -249,6 +249,7 @@ inline =  whitespace
       <|> lineBreak
       <|> anchor
       <|> code
+      <|> include
       <|> special
 
 -- from Readers.Mediawiki
@@ -410,6 +411,16 @@ externalLink = do
       src <- manyTillChar (noneOf "|") (string "]]")
       return (src,src)
 
+-- Parse an inclusion macro, <<Include(...)>>, keeping everything between the
+-- parentheses as a raw "moinmoin" inline. The full syntax has comma-separated
+-- options, a modifier to switch from page names to regexps, etc.; none of that
+-- is interpreted here. Full details: MoinMoin's HelpOnMacros/Include help page.
+include :: PandocMonad m => MoinParser m B.Inlines
+include = try $ do
+  string "<<Include("
+  manyTillChar anyChar (string ")>>") >>=
+    return . B.rawInline "moinmoin"
+
 -- from Readers.Mediawiki
 specialChars :: [Char]
 specialChars = "'[]<=&*{}|\":\\_^,~-+()/`#"
diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs
index b57b3ca4ff51..1d43e73cb9e6 100644
--- a/test/Tests/Readers/MoinMoin.hs
+++ b/test/Tests/Readers/MoinMoin.hs
@@ -135,6 +135,9 @@ tests =
     , testCase "empty italic" $ "''''" `readsTo` [Para []]
     , testCase "empty code"   $ "``" `readsTo` [Para []]
     ]
+  , testGroup "includes" -- an <<Include(...)>> macro is expected to come back as a raw "moinmoin" inline
+    [ testCase "basicInclude" $ "<<Include(foo)>>" `readsTo` [Para [RawInline "moinmoin" "foo"]]
+    ]
   ]
 
 main :: IO ()