diff --git a/SimonMoin.hs b/SimonMoin.hs new file mode 100644 index 000000000000..a80ac2098df5 --- /dev/null +++ b/SimonMoin.hs @@ -0,0 +1,528 @@ +{- +Copyright (C) 2009 Simon Michael + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.MoinMoin + Copyright : Copyright (C) 2009-2011 Simon Michael, ranft, John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Partial conversion from MoinMoin-formatted text (plus some pandoc-isms +like smart punctuation) to Pandoc. Based on the Markdown reader. + +TODO: +[ ] moin 1.6 double bracket links: + [ ] [[attachment:filename.txt]] +[ ] {{http://static.moinmo.in/logos/moinmoin.png}} should be an image +[ ] indented blockquotes +[ ] definition lists +[ ] nested/multiply-indented lists, blocks, code blocks +[ ] tables +[ ] images +[ ] <> a note +[ ] <> - just ignore this +[ ] <> treat as email link; pandoc has obfuscation + options +[ ] smileys and icons - use unicode char or just parse literal text +[ ] wiki parser with css classes: + {{{#!wiki red/solid + blah blah + }}} +[ ] admonitions + {{{#!wiki caution + '''Don't overuse''' + + blah blah + }}} +[ ] comments + {{{#!wiki comment/dotted + '''Don't overuse''' + + blah blah + }}} +[ ] test suite - best approach would be src/Tests/Readers/MoinMoin.hs, + see other reader tests for how to do this. + or, old style: tests/moinmoin-reader.native tests/moinmoin-reader.moinmoin + + +cf: +http://johnmacfarlane.net/pandoc/doc/pandoc/index.html +http://moinmo.in/HelpOnFormatting +http://moinmo.in/HelpOnMoinWikiSyntax + +-} + +module Text.Pandoc.Readers.MoinMoin ( readMoinMoin ) where +import Control.Monad ( when ) +import Data.Char ( isUpper ) +import Text.Pandoc.Definition +import Text.Pandoc.Parsing +import Text.Pandoc.Shared +import Text.ParserCombinators.Parsec hiding ( label ) + +-- | Parse MoinMoin string and return Pandoc document. +readMoinMoin :: ParserState -> String -> Pandoc +readMoinMoin state s = readWith parseMoinMoin state (s ++ "\n\n") + +-- +-- Constants and data structure definitions +-- + +spaceChars :: [Char] +spaceChars = " \t" + +bulletListMarkers :: [Char] +bulletListMarkers = ".*" + +hruleChars :: [Char] +hruleChars = "-" + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" + +-- +-- document structure +-- +-- +-- parsing blocks +-- + +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = manyTill block eof + +block :: GenParser Char ParserState Block +block = do + choice ([ header + , codeBlock + , codeBlockIndented + -- , blockQuote + , hrule + , bulletList + , orderedList + , para + , plain + , nullBlock + ]) "block" + + + +-- +-- hrule block +-- + +hrule :: GenParser Char st Block +hrule = try $ do + skipSpaces + start <- oneOf hruleChars + count 3 (char start) + skipMany (char start) + skipSpaces + newline + optional blanklines + return HorizontalRule + +-- +-- code blocks +-- + +codeBlockStart :: GenParser Char st [String] +codeBlockStart = try $ do + string "{{{" + classes <- option [] codeBlockClasses + optional newline + return classes + +codeBlockClasses :: GenParser Char st [String] +codeBlockClasses = try $ do + string "#!" + skipMany spaceChar + sepEndBy (many1 alphaNum) (many1 spaceChar) + +codeBlockEnd :: GenParser Char st () +codeBlockEnd = try $ string "}}}" >> skipSpaces >> optional newline >> return () + +codeBlock :: GenParser Char st Block +codeBlock = try $ do + classes <- codeBlockStart + contents <- manyTill anyChar codeBlockEnd + return $ CodeBlock ("",classes,[]) contents + +codeInline :: GenParser Char st Inline +codeInline = try $ do + codeBlockStart' + contents <- manyTill anyChar codeBlockEnd' + return $ Code nullAttr contents + where + codeBlockStart' = string "{{{" >> return () + codeBlockEnd' = try $ string "}}}" >> return () + + +codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented = try $ do + many1 whitespace >> codeBlockStart + contents <- manyTill anyChar codeBlockEnd + return $ BlockQuote [CodeBlock ([],[],[]) contents] + +-- +-- list blocks +-- + +-- these are just the markdown list parsers + +bulletListStart :: GenParser Char ParserState () +bulletListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces + +anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy $ string "p." >> spaceChar >> digit -- page number + state <- getState + if stateStrict state + then do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim) + else do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper)) + else spaceChar + skipSpaces + return (num, style, delim) + +listStart :: GenParser Char ParserState () +listStart = bulletListStart <|> (anyOrderedListStart >> return ()) + +-- parse a line of a list item (start = parser for beginning of list item) +listLine :: GenParser Char ParserState [Char] +listLine = try $ do + notFollowedBy' listStart + notFollowedBy blankline + notFollowedBy' (do indentSpaces + many (spaceChar) + listStart) + notFollowedBy' header + line <- manyTill anyChar newline + return $ line ++ "\n" + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: GenParser Char ParserState [Char] +rawListItem = try $ do + listStart + result <- many1 listLine + blanks <- many blankline + return $ concat result ++ blanks + +-- continuation of a list item - indented and separated by blankline +-- or (in compact lists) endline. +-- note: nested lists are parsed as continuations +listContinuation :: GenParser Char ParserState [Char] +listContinuation = try $ do + lookAhead indentSpaces + result <- many1 listContinuationLine + blanks <- many blankline + return $ concat result ++ blanks + +listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine = try $ do + notFollowedBy blankline + notFollowedBy' listStart + optional indentSpaces + result <- manyTill anyChar newline + return $ result ++ "\n" + +listItem :: GenParser Char ParserState [Block] +listItem = try $ do + first <- rawListItem + continuations <- many listContinuation + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let oldContext = stateParserContext state + setState $ state {stateParserContext = ListItemState} + -- parse the extracted block, which may contain various block elements: + let raw = concat (first:continuations) + contents <- parseFromString parseBlocks raw + updateState (\st -> st {stateParserContext = oldContext}) + return contents + +orderedList :: GenParser Char ParserState Block +orderedList = try $ do + (start, style, delim) <- lookAhead anyOrderedListStart + items <- many1 listItem + return $ OrderedList (start, style, delim) $ compactify items + +bulletList :: GenParser Char ParserState Block +bulletList = try $ do + lookAhead bulletListStart + many1 listItem >>= return . BulletList . compactify + +-- +-- paragraph block +-- + +para :: GenParser Char ParserState Block +para = try $ do + result <- many1 inline + newline + blanklines <|> do lookAhead ((codeBlockStart >> return "") <|> {- blockQuote <|> -} (header >> return "")) + return $ Para $ normalizeSpaces result + +plain :: GenParser Char ParserState Block +plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces + +-- +-- inline +-- + +inline :: GenParser Char ParserState Inline +inline = choice inlineParsers "inline" + +inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers = [ + link + , codeInline + , str + , smartPunctuation inline + , whitespace + , endline + , code + , charRef + , strong + , emph + , strikeout + , superscript + , subscript +-- , escapedChar + , symbol + ] + +symbol :: GenParser Char ParserState Inline +symbol = do + result <- oneOf specialChars + return $ Str [result] + +-- parses inline code, between n `s and n `s +code :: GenParser Char ParserState Inline +code = try $ do + starts <- many1 (char '`') + skipSpaces + result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (char '\n' >> return " ")) + (try (skipSpaces >> count (length starts) (char '`') >> + notFollowedBy (char '`'))) + return $ Code nullAttr $ removeLeadingTrailingSpace $ concat result + +whitespace :: GenParser Char ParserState Inline +whitespace = do + sps <- many1 (oneOf spaceChars) + if length sps >= 2 + then option Space (endline >> return LineBreak) + else return Space "whitespace" + +strChar :: GenParser Char st Char +strChar = noneOf (specialChars ++ spaceChars ++ "\n") + +str :: GenParser Char st Inline +str = notFollowedBy' comment >> many1 strChar >>= return . Str + +-- an endline character that can be treated as a space, not a structural break +endline :: GenParser Char ParserState Inline +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' codeBlockStart + notFollowedBy' listStart + notFollowedBy (char '=') + -- st <- getState + -- if stateStrict st + -- then do notFollowedBy (char '=') -- header + -- else return () + -- parse potential list-starts differently if in a list: + -- if stateParserContext st == ListItemState + -- then notFollowedBy' (bulletListStart <|> + -- (anyOrderedListStart >> return ())) + -- else return () + return Space + +-- +-- links +-- + +-- inlineNonLink :: GenParser Char ParserState Inline +-- inlineNonLink = (choice $ +-- map (\parser -> try (parser >>= failIfLink)) inlineParsers) +-- "inline (non-link)" + +-- failIfLink :: Inline -> GenParser tok st Inline +-- failIfLink (Link _ _) = pzero +-- failIfLink elt = return elt + +-- -- a reference label for a link +-- reference :: GenParser Char ParserState [Inline] +-- reference = do notFollowedBy' (string "[^") -- footnote reference +-- result <- inlinesInBalancedBrackets inlineNonLink +-- return $ normalizeSpaces result + +-- -- source for a link, with optional title +-- source :: GenParser Char st (String, [Char]) +-- source = +-- (try $ charsInBalanced '(' ')' >>= parseFromString source') <|> +-- -- the following is needed for cases like: [ref](/url(a). +-- (enclosed (char '(') (char ')') anyChar >>= +-- parseFromString source') + +-- -- auxiliary function for source +-- source' :: GenParser Char st (String, [Char]) +-- source' = do +-- skipSpaces +-- let sourceURL excludes = many $ +-- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) +-- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" +-- tit <- option "" linkTitle +-- skipSpaces +-- eof +-- return (intercalate "%20" $ words $ removeTrailingSpace src, tit) + +-- linkTitle :: GenParser Char st String +-- linkTitle = try $ do +-- (many1 spaceChar >> option '\n' newline) <|> newline +-- skipSpaces +-- delim <- oneOf "'\"" +-- tit <- manyTill (optional (char '\\') >> anyChar) +-- (try (char delim >> skipSpaces >> eof)) +-- return $ decodeCharacterReferences tit + +link :: GenParser Char ParserState Inline +link = choice [ + ,localPageCamelCaseLink + ,moin16BracketLink + ] + +localPageCamelCaseLink :: GenParser Char ParserState Inline +localPageCamelCaseLink = try $ do + (p,_) <- localPageCamelCase + return $ Link [Str p] (p, "") + +moin16BracketLink :: GenParser Char ParserState Inline +moin16BracketLink = try $ do + (target,label) <- doubleBracketed $ choice [ + uriPipeLabel + ,uriNoLabel + -- ,localPageInQuotes + -- ,localPageWithColonLabel + -- ,localPageCamelCase + ] + return $ Link [Str label] (target, "") + +uriPipeLabel :: GenParser Char ParserState (String,String) +uriPipeLabel = try $ do + (_, uri_escaped) <- uri + char '|' + label <- many1 $ noneOf "]" + return (uri_escaped, label) + +uriNoLabel :: GenParser Char ParserState (String,String) +uriNoLabel = try $ do + skipSpaces + s <- many1 $ noneOf "]" + skipSpaces + -- work around uri failing when there is a trailing ] + state <- getState + either (const $ fail "") (const $ return (s,s)) $ runParser uri state "" s + +localPageInQuotes :: GenParser Char ParserState (String,String) +localPageInQuotes = try $ do + char '"' + p <- many1 $ noneOf "\"" + char '"' + return (p,p) + +-- I didn't find this in any moin docs, but it's in the darcs wiki +-- and moin was handling it +localPageWithColonLabel :: GenParser Char ParserState (String,String) +localPageWithColonLabel = try $ do + char ':' + p <- many1 $ noneOf ":" + char ':' + label <- many1 $ noneOf "]" + return (p,label) + +localPageCamelCase :: GenParser Char ParserState (String,String) +localPageCamelCase = try $ do + w <- initialCapWord + ws <- many1 initialCapWord + let p = concat $ [w]++ws + return (p,p) + +initialCapWord :: GenParser Char ParserState String +initialCapWord = try $ do + c <- upperChar + cs <- many1 lowerChar + return $ [c]++cs + +upperChar, lowerChar :: GenParser Char ParserState Char +upperChar = oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +lowerChar = oneOf "abcdefghijklmnopqrstuvwxyz" + +-- image :: GenParser Char ParserState Inline +-- image = try $ do +-- char '!' +-- (Link lab src) <- link +-- return $ Image lab src + +doubleBracketed :: (GenParser Char st a) -> GenParser Char st a +doubleBracketed parser = do + string "[[" + contents <- parser + string "]]" + return contents + +-- +-- auxiliary functions +-- + +indentSpaces :: GenParser Char ParserState [Char] +indentSpaces = try $ do + state <- getState + let tabStop = stateTabStop state + try (count tabStop (char ' ')) <|> + (many (char ' ') >> string "\t") "indentation" + +nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces = do + state <- getState + let tabStop = stateTabStop state + sps <- many (char ' ') + if length sps < tabStop + then return sps + else unexpected "indented line" + diff --git a/pandoc.cabal b/pandoc.cabal index 30afcacb004d..819f3ed3ada5 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -33,7 +33,7 @@ description: Pandoc is a Haskell library for converting from one markup - Interactive notebook formats (Jupyter notebook ipynb) - Page layout formats (InDesign ICML) - Wiki markup formats (MediaWiki, DokuWiki, TikiWiki, TWiki, - Vimwiki, XWiki, ZimWiki, Jira wiki, Creole) + Vimwiki, XWiki, ZimWiki, Jira wiki, Creole, MoinMoin) - Slide show formats (LaTeX Beamer, PowerPoint, Slidy, reveal.js, Slideous, S5, DZSlides) - Data formats (CSV and TSV tables, Excel spreadsheets) @@ -616,6 +616,7 @@ library Text.Pandoc.Readers.RIS, Text.Pandoc.Readers.CslJson, Text.Pandoc.Readers.MediaWiki, + Text.Pandoc.Readers.MoinMoin, Text.Pandoc.Readers.Vimwiki, Text.Pandoc.Readers.RST, Text.Pandoc.Readers.Org, @@ -763,6 +764,7 @@ library Text.Pandoc.Readers.Mdoc.Lex, Text.Pandoc.Readers.Mdoc.Macros, Text.Pandoc.Readers.Mdoc.Standards, + Text.Pandoc.Readers.MoinMoin.Highlight, Text.Pandoc.Readers.Typst.Parsing, Text.Pandoc.Readers.Typst.Math, Text.Pandoc.Readers.ODT.Base, @@ -867,6 +869,7 @@ test-suite test-pandoc Tests.Readers.JATS Tests.Readers.Jira Tests.Readers.Markdown + Tests.Readers.MoinMoin Tests.Readers.Org Tests.Readers.Org.Block Tests.Readers.Org.Block.CodeBlock diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 5fa0ebeb7b07..5955edc64b1e 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -69,6 +69,7 @@ module Text.Pandoc.Readers , readDjot , readPod , readXML + , readMoinMoin -- * Miscellaneous , getReader , getDefaultExtensions @@ -103,6 +104,7 @@ import Text.Pandoc.Readers.JATS (readJATS) import Text.Pandoc.Readers.Jira (readJira) import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.MoinMoin import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.ODT diff --git a/src/Text/Pandoc/Readers/MoinMoin.hs b/src/Text/Pandoc/Readers/MoinMoin.hs new file mode 100644 index 000000000000..79ea6faf1522 --- /dev/null +++ b/src/Text/Pandoc/Readers/MoinMoin.hs @@ -0,0 +1,443 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} +-- ^^ needed temp for enclosed' +{- | + Module : Text.Pandoc.Readers.MoinMoin + Copyright : Copyright © 2026 Jonathan Dowland, © 2009-2011 Simon Michael, © 22006-2024 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : Jonathan Dowland + Stability : alpha + Portability : portable + +Conversion of MoinMoin text to 'Pandoc' document. +-} + +module Text.Pandoc.Readers.MoinMoin( readMoinMoin ) where + +import Control.Monad (guard) +import Control.Monad.Except (throwError) +import Data.Char -- isUpper, isAlphaNum +import Data.Maybe (fromMaybe) +import Text.Pandoc.Definition +import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) +import Text.Pandoc.Class (runPure, PandocPure (..)) -- debug +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Parsing +import Text.Pandoc.Readers.MoinMoin.Highlight +import qualified Text.Pandoc.Builder as B +import qualified Data.Text as T +import Data.Either (fromRight) + +-- | Read MoinMoin from an input string and return a Pandoc document. +readMoinMoin :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc +readMoinMoin opts s = do + let sources = toSources s + parsed <- readWithM parseMoinMoin defaultMoinState sources + case parsed of + Left err -> throwError err + Right res -> return res + +data MoinState = MoinState { mmMeta :: Meta } deriving Show +defaultMoinState = MoinState nullMeta + +type MoinParser m = ParsecT Sources MoinState m + +parseMoinMoin :: PandocMonad m => MoinParser m Pandoc +parseMoinMoin = do + many processingInstruction + blocks <- mconcat <$> many block + spaces + eof + + st <- getState + let meta = mmMeta st + + -- reportLogMessages -- could not deduce 'HasLogMessages MoinState' + return $ Pandoc meta (B.toList blocks) + +{- + - we may wish to handle: + - #format creole|plain|python|rst| + - #REDIRECT | #refresh Xs + - #pragma + - section-numbers (headings) + - keywords => meta keywords + - description => meta description + - #DEPRECATED + - #language (iso-639-1 code) + - + - -} +processingInstruction :: PandocMonad m => MoinParser m () +processingInstruction = do + char '#' + manyUntil anyChar newline + return () + +-- technically a processing instruction but can occur anywhere +-- in a page +comment :: PandocMonad m => MoinParser m B.Blocks +comment = try $ do + string "##" + manyUntil anyChar newline + return mempty + +block :: PandocMonad m => MoinParser m B.Blocks +block = do + res <- mempty <$ skipMany1 blankline + <|> header + <|> comment + <|> bulletList + <|> parser + <|> para + return res + +-- from Readers.Mediawiki +header :: PandocMonad m => MoinParser m B.Blocks +header = try $ do + guardColumnOne + lev <- length <$> many1 (char '=') + contents <- B.trimInlines . mconcat <$> manyTill inline (count lev $ char '=') + return $ B.header (min 5 lev) contents + +-- from Readers.Mediawiki +guardColumnOne :: PandocMonad m => MoinParser m () +guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) + +-- from Readers.Mediawiki +para :: PandocMonad m => MoinParser m B.Blocks +para = do + contents <- B.trimInlines . mconcat <$> many1 inline + return $ B.para contents + +bulletList :: PandocMonad m => MoinParser m B.Blocks +bulletList = many1 bulletListItem >>= return . B.bulletList + +bulletListItem :: PandocMonad m => MoinParser m B.Blocks +bulletListItem = try $ do + lev <- length <$> many1 space + char '*' + spaces + B.plain . B.trimInlines . mconcat <$> manyTill inline newline + +-- could use Text.Pandoc.Parsing.spaceChar?? +spaceNotNL :: PandocMonad m => MoinParser m Char +spaceNotNL = satisfy (\c -> isSpace c && not (c == '\n')) + +-- block-level MoinMoin 'Parser'. +-- Not to be confused with 'code' (inline) +parser :: PandocMonad m => MoinParser m B.Blocks +parser = try $ do + open <- many (char '{') + open2 <- many (satisfy isWordChar) + let len = length open + guard (len >= 3) + + pspec <- optionMaybe (try parserHashBang) + many spaceNotNL + char '\n' + + let delim = open2 ++ (take len (repeat '}')) + + case pspec of + Just (ParserWiki args) -> do + inner <- manyTill block (closer delim) + let attr = ("", args, []) + (return . B.divWith attr . mconcat) inner + + Just (ParserHighlight lang) -> do + let attr = ("", [lang], []) + manyTillChar anyChar (closer delim) >>= return . B.codeBlockWith attr + + _ -> manyTillChar anyChar (closer delim) >>= return . B.codeBlock + + where + closer delim = try $ do + char '\n' + many spaceNotNL + string delim + +-- intended to be equivalent to Python2 re '\w' with re.UNICODE set +isWordChar :: Char -> Bool +isWordChar c = isAlphaNum c || c == '_' + +-- Moin supports (at least) creole, csv (table builder), docbook, highlight, html, +-- wiki, rst, text (plain:
) and xslt. The following names are deprecated
+-- shorthand for highlight (the name is converted to the argument to highlight):
+-- C++, diff, IRC(irssi), java, pascal, python:
+--
+-- for now we only support wiki (moin native format) and plain text.
+data ParserSpec = ParserWiki [T.Text]
+                | ParserText
+                | ParserHighlight T.Text
+                | ParserUnsupported
+                deriving (Show, Eq)
+
+parserHashBang :: PandocMonad m => MoinParser m ParserSpec
+parserHashBang = do
+  string "#!"
+  parserName <- many (satisfy isWordChar)
+  parserArgs <- optionMaybe $ try $ do
+    many1 spaceNotNL
+    many1 (satisfy (/='\n'))
+
+  let tParserName = T.pack parserName
+
+  return $ case parserName of
+      "wiki"      -> ParserWiki (unmangleWikiArgs parserArgs)
+
+      "highlight" -> ParserHighlight (highlightArgs parserArgs)
+      -- these are pre Moin-1.9 shortcuts to highlight
+      "diff"      -> ParserHighlight "diff"
+      "cplusplus" -> ParserHighlight "cpp"
+      "python"    -> ParserHighlight "python"
+      "java"      -> ParserHighlight "java"
+      -- no sensible Pandoc highlighter to map these to
+      "pascal"    -> ParserHighlight "default"
+      "irc"       -> ParserHighlight "default"
+
+      "text"      -> ParserText
+      ""          -> ParserText
+      _           -> ParserUnsupported
+
+test_parserHashBang_noNL_in_args1 = p1 parserHashBang "#!wiki\nremaining" == Right (ParserWiki [])
+
+test_parserHashBang_noNL_in_args2 = p1 (parserHashBang >> many anyChar) "#!wiki\nremaining"
+  == Right "\nremaining"
+
+unmangleWikiArgs :: Maybe String -> [T.Text]
+unmangleWikiArgs Nothing = []
+unmangleWikiArgs (Just x) = (T.splitOn "/" . T.strip . T.pack) x
+
+highlightArgs :: Maybe String -> T.Text
+highlightArgs Nothing  = "default"
+highlightArgs (Just a) = (fromMaybe "default" . mmLangTopdLang . T.pack) a
+
+test_unmangleWikiArgs_simple     =  unmangleWikiArgs (Just "foo/bar") == ["foo","bar"]
+test_unmangleWikiArgs_prespace   =  unmangleWikiArgs (Just "       foo/bar") == ["foo","bar"]
+test_unmangleWikiArgs_postspace  =  unmangleWikiArgs (Just "foo/bar   ") == ["foo","bar"]
+test_unmangleWikiArgs_nowt       =  unmangleWikiArgs Nothing == []
+
+tests = and
+ [ test_unmangleWikiArgs_simple
+ , test_unmangleWikiArgs_prespace
+ , test_unmangleWikiArgs_postspace
+ , test_unmangleWikiArgs_nowt
+ , test_parserHashBang_noNL_in_args1
+ , test_parserHashBang_noNL_in_args2
+ ]
+
+inline :: PandocMonad m => MoinParser m B.Inlines
+inline =  whitespace
+      <|> camelCaseLink
+      <|> emailAddressLink
+      <|> uriLink
+      <|> str
+      <|> bold
+      <|> monospace
+      <|> italic
+      <|> underline
+      <|> superscript
+      <|> subscript
+      <|> smaller
+      <|> larger
+      <|> stroke
+      <|> externalLink
+      <|> inlineComment
+      <|> endline
+      <|> tableOfContents
+      <|> lineBreak
+      <|> anchor
+      <|> code
+      <|> include
+      <|> special
+
+-- from Readers.Mediawiki
+whitespace :: PandocMonad m => MoinParser m B.Inlines
+whitespace = B.space <$ skipMany1 spaceChar
+
+many2 :: PandocMonad m => MoinParser m a -> MoinParser m [a]
+many2 p = do
+  first <- p
+  rest  <- many1 p
+  return (first:rest)
+
+camelWord :: PandocMonad m => MoinParser m String
+camelWord = do
+  slash <- optionMaybe (char '/')
+  f     <- satisfy isUpper
+  rest  <- many1 (satisfy (\c -> isAlphaNum c && not (isUpper c)))
+  return $ case slash of
+    Nothing -> f:rest
+    Just s  -> s:f:rest
+
+camelCaseLink :: PandocMonad m => MoinParser m B.Inlines
+camelCaseLink = try $ do
+  src <- mconcat <$> many2 camelWord
+  let tsrc  = T.pack src
+  let title = ""
+  let label = B.str tsrc
+  return $ B.link tsrc title label
+
+emailAddressLink :: PandocMonad m => MoinParser m B.Inlines
+emailAddressLink = try $ do
+  (e, escaped_mailto_uri) <- emailAddress
+  return $ B.link escaped_mailto_uri "" $ B.str e
+
+uriLink :: PandocMonad m => MoinParser m B.Inlines
+uriLink = try $ do
+  (u, uri_escaped) <- uri
+  return $ B.link u "" $ B.str uri_escaped
+
+-- from Readers.Mediawiki
+str :: PandocMonad m => MoinParser m B.Inlines
+str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars)
+
+-- utility fn for most of the inline text formatters
+formatter :: PandocMonad m
+          => String
+          -> (B.Inlines -> B.Inlines)
+          -> MoinParser m B.Inlines
+formatter delim inliner =
+  enclosed delim' delim' inline >>= return . inliner . mconcat
+  where delim' = string delim
+
+italic      :: PandocMonad m => MoinParser m B.Inlines
+italic      = formatter ("''") B.emph
+
+-- this has broken "''''' hello world'''''"
+bold        :: PandocMonad m => MoinParser m B.Inlines
+bold        = try $ do
+  inner <- enclosed' delim delim inline
+  if null inner
+    then return (B.fromList [])
+    else (return . B.strong . mconcat) inner
+  where delim = string ("'''")
+        enclosed' start end parser = try $
+          start >> manyTill parser end
+
+monospace :: PandocMonad m => MoinParser m B.Inlines
+monospace = try $ do
+  char '`'
+  inner <- manyTill (noneOf "`\n") (char '`')
+  if null inner
+    then return (B.fromList [])
+    else (return . B.code . T.pack) inner
+
+underline   :: PandocMonad m => MoinParser m B.Inlines
+underline   = formatter ("__") B.underline
+superscript :: PandocMonad m => MoinParser m B.Inlines
+superscript = formatter "^" B.superscript
+subscript   :: PandocMonad m => MoinParser m B.Inlines
+subscript   = formatter ",," B.subscript
+-- smaller/larger: possibly mark the inlines with an attribute
+smaller :: PandocMonad m => MoinParser m B.Inlines
+smaller = enclosed (string "~-") (string "-~") inline >>=
+    return . mconcat
+
+larger :: PandocMonad m => MoinParser m B.Inlines
+larger = enclosed (string "~+") (string "+~") inline >>=
+    return . mconcat
+
+stroke :: PandocMonad m => MoinParser m B.Inlines
+stroke = enclosed (string "--(") (string ")--") inline >>=
+    return . B.strikeout . mconcat
+
+inlineComment :: PandocMonad m => MoinParser m B.Inlines
+inlineComment = do
+  string "/*"
+  manyTill anyChar (string "*/")
+  return mempty
+
+-- a newline that does not break a Para (etc)
+endline :: PandocMonad m => MoinParser m B.Inlines
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  notFollowedBy (string "##")
+  notFollowedBy (string "}}}") -- to avoid breaking Parser
+  (eof >> return mempty)
+    <|> (skipMany spaceChar >> return B.softbreak)
+
+-- MoinMoin behaviour: insert the TOC at the point of the token.
+-- What we're doing here is not (yet) that.
+tableOfContents :: PandocMonad m => MoinParser m B.Inlines
+tableOfContents = try $ do
+  string "<5
+  lvl <- optionMaybe (oneOf "12345")
+  case lvl of
+    Nothing -> return ()
+    Just l  -> updateState $ \st -> st
+      { mmMeta = B.setMeta "toclevel" [l] (mmMeta st) }
+  string ")>>"
+  updateState $ \st -> st { mmMeta = B.setMeta "toc" True (mmMeta st) }
+  return mempty
+
+lineBreak :: PandocMonad m => MoinParser m B.Inlines
+lineBreak = try $ string "<
>" >> return B.linebreak + +anchor :: PandocMonad m => MoinParser m B.Inlines +anchor = try $ do + string "<>") + return $ B.spanWith (T.pack name,[],[]) (B.fromList []) + +-- NOTE: not to be confused with 'parser' (block) +code :: PandocMonad m => MoinParser m B.Inlines +code = try $ do + string "{{{" + pre <- manyTillChar (noneOf "\n") (try $ string "}}}") + return $ B.code pre + +special :: PandocMonad m => MoinParser m B.Inlines +special = B.str . T.singleton <$> oneOf specialChars + +-- MoinMoin < 1.6.0 (~2007-12) supported a single-bracket +-- external link syntax. We don't attempt to support that. +externalLink :: PandocMonad m => MoinParser m B.Inlines +externalLink = do + string "[[" + (src,label) <- try labelledLink <|> unlabelledLink + --string "]]" + return $ B.link src "" $ B.str label + where + labelledLink = do + src <- manyTillChar (noneOf "|") (try (char '|')) + lbl <- manyTillChar (noneOf "]") (string "]]") + return (src,lbl) + + unlabelledLink = do + src <- manyTillChar (noneOf "|") (string "]]") + return (src,src) + +-- the full syntax of this macro has comma-separated options, a modifier to switch +-- from page names to regexps, etc., but we're not using any of that so we take a +-- shortcut. Full details: +-- +include :: PandocMonad m => MoinParser m B.Inlines +include = try $ do + string "<>") >>= + return . B.rawInline "moinmoin" + +-- from Readers.Mediawiki +specialChars :: [Char] +specialChars = "'[]<=&*{}|\":\\_^,~-+()/`#" + +-- from Readers.Mediawiki +spaceChars :: [Char] +spaceChars = " \n\t" + +------------------------------------------------------------------------------ +-- debug functions for use in GHCi + +p1 :: MoinParser PandocPure a -> T.Text -> Either ParseError a +p1 p' = fromRight (error "unhandled PandocError") + . runPure + . runParserT p' defaultMoinState "?" + . toSources + +pp :: Monoid a + => MoinParser PandocPure a -> T.Text -> Either ParseError a +pp = p1 . fmap mconcat . many diff --git a/src/Text/Pandoc/Readers/MoinMoin/Highlight.hs b/src/Text/Pandoc/Readers/MoinMoin/Highlight.hs new file mode 100644 index 000000000000..cf7cb3e6af2b --- /dev/null +++ b/src/Text/Pandoc/Readers/MoinMoin/Highlight.hs @@ -0,0 +1,233 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.MoinMoin.Highlight + Copyright : Copyright © 2026 Jonathan Dowland + License : GNU GPL, version 2 or above + + Maintainer : Jonathan Dowland + Stability : alpha + Portability : portable + +Mapping of MoinMoin highlight language names to Pandoc's. +-} + +module Text.Pandoc.Readers.MoinMoin.Highlight ( mmLangTopdLang ) where + +import qualified Data.Map as M +import qualified Data.Text as T + +-- map language names as recognised by MoinMoin to equivalents as +-- recognised by Pandoc +mmLangTopdLang :: T.Text -> Maybe T.Text +mmLangTopdLang s = M.lookup s mapping + +mapping :: M.Map T.Text T.Text +mapping = M.fromList + [ ("ada", "ada") + , ("ada95", "ada") + , ("ada2005", "ada") + , ("agda", "agda") + , ("apacheconf", "apache") + , ("aconf", "apache") + , ("apache", "apache") + , ("awk", "awk") + , ("gawk", "awk") + , ("mawk", "awk") + , ("nawk", "awk") + , ("basemake", "makefile") + , ("bash", "bash") + , ("sh", "bash") + , ("ksh", "bash") + , ("zsh", "bash") + , ("shell", "bash") + , ("bat", "dosbat") + , ("batch", "dosbat") + , ("dosbatch", "dosbat") + , ("winbatch", "dosbat") + , ("bib", "bibtex") + , ("bibtex", "bibtex") + , ("boo", "boo") + , ("c", "c") + , ("csharp", "cs") + , ("c#", "cs") + , ("cpp", "cpp") + , ("c++", "cpp") + , ("cplusplus", "cpp") + , ("clojure", "clojure") + , ("clj", "clojure") + , ("cmake", "cmake") + , ("coffee-script", "coffeescript") + , ("coffeescript", "coffeescript") + , ("coffee", "coffeescript") + , ("cfc", "coldfusion") + , ("common-lisp", "commonlisp") + , ("cl", "commonlisp") + , ("lisp", "commonlisp") + , ("css", "css") + , ("d", "d") + , ("dart", "dart") + , ("control", "debiancontrol") + , ("debcontrol", "debiancontrol") + , ("diff", "diff") + , ("udiff", "diff") + , ("django", "djangotemplate") + , ("jinja", "djangotemplate") + , ("docker", "dockerfile") + , ("dockerfile", "dockerfile") + , ("dtd", "dtd") + , ("eiffel", "eiffel") + , ("elixir", "elixir") + , ("ex", "elixir") + , ("exs", "elixir") + , ("elm", "elm") + , ("erlang", "erlang") + , ("fsharp", "fsharp") + , ("f#", "fsharp") + , ("gap", "gap") + , ("gas", "gnuassembler") + , ("asm", "gnuassembler") + , ("glsl", "glsl") + , ("go", "go") + , ("groovy", "groovy") + , ("haskell", "haskell") + , ("hs", "haskell") + , ("hx", "haxe") + , ("haxe", "haxe") + , ("hxsl", "haxe") + , ("html", "html") + , ("idris", "idris") + , ("idr", "idris") + , ("j", "j") + , ("java", "java") + , ("jsp", "jsp") + , ("js", "javascript") + , ("javascript", "javascript") + , ("json", "json") + , ("julia", "julia") + , ("jl", "julia") + , ("kotlin", "kotlin") + , ("lhs", "literatehaskell") + , ("literate-haskell", "literatehaskell") + , ("lhaskell", "literatehaskell") + , ("llvm", "llvm") + , ("lua", "lua") + , ("make", "makefile") + , ("makefile", "makefile") + , ("mf", "makefile") + , ("bsdmake", "makefile") + , ("md", "markdown") + , ("mathematica", "mathematica") + , ("mma", "mathematica") + , ("nb", "mathematica") + , ("matlab", "matlab") + , ("modula2", "modula2") + , ("m2", "modula2") + , ("nasm", "nasm") + , ("nim", "nim") + , ("nimrod", "nim") + , ("nixos", "nix") + , ("nix", "nix") + , ("objective-c", "objectivec") + , ("objectivec", "objectivec") + , ("obj-c", "objectivec") + , ("objc", "objectivec") + , ("objective-c++", "objectivecpp") + , ("objectivec++", "objectivecpp") + , ("obj-c++", "objectivecpp") + , ("objc++", "objectivecpp") + , ("ocaml", "ocaml") + , ("octave", "octave") + , ("perl", "perl") + , ("pl", "perl") + , ("perl6", "raku") + , ("pl6", "raku") + , ("php", "php") + , ("php3", "php") + , ("php4", "php") + , ("php5", "php") + , ("pike", "pike") + , ("postscript", "postscript") + , ("postscr", "postscript") + , ("pov", "povray") + , ("powershell", "powershell") + , ("posh", "powershell") + , ("ps1", "powershell") + , ("psm1", "powershell") + , ("prolog", "prolog") + , ("protobuf", "protobuf") + , ("proto", "protobuf") + , ("python", "python") + , ("py", "python") + , ("sage", "python") + , ("python3", "python") + , ("py3", "python") + , ("python2", "python") + , ("py2", "python") + , ("py2tb", "python") + , ("pycon", "python") + , ("pytb", "python") + , ("py3tb", "python") + , ("qml", "qml") + , ("qbs", "qml") + , ("rconsole", "r") + , ("rout", "r") + , ("rnc", "relaxngcompact") + , ("rng-compact", "relaxngcompact") + , ("rhtml", "rhtml") + , ("html+erb", "rhtml") + , ("html+ruby", "rhtml") + , ("rb", "ruby") + , ("ruby", "ruby") + , ("duby", "ruby") + , ("rbcon", "ruby") + , ("irb", "ruby") + , ("rust", "rust") + , ("rs", "rust") + , ("sass", "sass") + , ("scala", "scala") + , ("scheme", "scheme") + , ("scm", "scheme") + , ("scilab", "sci") + , ("scss", "scss") + , ("sql", "sql") + , ("sqlite3", "sql") + , ("sml", "sml") + , ("stata", "stata") + , ("do", "stata") + , ("swift", "swift") + , ("tcl", "tcl") + , ("tcsh", "tsch") + , ("csh", "tsch") + , ("tcshcon", "tsch") + , ("terraform", "terraform") + , ("tf", "terraform") + , ("tex", "latex") + , ("latex", "latex") + , ("toml", "toml") + , ("ts", "typescript") + , ("typescript", "typescript") + , ("verilog", "verilog") + , ("v", "verilog") + , ("vhdl", "vhdl") + , ("xml", "xml") + , ("xml+cheetah", "xml") + , ("xml+spitfire", "xml") + , ("xml+django", "xml") + , ("xml+jinja", "xml") + , ("xml+evoque", "xml") + , ("xml+lasso", "xml") + , ("xml+mako", "xml") + , ("xml+myghty", "xml") + , ("xml+php", "xml") + , ("xml+erb", "xml") + , ("xml+ruby", "xml") + , ("xml+smarty", "xml") + , ("xml+velocity", "xml") + , ("xorg.conf", "xorg") + , ("xslt", "xslt") + , ("xul+mozpreproc", "xul") + , ("yaml", "yaml") + , ("yaml+jinja", "yaml") + , ("salt", "yaml") + , ("sls", "yaml") + ] diff --git a/test/Tests/Readers/MoinMoin.hs b/test/Tests/Readers/MoinMoin.hs new file mode 100644 index 000000000000..1d43e73cb9e6 --- /dev/null +++ b/test/Tests/Readers/MoinMoin.hs @@ -0,0 +1,144 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Tests.Readers.MoinMoin + Copyright : © 2026 Jonathan Dowland, © 2009 Simon Michael + License : GNU GPL, version 2 or above + + Maintainer : Jonathan Dowland + Stability : alpha + Portability : portable + +Tests for the MoinMoin reader. +-} +module Tests.Readers.MoinMoin (tests) where + +import Text.Pandoc -- readMoinMoin +import Text.Pandoc.Sources -- toSources +import Test.Tasty +import Test.Tasty.HUnit +import qualified Data.Text as T +import qualified Data.Map.Strict as M +import Data.Either (fromRight) + +-- @?= imposes Eq for which PandocError doesn't have an instance +-- so we remove the Either layer, replacing errors with nullDoc. +nullDoc :: Pandoc +nullDoc = Pandoc nullMeta [] +runMM :: String -> Pandoc +runMM = fromRight nullDoc . runPure . readMoinMoin def . toSources . T.pack + +readsTo :: String -> [Block] -> Assertion +readsTo s b = runMM s @?= Pandoc nullMeta b + +hasMeta :: String -> [(T.Text,MetaValue)] -> Assertion +hasMeta s cmp = let + (Pandoc meta blocks) = runMM s + in meta @?= (Meta $ M.fromList cmp) + +tests :: [TestTree] +tests = + [ testCase "basic" $ "hi" `readsTo` [Para [Str "hi"]] + , testCase "endline" $ "hi\nthere"`readsTo` [Para [Str "hi",SoftBreak,Str "there"]] + , testCase "bold" $ "'''hi'''" `readsTo` [Para [Strong [Str "hi"]]] + , testCase "italic" $ "''hi''" `readsTo` [Para [Emph [Str "hi"]]] + , testCase "underline" $ "__hi__" `readsTo` [Para [Underline [Str "hi"]]] + , testCase "monospace" $ "`hi`" `readsTo` [Para [Code nullAttr "hi"]] + , testCase "notMono" $ "`h\ni`" `readsTo` [Para [Str "`h",SoftBreak,Str "i`"]] + + , testCase "code1" $ "{{{hi}}}" `readsTo` [Para [Code nullAttr "hi"]] + , testCase "code2" $ "{{{ hi }}}" `readsTo` [Para [Code nullAttr " hi "]] + , testCase "code3" $ "{{{''hi''}}}" `readsTo` [Para [Code nullAttr "''hi''"]] + + -- broken + , testCase "italic and bold" $ + "'''''hello world'''''" `readsTo` + [Para [Strong [Emph [Str "hello", Space, Str "world"]]]] + + -- Pandoc's "enclosed" fails this + , testCase "boldSpace" $ "''' hi'''" `readsTo` [Para [Strong [Space,Str "hi"]]] + + , testCase "heading 1" $ "= 1 =" `readsTo` [Header 1 ("",[],[]) [Str "1"]] + , testCase "heading 2" $ "== 2 ==" `readsTo` [Header 2 ("",[],[]) [Str "2"]] + , testCase "heading 3" $ "=== 3 ===" `readsTo` [Header 3 ("",[],[]) [Str "3"]] + , testCase "heading 4" $ "==== 4 ====" `readsTo` [Header 4 ("",[],[]) [Str "4"]] + , testCase "heading 5" $ "===== 5 =====" `readsTo` [Header 5 ("",[],[]) [Str "5"]] + , testCase "no heading 6" $ "====== 6 ======" `readsTo` [Header 5 ("",[],[]) [Str "6"]] + + , testGroup "toc" $ + [ testCase "tocPresent" $ "<>" `hasMeta` [("toc",MetaBool True)] + , testCase "tocAbsent" $ "<>" `hasMeta` [] + , testCase "tocLevel1" $ "<>" `hasMeta` [("toc",MetaBool True),("toclevel",MetaString "1")] + ] + + , testCase "LineBreak" $ "<
>" `readsTo` [Para [LineBreak]] + , testCase "anchor" $ "<>" `readsTo` [Para [Span ("foo",[],[]) []]] + + , testCase "superscript" $ "^2^" `readsTo` [Para [Superscript [Str "2"]]] + , testCase "subscript" $ ",,low,," `readsTo` [Para [Subscript [Str "low"]]] + + -- XXX: add tests for annotations + , testCase "strikeout" $ "--(delete)--" `readsTo` [Para [Strikeout [Str "delete"]]] + , testCase "larger" $ "~+larger+~" `readsTo` [Para [Str "larger"]] + , testCase "smaller" $ "~-smaller-~" `readsTo` [Para [Str "smaller"]] + + , testCase "inlineComment" $ "hello/*comment*/world" `readsTo` [Para [Str "helloworld"]] + , testCase "inlineCommentNewlines" $ + "hello/*comment\nmore\n*/world" `readsTo` [Para [Str "helloworld"]] + + , testGroup "links" + [ testCase "CamelCase" $ "FooBar" `readsTo` [Para [Link ("",[],[]) [Str "FooBar"] ("FooBar","")]] + , testCase "/SubCase1" $ "/SubCase1" `readsTo` [Para [Link ("",[],[]) [Str "/SubCase1"] ("/SubCase1","")]] + , testCase "Sub/Case2" $ "Sub/Case2" `readsTo` [Para [Link ("",[],[]) [Str "Sub/Case2"] ("Sub/Case2","")]] + , testCase "bracket1" $ "[[foo]]" `readsTo` [Para [Link ("",[],[]) [Str "foo"] ("foo","")]] + , testCase "labelled" $ "[[foo|bar]]"`readsTo` [Para [Link ("",[],[]) [Str "bar"] ("foo","")]] + , testCase "banglink" $ "!NotLink" `readsTo` [Para [Str "!NotLink"]] + , testCase "notalink" $ "Not''''''Link" `readsTo` [Para [Str "NotLink"]] + , testCase "singular1" $ "SinGular''''''s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] + , testCase "singular2" $ "SinGular``s" `readsTo` [Para [Link ("",[],[]) [Str "SinGular"] ("SinGular",""), Str "s"]] + , testCase "anchor1" $ "[[#foo]]" `readsTo` [Para [Link ("",[],[]) [Str "#foo"] ("#foo","")]] + , testCase "anchor2" $ "[[#foo|bar]]"`readsTo` [Para [Link ("",[],[]) [Str "bar"] ("#foo","")]] + , testCase "anchor3" $ "[[foo#bar]]"`readsTo` [Para [Link ("",[],[]) [Str "foo#bar"] ("foo#bar","")]] + , testCase "anchor4" $ "[[foo#bar|baz]]"`readsTo` [Para [Link ("",[],[]) [Str "baz"] ("foo#bar","")]] + , testCase "bareUri" $ "http://example.com" `readsTo` [Para [Link nullAttr [Str "http://example.com"] ("http://example.com","")]] + , testCase "bareEmail" $ "jon@example.com" `readsTo` [Para [Link nullAttr [Str "jon@example.com"] ("mailto:jon@example.com","")]] + ] + + , testGroup "blocks" + [ testCase "comment1" $ "##hi" `readsTo` [] + -- processing instruction comment lines cause paragraph breaks + , testCase "comment2" $ "hello\n##hi\nworld" `readsTo` [Para [Str "hello"],Para [Str "world"]] + , testCase "notcomment1" $ "\n#hi" `readsTo` [Para [Str "#hi"]] + , testCase "notcomment2" $ ".##hi" `readsTo` [Para [Str ".##hi"]] + , testGroup "parser" + [ testCase "parser1" $ "{{{\nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserChompHead" $ "{{{ \nhi\n}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserChomptail" $ "{{{\nhi\n }}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserMultiLine" $ "{{{\nhi\nthere\n}}}" `readsTo` [CodeBlock nullAttr "hi\nthere"] + , testCase "parser4delims" $ "{{{{\nhi\n}}}}" `readsTo` [CodeBlock nullAttr "hi"] + , testCase "parserInnerDelim" $ "{{{{\n{{{hi}}}\n}}}}" `readsTo` [CodeBlock nullAttr "{{{hi}}}"] + , testCase "parserCustomDelim"$ "{{{badidea\nhi\nbadidea}}}"`readsTo` [CodeBlock nullAttr "hi"] + , testGroup "parserHighlight" + [ testCase "parserHaskell1" $ "{{{#!highlight haskell\nx = 1\n}}}" `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"] + , testCase "parserHaskell2" $ "{{{#!highlight hs\nx = 1\n}}}" `readsTo` [CodeBlock ("",["haskell"],[]) "x = 1"] + , testCase "hlightShortcut1" $ "{{{#!java\nfoo\n}}}" `readsTo` [CodeBlock ("",["java"],[]) "foo"] + , testCase "hlightShortcut2" $ "{{{#!irc\nfoo\n}}}" `readsTo` [CodeBlock ("",["default"],[]) "foo"] + , testCase "hlightEmpty" $ "{{{#!highlight\nfoo\n}}}" `readsTo` [CodeBlock ("",["default"],[]) "foo"] + + ] + , testGroup "parserWiki" + [ testCase "wikiClass" $ "{{{#!wiki red\nfoo\n}}}" `readsTo` [Div ("",["red"],[]) [Para [Str "foo"] ] ] + ] + ] + ] + , testGroup "emptyDelims" + [ testCase "empty bold" $ "''''''" `readsTo` [Para []] + , testCase "empty italic" $ "''''" `readsTo` [Para []] + , testCase "empty code" $ "``" `readsTo` [Para []] + ] + , testGroup "includes" + [ testCase "basicInclude" $ "<>" `readsTo` [Para [RawInline "moinmoin" "foo"]] + ] + ] + +main :: IO () +main = defaultMain $ testGroup "." tests diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 9ae97d9c0af9..b96e7462b8bf 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -22,6 +22,7 @@ import qualified Tests.Readers.JATS import qualified Tests.Readers.Jira import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown +import qualified Tests.Readers.MoinMoin import qualified Tests.Readers.Muse import qualified Tests.Readers.ODT import qualified Tests.Readers.Org @@ -90,6 +91,7 @@ tests pandocPath = testGroup "pandoc tests" , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup "Markdown" Tests.Readers.Markdown.tests + , testGroup "MoinMoin" Tests.Readers.MoinMoin.tests , testGroup "HTML" Tests.Readers.HTML.tests , testGroup "JATS" Tests.Readers.JATS.tests , testGroup "Jira" Tests.Readers.Jira.tests @@ -128,4 +130,4 @@ main = do _ -> inDirectory "test" $ do fp <- getExecutablePath -- putStrLn $ "Using pandoc executable at " ++ fp - defaultMain $ tests fp + defaultMain $ tests fp \ No newline at end of file diff --git a/testMoin.hs b/testMoin.hs new file mode 100644 index 000000000000..8c222615eed2 --- /dev/null +++ b/testMoin.hs @@ -0,0 +1,52 @@ +module Main where + +import qualified Data.Text as T +import qualified Data.Text.IO as TIO +import Text.Pandoc +import qualified Text.Pandoc.Parsing as P +import Data.Either (fromRight) +import Text.ParserCombinators.Parsec +import Text.Parsec +import Text.Parsec.Char +import Data.Char -- isUpper etc + +import Text.Show.Pretty + +sampleMW = T.pack "\ +\= sample mediawiki doc =\n\ +\__TOC__\n\ +\This is a ''sample'' CamelCase document.\n\ +\[https://jmtd.net jon's homepage]\n\ +\\n\ +\: indented reply\n\ +\\n\ +\[[Category:Foo]]" + + +-- this exposes bugs in the Mediawiki reader (level 2 does not get captured as a nested DL) +tinyMW = T.pack "level 0\n:level 1\n::level 2" + +parsedMW = (fromRight (error "") . runPure . readMediaWiki def) sampleMW + +parseMM = fromRight (error "?") . runPure . readMoinMoin def + +main = do + -- what structure do we get from a Mediawiki doc? + -- (putStrLn . ppShow) parsedMW + -- putStrLn "##################################" + + -- what happens to definition list in markdown output? + ---- the definition list just goes away! + --(handleError . runPure . writeMediaWiki def) parsedMW >>= TIO.putStrLn + --putStrLn "##################################" + + sampleMM <- TIO.readFile "testmoin.txt" + + -- or use writeNative + (putStrLn . ppShow . parseMM) sampleMM + putStrLn "\n##################################\n" + + result <- runIO $ + readMoinMoin def sampleMM >>= writeMarkdown def + mdwn <- handleError result + TIO.putStrLn mdwn diff --git a/testmoin.txt b/testmoin.txt new file mode 100644 index 000000000000..787fd70cad46 --- /dev/null +++ b/testmoin.txt @@ -0,0 +1,29 @@ +#format wiki +#language en +#pragma supplementation-page on + +== Jon Dowland == + +<> + + * [[http://jmtd.net|jmtd.net]] + * another bullet + +''mostly'' meta '''indeed'''. This is __useful__. + +what does `monospace` (look) like? different to ? + +what goes ^up^, must ] come ,,down,,. + +what --(happened in)-- there? + +This is ~+larger+~ text, and this is ~-smaller-~. + +HelpOnEditing/SubPages should be a single link. +/SubPage should be a link. +Wiki''''''Name should result in plain string WikiName. +Same with !WikiName. + +---- + +CategoryHomepage