1{-# LANGUAGE OverloadedStrings #-} 2{- | 3 Module : Text.Pandoc.Readers.MediaWiki 4 Copyright : Copyright (C) 2012-2021 John MacFarlane 5 License : GNU GPL, version 2 or above 6 7 Maintainer : John MacFarlane <jgm@berkeley.edu> 8 Stability : alpha 9 Portability : portable 10 11Conversion of mediawiki text to 'Pandoc' document. 12-} 13{- 14TODO: 15_ correctly handle tables within tables 16_ parse templates? 17-} 18module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where 19 20import Control.Monad 21import Control.Monad.Except (throwError) 22import Data.Char (isDigit, isSpace) 23import qualified Data.Foldable as F 24import Data.List (intersperse) 25import Data.Maybe (fromMaybe, maybeToList) 26import Data.Sequence (ViewL (..), viewl, (<|)) 27import qualified Data.Set as Set 28import Data.Text (Text) 29import qualified Data.Text as T 30import Text.HTML.TagSoup 31import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) 32import qualified Text.Pandoc.Builder as B 33import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) 34import Text.Pandoc.Definition 35import Text.Pandoc.Logging 36import Text.Pandoc.Options 37import Text.Pandoc.Parsing hiding (nested) 38import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) 39import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, 40 trim, splitTextBy, tshow) 41import Text.Pandoc.Walk (walk) 42import Text.Pandoc.XML (fromEntities) 43 44-- | Read mediawiki from an input string and return a Pandoc document. 45readMediaWiki :: PandocMonad m 46 => ReaderOptions -- ^ Reader options 47 -> Text -- ^ String to parse (assuming @'\n'@ line endings) 48 -> m Pandoc 49readMediaWiki opts s = do 50 parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts 51 , mwMaxNestingLevel = 4 52 , mwNextLinkNumber = 1 53 , mwCategoryLinks = [] 54 , mwIdentifierList = Set.empty 55 , mwLogMessages = [] 56 , mwInTT = False 57 } 58 (crFilter s <> "\n") 59 case parsed of 60 Right result -> return result 61 Left e -> throwError e 62 63data MWState = MWState { mwOptions :: ReaderOptions 64 , mwMaxNestingLevel :: Int 65 , mwNextLinkNumber :: Int 66 , mwCategoryLinks :: [Inlines] 67 , mwIdentifierList :: Set.Set Text 68 , mwLogMessages :: [LogMessage] 69 , mwInTT :: Bool 70 } 71 72type MWParser m = ParserT Text MWState m 73 74instance HasReaderOptions MWState where 75 extractReaderOptions = mwOptions 76 77instance HasIdentifierList MWState where 78 extractIdentifierList = mwIdentifierList 79 updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st } 80 81instance HasLogMessages MWState where 82 addLogMessage m s = s{ mwLogMessages = m : mwLogMessages s } 83 getLogMessages = reverse . mwLogMessages 84 85-- 86-- auxiliary functions 87-- 88 89-- This is used to prevent exponential blowups for things like: 90-- ''a'''a''a'''a''a'''a''a'''a 91nested :: PandocMonad m => MWParser m a -> MWParser m a 92nested p = do 93 nestlevel <- mwMaxNestingLevel `fmap` getState 94 guard $ nestlevel > 0 95 updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 } 96 res <- p 97 updateState $ \st -> st{ mwMaxNestingLevel = nestlevel } 98 return res 99 100specialChars :: [Char] 101specialChars = "'[]<=&*{}|\":\\" 102 103spaceChars :: [Char] 104spaceChars = " \n\t" 105 106sym :: PandocMonad m => Text -> MWParser m () 107sym s = () <$ try (string $ T.unpack s) 108 109newBlockTags :: [Text] 110newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] 111 112isBlockTag' :: Tag Text -> Bool 113isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && 114 t `notElem` eitherBlockOrInline 115isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && 116 t `notElem` eitherBlockOrInline 117isBlockTag' tag = isBlockTag tag 118 119isInlineTag' :: Tag Text -> Bool 120isInlineTag' (TagComment _) = True 121isInlineTag' t = not (isBlockTag' t) 122 123eitherBlockOrInline :: [Text] 124eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", 125 "map", "area", "object"] 126 127htmlComment :: PandocMonad m => MWParser m () 128htmlComment = () <$ htmlTag isCommentTag 129 130inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines 131inlinesInTags tag = try $ do 132 (_,raw) <- htmlTag (~== TagOpen tag []) 133 if T.any (== '/') raw -- self-closing tag 134 then return mempty 135 else trimInlines . mconcat <$> 136 manyTill inline (htmlTag (~== TagClose tag)) 137 138blocksInTags :: PandocMonad m => Text -> MWParser m Blocks 139blocksInTags tag = try $ do 140 (_,raw) <- htmlTag (~== TagOpen tag []) 141 let closer = if tag == "li" 142 then htmlTag (~== TagClose ("li" :: Text)) 143 <|> lookAhead ( 144 htmlTag (~== TagOpen ("li" :: Text) []) 145 <|> htmlTag (~== TagClose ("ol" :: Text)) 146 <|> htmlTag (~== TagClose ("ul" :: Text))) 147 else htmlTag (~== TagClose tag) 148 if T.any (== '/') raw -- self-closing tag 149 then return mempty 150 else mconcat <$> manyTill block closer 151 152textInTags :: PandocMonad m => Text -> MWParser m Text 153textInTags tag = try $ do 154 (_,raw) <- htmlTag (~== TagOpen tag []) 155 if T.any (== '/') raw -- self-closing tag 156 then return "" 157 else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag)) 158 159-- 160-- main parser 161-- 162 163parseMediaWiki :: PandocMonad m => MWParser m Pandoc 164parseMediaWiki = do 165 bs <- mconcat <$> many block 166 spaces 167 eof 168 categoryLinks <- reverse . mwCategoryLinks <$> getState 169 let categories = if null categoryLinks 170 then mempty 171 else B.para $ mconcat $ intersperse B.space categoryLinks 172 reportLogMessages 173 return $ B.doc $ bs <> categories 174 175-- 176-- block parsers 177-- 178 179block :: PandocMonad m => MWParser m Blocks 180block = do 181 res <- mempty <$ skipMany1 blankline 182 <|> table 183 <|> header 184 <|> hrule 185 <|> orderedList 186 <|> bulletList 187 <|> definitionList 188 <|> mempty <$ try (spaces *> htmlComment) 189 <|> preformatted 190 <|> blockTag 191 <|> (B.rawBlock "mediawiki" <$> template) 192 <|> para 193 trace (T.take 60 $ tshow $ B.toList res) 194 return res 195 196para :: PandocMonad m => MWParser m Blocks 197para = do 198 contents <- trimInlines . mconcat <$> many1 inline 199 if F.all (==Space) contents 200 then return mempty 201 else return $ B.para contents 202 203table :: PandocMonad m => MWParser m Blocks 204table = do 205 tableStart 206 styles <- option [] $ 207 parseAttrs <* skipMany spaceChar <* optional (char '|') 208 skipMany spaceChar 209 optional $ template >> skipMany spaceChar 210 optional blanklines 211 let tableWidth = case lookup "width" styles of 212 Just w -> fromMaybe 1.0 $ parseWidth w 213 Nothing -> 1.0 214 caption <- option mempty tableCaption 215 optional rowsep 216 hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!') 217 (cellspecs',hdr) <- unzip <$> tableRow 218 let widths = map ((tableWidth *) . snd) cellspecs' 219 let restwidth = tableWidth - sum widths 220 let zerocols = length $ filter (==0.0) widths 221 let defaultwidth = if zerocols == 0 || zerocols == length widths 222 then ColWidthDefault 223 else ColWidth $ restwidth / fromIntegral zerocols 224 let widths' = map (\w -> if w > 0 then ColWidth w else defaultwidth) widths 225 let cellspecs = zip (map fst cellspecs') widths' 226 rows' <- many $ try $ rowsep *> (map snd <$> tableRow) 227 optional blanklines 228 tableEnd 229 let cols = length hdr 230 let (headers,rows) = if hasheader 231 then (hdr, rows') 232 else (replicate cols mempty, hdr:rows') 233 let toRow = Row nullAttr . map B.simpleCell 234 toHeaderRow l = [toRow l | not (null l)] 235 return $ B.table (B.simpleCaption $ B.plain caption) 236 cellspecs 237 (TableHead nullAttr $ toHeaderRow headers) 238 [TableBody nullAttr 0 [] $ map toRow rows] 239 (TableFoot nullAttr []) 240 241parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] 242parseAttrs = many1 parseAttr 243 244parseAttr :: PandocMonad m => MWParser m (Text, Text) 245parseAttr = try $ do 246 skipMany spaceChar 247 k <- many1Char letter 248 char '=' 249 v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) 250 <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') 251 return (k,v) 252 253tableStart :: PandocMonad m => MWParser m () 254tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|" 255 256tableEnd :: PandocMonad m => MWParser m () 257tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}" 258 259rowsep :: PandocMonad m => MWParser m () 260rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <* 261 many (char '-') <* optional parseAttrs <* blanklines 262 263cellsep :: PandocMonad m => MWParser m () 264cellsep = try $ do 265 col <- sourceColumn <$> getPosition 266 skipSpaces 267 let pipeSep = do 268 char '|' 269 notFollowedBy (oneOf "-}+") 270 if col == 1 271 then optional (char '|') 272 else void (char '|') 273 let exclSep = do 274 char '!' 275 if col == 1 276 then optional (char '!') 277 else void (char '!') 278 pipeSep <|> exclSep 279 280tableCaption :: PandocMonad m => MWParser m Inlines 281tableCaption = try $ do 282 guardColumnOne 283 skipSpaces 284 sym "|+" 285 optional (try $ parseAttrs *> skipSpaces *> char '|' *> blanklines) 286 trimInlines . mconcat <$> 287 many (notFollowedBy (cellsep <|> rowsep) *> inline) 288 289tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] 290tableRow = try $ skipMany htmlComment *> many tableCell 291 292tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) 293tableCell = try $ do 294 cellsep 295 skipMany spaceChar 296 attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* 297 notFollowedBy (char '|') 298 skipMany spaceChar 299 pos' <- getPosition 300 ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> 301 ((snd <$> withRaw table) <|> countChar 1 anyChar)) 302 bs <- parseFromString (do setPosition pos' 303 mconcat <$> many block) ls 304 let align = case lookup "align" attrs of 305 Just "left" -> AlignLeft 306 Just "right" -> AlignRight 307 Just "center" -> AlignCenter 308 _ -> AlignDefault 309 let width = case lookup "width" attrs of 310 Just xs -> fromMaybe 0.0 $ parseWidth xs 311 Nothing -> 0.0 312 return ((align, width), bs) 313 314parseWidth :: Text -> Maybe Double 315parseWidth s = 316 case T.unsnoc s of 317 Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds 318 _ -> Nothing 319 320template :: PandocMonad m => MWParser m Text 321template = try $ do 322 string "{{" 323 notFollowedBy (char '{') 324 lookAhead $ letter <|> digit <|> char ':' 325 let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar 326 contents <- manyTill chunk (try $ string "}}") 327 return $ "{{" <> T.concat contents <> "}}" 328 329blockTag :: PandocMonad m => MWParser m Blocks 330blockTag = do 331 (tag, _) <- lookAhead $ htmlTag isBlockTag' 332 case tag of 333 TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" 334 TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre" 335 TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs 336 TagOpen "source" attrs -> syntaxhighlight "source" attrs 337 TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> 338 textInTags "haskell" 339 TagOpen "gallery" _ -> blocksInTags "gallery" 340 TagOpen "p" _ -> mempty <$ htmlTag (~== tag) 341 TagClose "p" -> mempty <$ htmlTag (~== tag) 342 _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) 343 344trimCode :: Text -> Text 345trimCode t = case T.uncons t of 346 Just ('\n', xs) -> stripTrailingNewlines xs 347 _ -> stripTrailingNewlines t 348 349syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks 350syntaxhighlight tag attrs = try $ do 351 let mblang = lookup "lang" attrs 352 let mbstart = lookup "start" attrs 353 let mbline = lookup "line" attrs 354 let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline 355 let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart 356 contents <- textInTags tag 357 return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents 358 359hrule :: PandocMonad m => MWParser m Blocks 360hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline) 361 362guardColumnOne :: PandocMonad m => MWParser m () 363guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) 364 365preformatted :: PandocMonad m => MWParser m Blocks 366preformatted = try $ do 367 guardColumnOne 368 char ' ' 369 let endline' = B.linebreak <$ try (newline <* char ' ') 370 let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar) 371 let spToNbsp ' ' = '\160' 372 spToNbsp x = x 373 let nowiki' = mconcat . intersperse B.linebreak . map B.str . 374 T.lines . fromEntities . T.map spToNbsp <$> try 375 (htmlTag (~== TagOpen ("nowiki" :: Text) []) *> 376 manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))) 377 let inline' = whitespace' <|> endline' <|> nowiki' 378 <|> try (notFollowedBy newline *> inline) 379 contents <- mconcat <$> many1 inline' 380 let spacesStr (Str xs) = T.all isSpace xs 381 spacesStr _ = False 382 if F.all spacesStr contents 383 then return mempty 384 else return $ B.para $ encode contents 385 386encode :: Inlines -> Inlines 387encode = B.fromList . normalizeCode . B.toList . walk strToCode 388 where strToCode (Str s) = Code ("",[],[]) s 389 strToCode Space = Code ("",[],[]) " " 390 strToCode x = x 391 normalizeCode [] = [] 392 normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = 393 normalizeCode $ Code a1 (x <> y) : zs 394 normalizeCode (x:xs) = x : normalizeCode xs 395 396header :: PandocMonad m => MWParser m Blocks 397header = try $ do 398 guardColumnOne 399 lev <- length <$> many1 (char '=') 400 guard $ lev <= 6 401 contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=') 402 opts <- mwOptions <$> getState 403 attr <- (if isEnabled Ext_gfm_auto_identifiers opts 404 then id 405 else modifyIdentifier) <$> registerHeader nullAttr contents 406 return $ B.headerWith attr lev contents 407 408-- See #4731: 409modifyIdentifier :: Attr -> Attr 410modifyIdentifier (ident,cl,kv) = (ident',cl,kv) 411 where ident' = T.map (\c -> if c == '-' then '_' else c) ident 412 413bulletList :: PandocMonad m => MWParser m Blocks 414bulletList = B.bulletList <$> 415 ( many1 (listItem '*') 416 <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <* 417 optional (htmlTag (~== TagClose ("ul" :: Text)))) ) 418 419orderedList :: PandocMonad m => MWParser m Blocks 420orderedList = 421 (B.orderedList <$> many1 (listItem '#')) 422 <|> try 423 (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) []) 424 spaces 425 items <- many (listItem '#' <|> li) 426 optional (htmlTag (~== TagClose ("ol" :: Text))) 427 let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag 428 return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) 429 430definitionList :: PandocMonad m => MWParser m Blocks 431definitionList = B.definitionList <$> many1 defListItem 432 433defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks]) 434defListItem = try $ do 435 terms <- mconcat . intersperse B.linebreak <$> many defListTerm 436 -- we allow dd with no dt, or dt with no dd 437 defs <- if null terms 438 then notFollowedBy 439 (try $ skipMany1 (char ':') >> string "<math>") *> 440 many1 (listItem ':') 441 else many (listItem ':') 442 return (terms, defs) 443 444defListTerm :: PandocMonad m => MWParser m Inlines 445defListTerm = do 446 guardColumnOne 447 char ';' 448 skipMany spaceChar 449 pos' <- getPosition 450 anyLine >>= parseFromString (do setPosition pos' 451 trimInlines . mconcat <$> many inline) 452 453listStart :: PandocMonad m => Char -> MWParser m () 454listStart c = char c *> notFollowedBy listStartChar 455 456listStartChar :: PandocMonad m => MWParser m Char 457listStartChar = oneOf "*#;:" 458 459anyListStart :: PandocMonad m => MWParser m Char 460anyListStart = guardColumnOne >> oneOf "*#:;" 461 462li :: PandocMonad m => MWParser m Blocks 463li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *> 464 (firstParaToPlain <$> blocksInTags "li") <* spaces 465 466listItem :: PandocMonad m => Char -> MWParser m Blocks 467listItem c = try $ do 468 guardColumnOne 469 extras <- many (try $ char c <* lookAhead listStartChar) 470 if null extras 471 then listItem' c 472 else do 473 skipMany spaceChar 474 pos' <- getPosition 475 first <- T.concat <$> manyTill listChunk newline 476 rest <- many 477 (try $ string extras *> lookAhead listStartChar *> 478 (T.concat <$> manyTill listChunk newline)) 479 contents <- parseFromString (do setPosition pos' 480 many1 $ listItem' c) 481 (T.unlines (first : rest)) 482 case c of 483 '*' -> return $ B.bulletList contents 484 '#' -> return $ B.orderedList contents 485 ':' -> return $ B.definitionList [(mempty, contents)] 486 _ -> mzero 487 488-- The point of this is to handle stuff like 489-- * {{cite book 490-- | blah 491-- | blah 492-- }} 493-- * next list item 494-- which seems to be valid mediawiki. 495listChunk :: PandocMonad m => MWParser m Text 496listChunk = template <|> countChar 1 anyChar 497 498listItem' :: PandocMonad m => Char -> MWParser m Blocks 499listItem' c = try $ do 500 listStart c 501 skipMany spaceChar 502 pos' <- getPosition 503 first <- T.concat <$> manyTill listChunk newline 504 rest <- many (try $ char c *> lookAhead listStartChar *> 505 (T.concat <$> manyTill listChunk newline)) 506 parseFromString (do setPosition pos' 507 firstParaToPlain . mconcat <$> many1 block) 508 $ T.unlines $ first : rest 509 510firstParaToPlain :: Blocks -> Blocks 511firstParaToPlain contents = 512 case viewl (B.unMany contents) of 513 Para xs :< ys -> B.Many $ Plain xs <| ys 514 _ -> contents 515 516-- 517-- inline parsers 518-- 519 520inline :: PandocMonad m => MWParser m Inlines 521inline = whitespace 522 <|> url 523 <|> str 524 <|> doubleQuotes 525 <|> strong 526 <|> emph 527 <|> image 528 <|> internalLink 529 <|> externalLink 530 <|> math 531 <|> inlineTag 532 <|> B.singleton <$> charRef 533 <|> inlineHtml 534 <|> (B.rawInline "mediawiki" <$> variable) 535 <|> (B.rawInline "mediawiki" <$> template) 536 <|> special 537 538str :: PandocMonad m => MWParser m Inlines 539str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) 540 541math :: PandocMonad m => MWParser m Inlines 542math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math")) 543 <|> (B.math . trim <$> textInTags "math") 544 <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd)) 545 <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd)) 546 where dmStart = string "\\[" 547 dmEnd = try (string "\\]") 548 mStart = string "\\(" 549 mEnd = try (string "\\)") 550 551variable :: PandocMonad m => MWParser m Text 552variable = try $ do 553 string "{{{" 554 contents <- manyTillChar anyChar (try $ string "}}}") 555 return $ "{{{" <> contents <> "}}}" 556 557inlineTag :: PandocMonad m => MWParser m Inlines 558inlineTag = do 559 (tag, _) <- lookAhead $ htmlTag isInlineTag' 560 case tag of 561 TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" 562 TagOpen "nowiki" _ -> try $ do 563 (_,raw) <- htmlTag (~== tag) 564 if T.any (== '/') raw 565 then return mempty 566 else B.text . fromEntities <$> 567 manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))) 568 TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too 569 *> optional blankline) 570 TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" 571 TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" 572 TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" 573 TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" 574 TagOpen "code" _ -> encode <$> inlinesInTags "code" 575 TagOpen "tt" _ -> do 576 inTT <- mwInTT <$> getState 577 updateState $ \st -> st{ mwInTT = True } 578 result <- encode <$> inlinesInTags "tt" 579 updateState $ \st -> st{ mwInTT = inTT } 580 return result 581 TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask" 582 _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) 583 584special :: PandocMonad m => MWParser m Inlines 585special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *> 586 oneOf specialChars) 587 588inlineHtml :: PandocMonad m => MWParser m Inlines 589inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' 590 591whitespace :: PandocMonad m => MWParser m Inlines 592whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment) 593 <|> B.softbreak <$ endline 594 595endline :: PandocMonad m => MWParser m () 596endline = () <$ try (newline <* 597 notFollowedBy spaceChar <* 598 notFollowedBy newline <* 599 notFollowedBy' hrule <* 600 notFollowedBy tableStart <* 601 notFollowedBy' header <* 602 notFollowedBy anyListStart) 603 604imageIdentifiers :: PandocMonad m => [MWParser m ()] 605imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers] 606 where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", 607 "Bild"] 608 609image :: PandocMonad m => MWParser m Inlines 610image = try $ do 611 sym "[[" 612 choice imageIdentifiers 613 fname <- addUnderscores <$> many1Char (noneOf "|]") 614 _ <- many imageOption 615 dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px") 616 <|> return [] 617 _ <- many imageOption 618 let kvs = case dims of 619 [w] -> [("width", w)] 620 [w, h] -> [("width", w), ("height", h)] 621 _ -> [] 622 let attr = ("", [], kvs) 623 caption <- (B.str fname <$ sym "]]") 624 <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) 625 return $ B.imageWith attr fname ("fig:" <> stringify caption) caption 626 627imageOption :: PandocMonad m => MWParser m Text 628imageOption = try $ char '|' *> opt 629 where 630 opt = try (oneOfStrings [ "border", "thumbnail", "frameless" 631 , "thumb", "upright", "left", "right" 632 , "center", "none", "baseline", "sub" 633 , "super", "top", "text-top", "middle" 634 , "bottom", "text-bottom" ]) 635 <|> try (textStr "frame") 636 <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) 637 638addUnderscores :: Text -> Text 639addUnderscores = T.intercalate "_" . splitTextBy sep 640 where 641 sep c = isSpace c || c == '_' 642 643internalLink :: PandocMonad m => MWParser m Inlines 644internalLink = try $ do 645 sym "[[" 646 pagename <- T.unwords . T.words <$> manyChar (noneOf "|]") 647 label <- option (B.text pagename) $ char '|' *> 648 ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) 649 -- the "pipe trick" 650 -- [[Help:Contents|] -> "Contents" 651 <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) ) 652 sym "]]" 653 linktrail <- B.text <$> manyChar letter 654 let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) 655 if "Category:" `T.isPrefixOf` pagename 656 then do 657 updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } 658 return mempty 659 else return link 660 661externalLink :: PandocMonad m => MWParser m Inlines 662externalLink = try $ do 663 char '[' 664 (_, src) <- uri 665 lab <- try (trimInlines . mconcat <$> 666 (skipMany1 spaceChar *> manyTill inline (char ']'))) 667 <|> do char ']' 668 num <- mwNextLinkNumber <$> getState 669 updateState $ \st -> st{ mwNextLinkNumber = num + 1 } 670 return $ B.str $ tshow num 671 return $ B.link src "" lab 672 673url :: PandocMonad m => MWParser m Inlines 674url = do 675 (orig, src) <- uri 676 return $ B.link src "" (B.str orig) 677 678-- | Parses a list of inlines between start and end delimiters. 679inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines 680inlinesBetween start end = 681 trimInlines . mconcat <$> try (start >> many1Till inline end) 682 683emph :: PandocMonad m => MWParser m Inlines 684emph = B.emph <$> nested (inlinesBetween start end) 685 where start = sym "''" 686 end = try $ notFollowedBy' (() <$ strong) >> sym "''" 687 688strong :: PandocMonad m => MWParser m Inlines 689strong = B.strong <$> nested (inlinesBetween start end) 690 where start = sym "'''" 691 end = sym "'''" 692 693doubleQuotes :: PandocMonad m => MWParser m Inlines 694doubleQuotes = do 695 guardEnabled Ext_smart 696 inTT <- mwInTT <$> getState 697 guard (not inTT) 698 B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) 699 where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar 700 closeDoubleQuote = try $ sym "\"" 701