1{-# LANGUAGE FlexibleContexts #-} 2{-# LANGUAGE TupleSections #-} 3{-# LANGUAGE OverloadedStrings #-} 4{- | 5 Module : Text.Pandoc.Readers.Muse 6 Copyright : Copyright (C) 2017-2020 Alexander Krotov 7 License : GNU GPL, version 2 or above 8 9 Maintainer : Alexander Krotov <ilabdsf@gmail.com> 10 Stability : alpha 11 Portability : portable 12 13Conversion of Muse text to 'Pandoc' document. 14-} 15{- 16TODO: 17- <cite> tag 18-} 19module Text.Pandoc.Readers.Muse (readMuse) where 20 21import Control.Monad 22import Control.Monad.Reader 23import Control.Monad.Except (throwError) 24import Data.Bifunctor 25import Data.Default 26import Data.List (transpose, uncons) 27import qualified Data.Map as M 28import qualified Data.Set as Set 29import Data.Maybe (fromMaybe, isNothing, maybeToList) 30import Data.Text (Text) 31import qualified Data.Text as T 32import Text.Pandoc.Builder (Blocks, Inlines, underline) 33import qualified Text.Pandoc.Builder as B 34import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) 35import Text.Pandoc.Definition 36import Text.Pandoc.Error (PandocError (PandocParsecError)) 37import Text.Pandoc.Logging 38import Text.Pandoc.Options 39import Text.Pandoc.Parsing 40import Text.Pandoc.Shared (trimr, tshow) 41 42-- | Read Muse from an input string and return a Pandoc document. 43readMuse :: (PandocMonad m, ToSources a) 44 => ReaderOptions 45 -> a 46 -> m Pandoc 47readMuse opts s = do 48 let sources = toSources s 49 res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } 50 (initialSourceName sources) sources 51 case res of 52 Left e -> throwError $ PandocParsecError sources e 53 Right d -> return d 54 55type F = Future MuseState 56 57data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata 58 , museOptions :: ReaderOptions 59 , museIdentifierList :: Set.Set Text 60 , museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed 61 , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed 62 , museLogMessages :: [LogMessage] 63 , museNotes :: M.Map Text (SourcePos, F Blocks) 64 } 65 66instance Default MuseState where 67 def = MuseState { museMeta = return nullMeta 68 , museOptions = def 69 , museIdentifierList = Set.empty 70 , museLastStrPos = Nothing 71 , museLastSpacePos = Nothing 72 , museLogMessages = [] 73 , museNotes = M.empty 74 } 75 76data MuseEnv = 77 MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links 78 , museInPara :: Bool -- ^ True when parsing paragraph is not allowed 79 } 80 81instance Default MuseEnv where 82 def = MuseEnv { museInLink = False 83 , museInPara = False 84 } 85 86type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m) 87 88instance HasReaderOptions MuseState where 89 extractReaderOptions = museOptions 90 91instance HasIdentifierList MuseState where 92 extractIdentifierList = museIdentifierList 93 updateIdentifierList f st = st{ museIdentifierList = f $ museIdentifierList st } 94 95instance HasLastStrPosition MuseState where 96 setLastStrPos pos st = st{ museLastStrPos = pos } 97 getLastStrPos st = museLastStrPos st 98 99instance HasLogMessages MuseState where 100 addLogMessage m s = s{ museLogMessages = m : museLogMessages s } 101 getLogMessages = reverse . museLogMessages 102 103updateLastSpacePos :: Monad m => MuseParser m () 104updateLastSpacePos = getPosition >>= \pos -> 105 updateState $ \s -> s { museLastSpacePos = Just pos } 106 107-- | Parse Muse document 108parseMuse :: PandocMonad m => MuseParser m Pandoc 109parseMuse = do 110 many directive 111 blocks <- (:) <$> parseBlocks <*> many parseSection 112 eof 113 st <- getState 114 runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st <$ reportLogMessages 115 116-- * Utility functions 117 118-- | Trim up to one newline from the beginning of the string. 119lchop :: Text -> Text 120lchop s = case T.uncons s of 121 Just ('\n', xs) -> xs 122 _ -> s 123 124-- | Trim up to one newline from the end of the string. 125rchop :: Text -> Text 126rchop s = case T.unsnoc s of 127 Just (xs, '\n') -> xs 128 _ -> s 129 130unindent :: Text -> Text 131unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop 132 133dropSpacePrefix :: [Text] -> [Text] 134dropSpacePrefix lns = T.drop maxIndent <$> lns 135 where isSpaceChar c = c == ' ' || c == '\t' 136 maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns 137 same t = case T.uncons t of 138 Just (c, cs) -> T.all (== c) cs 139 Nothing -> True 140 141atStart :: PandocMonad m => MuseParser m () 142atStart = do 143 pos <- getPosition 144 st <- getState 145 guard $ museLastStrPos st /= Just pos 146 147noSpaceBefore :: PandocMonad m => MuseParser m () 148noSpaceBefore = do 149 pos <- getPosition 150 st <- getState 151 guard $ museLastSpacePos st /= Just pos 152 153firstColumn :: PandocMonad m => MuseParser m () 154firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) 155 156-- * Parsers 157 158-- | Parse end-of-line, which can be either a newline or end-of-file. 159eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () 160eol = void newline <|> eof 161 162getIndent :: PandocMonad m 163 => MuseParser m Int 164getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition 165 166-- ** HTML parsers 167 168openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)] 169openTag tag = try $ 170 char '<' *> textStr tag *> manyTill attr (char '>') 171 where 172 attr = try $ (,) 173 <$ many1 spaceChar 174 <*> many1Char (noneOf "=\n") 175 <* string "=\"" 176 <*> manyTillChar (noneOf "\"") (char '"') 177 178closeTag :: PandocMonad m => Text -> MuseParser m () 179closeTag tag = try $ string "</" *> textStr tag *> void (char '>') 180 181-- | Convert HTML attributes to Pandoc 'Attr' 182htmlAttrToPandoc :: [(Text, Text)] -> Attr 183htmlAttrToPandoc attrs = (ident, classes, keyvals) 184 where 185 ident = fromMaybe "" $ lookup "id" attrs 186 classes = maybe [] T.words $ lookup "class" attrs 187 keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] 188 189parseHtmlContent :: PandocMonad m 190 => Text -- ^ Tag name 191 -> MuseParser m (Attr, F Blocks) 192parseHtmlContent tag = try $ getIndent >>= \indent -> (,) 193 <$> fmap htmlAttrToPandoc (openTag tag) 194 <* manyTill spaceChar eol 195 <*> allowPara (parseBlocksTill (try $ indentWith indent *> closeTag tag)) 196 <* manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline 197 198-- ** Directive parsers 199 200-- While not documented, Emacs Muse allows "-" in directive name 201parseDirectiveKey :: PandocMonad m => MuseParser m Text 202parseDirectiveKey = char '#' *> manyChar (letter <|> char '-') 203 204parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines) 205parseEmacsDirective = (,) 206 <$> parseDirectiveKey 207 <* spaceChar 208 <*> (trimInlinesF . mconcat <$> manyTill inline' eol) 209 210parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines) 211parseAmuseDirective = (,) 212 <$> parseDirectiveKey 213 <* many1 spaceChar 214 <*> (trimInlinesF . mconcat <$> many1Till inline endOfDirective) 215 <* many blankline 216 where 217 endOfDirective = lookAhead $ eof <|> try (newline *> (void blankline <|> void parseDirectiveKey)) 218 219directive :: PandocMonad m => MuseParser m () 220directive = do 221 ext <- getOption readerExtensions 222 (key, value) <- if extensionEnabled Ext_amuse ext then parseAmuseDirective else parseEmacsDirective 223 updateState $ \st -> st { museMeta = B.setMeta (translateKey key) <$> value <*> museMeta st } 224 where translateKey "cover" = "cover-image" 225 translateKey x = x 226 227-- ** Block parsers 228 229allowPara :: MonadReader MuseEnv m => m a -> m a 230allowPara p = local (\s -> s { museInPara = False }) p 231 232-- | Parse section contents until EOF or next header 233parseBlocks :: PandocMonad m 234 => MuseParser m (F Blocks) 235parseBlocks = 236 try (parseEnd <|> 237 nextSection <|> 238 listStart <|> 239 blockStart <|> 240 paraStart) 241 where 242 nextSection = mempty <$ lookAhead headingStart 243 parseEnd = mempty <$ eof 244 blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock) 245 <*> allowPara parseBlocks 246 listStart = 247 uncurry (B.<>) <$> allowPara (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) 248 paraStart = do 249 indent <- length <$> many spaceChar 250 uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks 251 where p indent = if indent >= 2 && indent < 6 then fmap B.blockQuote else id 252 253-- | Parse section that starts with a header 254parseSection :: PandocMonad m 255 => MuseParser m (F Blocks) 256parseSection = 257 ((B.<>) <$> emacsHeading <*> parseBlocks) <|> 258 (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks) 259 260parseBlocksTill :: PandocMonad m 261 => MuseParser m a 262 -> MuseParser m (F Blocks) 263parseBlocksTill end = continuation 264 where 265 parseEnd = mempty <$ end 266 blockStart = (B.<>) <$> blockElements <*> allowPara continuation 267 listStart = uncurry (B.<>) <$> allowPara (anyListUntil (parseEnd <|> continuation)) 268 paraStart = uncurry (B.<>) <$> paraUntil (parseEnd <|> continuation) 269 continuation = try $ parseEnd <|> listStart <|> blockStart <|> paraStart 270 271listItemContentsUntil :: PandocMonad m 272 => Int 273 -> MuseParser m a 274 -> MuseParser m a 275 -> MuseParser m (F Blocks, a) 276listItemContentsUntil col pre end = p 277 where 278 p = try listStart <|> try blockStart <|> try paraStart 279 parsePre = (mempty,) <$> pre 280 parseEnd = (mempty,) <$> end 281 paraStart = do 282 (f, (r, e)) <- paraUntil (parsePre <|> continuation <|> parseEnd) 283 return (f B.<> r, e) 284 blockStart = first <$> ((B.<>) <$> blockElements) 285 <*> allowPara (parsePre <|> continuation <|> parseEnd) 286 listStart = do 287 (f, (r, e)) <- allowPara $ anyListUntil (parsePre <|> continuation <|> parseEnd) 288 return (f B.<> r, e) 289 continuation = try $ do blank <- optionMaybe blankline 290 skipMany blankline 291 indentWith col 292 local (\s -> s { museInPara = museInPara s && isNothing blank }) p 293 294parseBlock :: PandocMonad m => MuseParser m (F Blocks) 295parseBlock = do 296 res <- blockElements <|> para 297 trace (T.take 60 $ tshow $ B.toList $ runF res def) 298 return res 299 where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) 300 301blockElements :: PandocMonad m => MuseParser m (F Blocks) 302blockElements = (mempty <$ blankline) 303 <|> comment 304 <|> separator 305 <|> pagebreak 306 <|> example 307 <|> exampleTag 308 <|> literalTag 309 <|> centerTag 310 <|> rightTag 311 <|> quoteTag 312 <|> divTag 313 <|> biblioTag 314 <|> playTag 315 <|> verseTag 316 <|> lineBlock 317 <|> museGridTable 318 <|> table 319 <|> commentTag 320 321-- | Parse a line comment, starting with @;@ in the first column. 322comment :: PandocMonad m => MuseParser m (F Blocks) 323comment = try $ mempty 324 <$ firstColumn 325 <* char ';' 326 <* optional (spaceChar *> many (noneOf "\n")) 327 <* eol 328 329-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters. 330separator :: PandocMonad m => MuseParser m (F Blocks) 331separator = try $ pure B.horizontalRule 332 <$ string "----" 333 <* many (char '-') 334 <* many spaceChar 335 <* eol 336 337-- | Parse a page break 338pagebreak :: PandocMonad m => MuseParser m (F Blocks) 339pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always;")]) mempty) 340 <$ count 6 spaceChar 341 <* many spaceChar 342 <* string "* * * * *" 343 <* manyTill spaceChar eol 344 345headingStart :: PandocMonad m => MuseParser m (Text, Int) 346headingStart = try $ (,) 347 <$> option "" (try (parseAnchor <* manyTill spaceChar eol)) 348 <* firstColumn 349 <*> fmap length (many1 $ char '*') 350 <* spaceChar 351 352-- | Parse a single-line heading. 353emacsHeading :: PandocMonad m => MuseParser m (F Blocks) 354emacsHeading = try $ do 355 guardDisabled Ext_amuse 356 (anchorId, level) <- headingStart 357 content <- trimInlinesF . mconcat <$> manyTill inline eol 358 attr <- registerHeader (anchorId, [], []) (runF content def) 359 return $ B.headerWith attr level <$> content 360 361-- | Parse a multi-line heading. 362-- It is a Text::Amuse extension, Emacs Muse does not allow heading to span multiple lines. 363amuseHeadingUntil :: PandocMonad m 364 => MuseParser m a -- ^ Terminator parser 365 -> MuseParser m (F Blocks, a) 366amuseHeadingUntil end = try $ do 367 guardEnabled Ext_amuse 368 (anchorId, level) <- headingStart 369 (content, e) <- paraContentsUntil end 370 attr <- registerHeader (anchorId, [], []) (runF content def) 371 return (B.headerWith attr level <$> content, e) 372 373-- | Parse an example between @{{{@ and @}}}@. 374-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation. 375example :: PandocMonad m => MuseParser m (F Blocks) 376example = try $ pure . B.codeBlock 377 <$ string "{{{" 378 <* many spaceChar 379 <*> (unindent <$> manyTillChar anyChar (string "}}}")) 380 381-- | Parse an @\<example>@ tag. 382exampleTag :: PandocMonad m => MuseParser m (F Blocks) 383exampleTag = try $ fmap pure $ B.codeBlockWith 384 <$ many spaceChar 385 <*> (htmlAttrToPandoc <$> openTag "example") 386 <*> (unindent <$> manyTillChar anyChar (closeTag "example")) 387 <* manyTill spaceChar eol 388 389-- | Parse a @\<literal>@ tag as a raw block. 390-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'. 391literalTag :: PandocMonad m => MuseParser m (F Blocks) 392literalTag = try $ fmap pure $ B.rawBlock 393 <$ many spaceChar 394 <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML 395 <* manyTill spaceChar eol 396 <*> (unindent <$> manyTillChar anyChar (closeTag "literal")) 397 <* manyTill spaceChar eol 398 399-- | Parse @\<center>@ tag. 400-- Currently it is ignored as Pandoc cannot represent centered blocks. 401centerTag :: PandocMonad m => MuseParser m (F Blocks) 402centerTag = snd <$> parseHtmlContent "center" 403 404-- | Parse @\<right>@ tag. 405-- Currently it is ignored as Pandoc cannot represent centered blocks. 406rightTag :: PandocMonad m => MuseParser m (F Blocks) 407rightTag = snd <$> parseHtmlContent "right" 408 409-- | Parse @\<quote>@ tag. 410quoteTag :: PandocMonad m => MuseParser m (F Blocks) 411quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote" 412 413-- | Parse @\<div>@ tag. 414-- @\<div>@ tag is supported by Emacs Muse, but not Amusewiki 2.025. 415divTag :: PandocMonad m => MuseParser m (F Blocks) 416divTag = do 417 (attrs, content) <- parseHtmlContent "div" 418 return $ B.divWith attrs <$> content 419 420-- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@. 421-- @\<biblio>@ tag is supported only in Text::Amuse mode. 422biblioTag :: PandocMonad m => MuseParser m (F Blocks) 423biblioTag = fmap (B.divWith ("", ["biblio"], [])) . snd 424 <$ guardEnabled Ext_amuse 425 <*> parseHtmlContent "biblio" 426 427-- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@. 428-- @\<play>@ tag is supported only in Text::Amuse mode. 429playTag :: PandocMonad m => MuseParser m (F Blocks) 430playTag = do 431 guardEnabled Ext_amuse 432 fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play" 433 434verseLine :: PandocMonad m => MuseParser m (F Inlines) 435verseLine = (<>) 436 <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' '))) 437 <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol) 438 439-- | Parse @\<verse>@ tag. 440verseTag :: PandocMonad m => MuseParser m (F Blocks) 441verseTag = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence 442 <$ openTag "verse" 443 <* manyTill spaceChar eol 444 <*> manyTill (indentWith indent *> verseLine) (try $ indentWith indent *> closeTag "verse") 445 <* manyTill spaceChar eol 446 447-- | Parse @\<comment>@ tag. 448commentTag :: PandocMonad m => MuseParser m (F Blocks) 449commentTag = try $ mempty 450 <$ many spaceChar 451 <* openTag "comment" 452 <* manyTill anyChar (closeTag "comment") 453 <* manyTill spaceChar eol 454 455-- | Parse paragraph contents. 456paraContentsUntil :: PandocMonad m 457 => MuseParser m a -- ^ Terminator parser 458 -> MuseParser m (F Inlines, a) 459paraContentsUntil end = first (trimInlinesF . mconcat) 460 <$> manyUntil inline (try (manyTill spaceChar eol *> local (\s -> s { museInPara = True}) end)) 461 462-- | Parse a paragraph. 463paraUntil :: PandocMonad m 464 => MuseParser m a -- ^ Terminator parser 465 -> MuseParser m (F Blocks, a) 466paraUntil end = do 467 inPara <- asks museInPara 468 guard $ not inPara 469 first (fmap B.para) <$> paraContentsUntil end 470 471noteMarker' :: PandocMonad m 472 => Char 473 -> Char 474 -> MuseParser m Text 475noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r]) 476 <$ char l 477 <*> oneOf "123456789" 478 <*> manyTill digit (char r) 479 480noteMarker :: PandocMonad m => MuseParser m Text 481noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}' 482 483addNote :: PandocMonad m 484 => Text 485 -> SourcePos 486 -> F Blocks 487 -> MuseParser m () 488addNote ref pos content = do 489 oldnotes <- museNotes <$> getState 490 when (M.member ref oldnotes) 491 (logMessage $ DuplicateNoteReference ref pos) 492 updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } 493 494-- Amusewiki version of note 495-- Parsing is similar to list item, except that note marker is used instead of list marker 496amuseNoteBlockUntil :: PandocMonad m 497 => MuseParser m a 498 -> MuseParser m (F Blocks, a) 499amuseNoteBlockUntil end = try $ do 500 guardEnabled Ext_amuse 501 ref <- noteMarker 502 pos <- getPosition 503 void spaceChar <|> lookAhead eol 504 (content, e) <- allowPara $ listItemContentsUntil (sourceColumn pos) (Prelude.fail "x") end 505 addNote ref pos content 506 return (mempty, e) 507 508-- Emacs version of note 509-- Notes are allowed only at the end of text, no indentation is required. 510emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks) 511emacsNoteBlock = try $ do 512 guardDisabled Ext_amuse 513 ref <- noteMarker 514 pos <- getPosition 515 content <- fmap mconcat blocksTillNote 516 addNote ref pos content 517 return mempty 518 where 519 blocksTillNote = 520 many1Till parseBlock (eof <|> () <$ lookAhead noteMarker) 521 522-- 523-- Verse markup 524-- 525 526-- | Parse a line block indicated by @\'>\'@ characters. 527lineBlock :: PandocMonad m => MuseParser m (F Blocks) 528lineBlock = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence 529 <$> (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent) 530 where 531 blankVerseLine = try $ mempty <$ char '>' <* blankline 532 nonblankVerseLine = try (string "> ") *> verseLine 533 534-- *** List parsers 535 536bulletListItemsUntil :: PandocMonad m 537 => Int -- ^ Indentation 538 -> MuseParser m a -- ^ Terminator parser 539 -> MuseParser m ([F Blocks], a) 540bulletListItemsUntil indent end = try $ do 541 char '-' 542 void spaceChar <|> lookAhead eol 543 (x, (xs, e)) <- allowPara $ listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end) 544 return (x:xs, e) 545 546-- | Parse a bullet list. 547bulletListUntil :: PandocMonad m 548 => MuseParser m a 549 -> MuseParser m (F Blocks, a) 550bulletListUntil end = try $ do 551 indent <- getIndent 552 guard $ indent /= 0 553 first (fmap B.bulletList . sequence) <$> bulletListItemsUntil indent end 554 555museOrderedListMarker :: PandocMonad m 556 => ListNumberStyle 557 -> MuseParser m Int 558museOrderedListMarker style = 559 snd <$> p <* char '.' 560 where p = case style of 561 Decimal -> decimal 562 UpperRoman -> upperRoman 563 LowerRoman -> lowerRoman 564 UpperAlpha -> upperAlpha 565 LowerAlpha -> lowerAlpha 566 _ -> Prelude.fail "Unhandled case" 567 568orderedListItemsUntil :: PandocMonad m 569 => Int 570 -> ListNumberStyle 571 -> MuseParser m a 572 -> MuseParser m ([F Blocks], a) 573orderedListItemsUntil indent style end = 574 continuation 575 where 576 continuation = try $ do 577 pos <- getPosition 578 void spaceChar <|> lookAhead eol 579 (x, (xs, e)) <- allowPara $ listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end) 580 return (x:xs, e) 581 582-- | Parse an ordered list. 583orderedListUntil :: PandocMonad m 584 => MuseParser m a 585 -> MuseParser m (F Blocks, a) 586orderedListUntil end = try $ do 587 indent <- getIndent 588 guard $ indent /= 0 589 (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha 590 char '.' 591 first (fmap (B.orderedListWith (start, style, Period)) . sequence) 592 <$> orderedListItemsUntil indent style end 593 594descriptionsUntil :: PandocMonad m 595 => Int 596 -> MuseParser m a 597 -> MuseParser m ([F Blocks], a) 598descriptionsUntil indent end = do 599 void spaceChar <|> lookAhead eol 600 (x, (xs, e)) <- allowPara $ listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end) 601 return (x:xs, e) 602 603definitionListItemsUntil :: PandocMonad m 604 => Int 605 -> MuseParser m a 606 -> MuseParser m ([F (Inlines, [Blocks])], a) 607definitionListItemsUntil indent end = 608 continuation 609 where 610 continuation = try $ do 611 pos <- getPosition 612 term <- trimInlinesF . mconcat <$> manyTill inline' (try $ string "::") 613 (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> continuation) <|> (([],) <$> end)) 614 let xx = (,) <$> term <*> sequence x 615 return (xx:xs, e) 616 617-- | Parse a definition list. 618definitionListUntil :: PandocMonad m 619 => MuseParser m a -- ^ Terminator parser 620 -> MuseParser m (F Blocks, a) 621definitionListUntil end = try $ do 622 indent <- getIndent 623 guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse 624 first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end 625 626anyListUntil :: PandocMonad m 627 => MuseParser m a -- ^ Terminator parser 628 -> MuseParser m (F Blocks, a) 629anyListUntil end = 630 bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end 631 632-- *** Table parsers 633 634-- | Internal Muse table representation. 635data MuseTable = MuseTable 636 { museTableCaption :: Inlines 637 , museTableHeaders :: [[Blocks]] 638 , museTableRows :: [[Blocks]] 639 , museTableFooters :: [[Blocks]] 640 } 641 642data MuseTableElement = MuseHeaderRow [Blocks] 643 | MuseBodyRow [Blocks] 644 | MuseFooterRow [Blocks] 645 | MuseCaption Inlines 646 647museToPandocTable :: MuseTable -> Blocks 648museToPandocTable (MuseTable caption headers body footers) = 649 B.table (B.simpleCaption $ B.plain caption) 650 attrs 651 (TableHead nullAttr $ toHeaderRow headRow) 652 [TableBody nullAttr 0 [] $ map toRow $ rows ++ body ++ footers] 653 (TableFoot nullAttr []) 654 where attrs = (AlignDefault, ColWidthDefault) <$ transpose (headers ++ body ++ footers) 655 (headRow, rows) = fromMaybe ([], []) $ uncons headers 656 toRow = Row nullAttr . map B.simpleCell 657 toHeaderRow l = [toRow l | not (null l)] 658 659museAppendElement :: MuseTableElement 660 -> MuseTable 661 -> MuseTable 662museAppendElement element tbl = 663 case element of 664 MuseHeaderRow row -> tbl{ museTableHeaders = row : museTableHeaders tbl } 665 MuseBodyRow row -> tbl{ museTableRows = row : museTableRows tbl } 666 MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl } 667 MuseCaption inlines -> tbl{ museTableCaption = inlines } 668 669tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement]) 670tableElements = sequence <$> many1 tableParseElement 671 672elementsToTable :: [MuseTableElement] -> MuseTable 673elementsToTable = foldr museAppendElement emptyTable 674 where emptyTable = MuseTable mempty mempty mempty mempty 675 676museGridPart :: PandocMonad m => MuseParser m Int 677museGridPart = try $ length <$> many1 (char '-') <* char '+' 678 679museGridTableHeader :: PandocMonad m => MuseParser m [Int] 680museGridTableHeader = try $ char '+' *> many1 museGridPart <* manyTill spaceChar eol 681 682museGridTableRow :: PandocMonad m 683 => Int 684 -> [Int] 685 -> MuseParser m (F [Blocks]) 686museGridTableRow indent indices = try $ do 687 lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices) 688 let cols = map (T.unlines . map trimr) $ transpose lns 689 indentWith indent *> museGridTableHeader 690 sequence <$> mapM (parseFromString' parseBlocks) cols 691 692museGridTableRawLine :: PandocMonad m 693 => [Int] 694 -> MuseParser m [Text] 695museGridTableRawLine indices = 696 char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol 697 698museGridTable :: PandocMonad m => MuseParser m (F Blocks) 699museGridTable = try $ do 700 indent <- getIndent 701 indices <- museGridTableHeader 702 fmap rowsToTable . sequence <$> many1 (museGridTableRow indent indices) 703 where rowsToTable rows = B.table B.emptyCaption 704 attrs 705 (TableHead nullAttr []) 706 [TableBody nullAttr 0 [] $ map toRow rows] 707 (TableFoot nullAttr []) 708 where attrs = (AlignDefault, ColWidthDefault) <$ transpose rows 709 toRow = Row nullAttr . map B.simpleCell 710 711-- | Parse a table. 712table :: PandocMonad m => MuseParser m (F Blocks) 713table = try $ fmap (museToPandocTable . elementsToTable) <$> tableElements 714 715tableParseElement :: PandocMonad m => MuseParser m (F MuseTableElement) 716tableParseElement = tableParseHeader 717 <|> tableParseBody 718 <|> tableParseFooter 719 <|> tableParseCaption 720 721tableParseRow :: PandocMonad m 722 => Int -- ^ Number of separator characters 723 -> MuseParser m (F [Blocks]) 724tableParseRow n = try $ sequence <$> tableCells 725 where tableCells = (:) <$> tableCell sep <*> (tableCells <|> fmap pure (tableCell eol)) 726 tableCell p = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline' p 727 sep = try $ many1 spaceChar *> count n (char '|') *> lookAhead (void (many1 spaceChar) <|> void eol) 728 729-- | Parse a table header row. 730tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement) 731tableParseHeader = fmap MuseHeaderRow <$> tableParseRow 2 732 733-- | Parse a table body row. 734tableParseBody :: PandocMonad m => MuseParser m (F MuseTableElement) 735tableParseBody = fmap MuseBodyRow <$> tableParseRow 1 736 737-- | Parse a table footer row. 738tableParseFooter :: PandocMonad m => MuseParser m (F MuseTableElement) 739tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3 740 741-- | Parse table caption. 742tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement) 743tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat 744 <$ many spaceChar 745 <* string "|+" 746 <*> many1Till inline (try $ string "+|" *> eol) 747 748-- ** Inline parsers 749 750inline' :: PandocMonad m => MuseParser m (F Inlines) 751inline' = whitespace 752 <|> br 753 <|> anchor 754 <|> footnote 755 <|> strongEmph 756 <|> strong 757 <|> strongTag 758 <|> emph 759 <|> emphTag 760 <|> underlined 761 <|> superscriptTag 762 <|> subscriptTag 763 <|> strikeoutTag 764 <|> verbatimTag 765 <|> classTag 766 <|> inlineRtl 767 <|> inlineLtr 768 <|> nbsp 769 <|> linkOrImage 770 <|> code 771 <|> codeTag 772 <|> mathTag 773 <|> inlineLiteralTag 774 <|> str 775 <|> asterisks 776 <|> symbol 777 <?> "inline" 778 779inline :: PandocMonad m => MuseParser m (F Inlines) 780inline = endline <|> inline' 781 782-- | Parse a soft break. 783endline :: PandocMonad m => MuseParser m (F Inlines) 784endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos 785 786parseAnchor :: PandocMonad m => MuseParser m Text 787parseAnchor = try $ T.cons 788 <$ firstColumn 789 <* char '#' 790 <*> letter 791 <*> manyChar (letter <|> digit <|> char '-') 792 793anchor :: PandocMonad m => MuseParser m (F Inlines) 794anchor = try $ do 795 anchorId <- parseAnchor 796 skipMany spaceChar <|> void newline 797 return $ return $ B.spanWith (anchorId, [], []) mempty 798 799-- | Parse a footnote reference. 800footnote :: PandocMonad m => MuseParser m (F Inlines) 801footnote = try $ do 802 inLink <- asks museInLink 803 guard $ not inLink 804 ref <- noteMarker 805 return $ do 806 notes <- asksF museNotes 807 case M.lookup ref notes of 808 Nothing -> return $ B.str ref 809 Just (_pos, contents) -> do 810 st <- askF 811 let contents' = runF contents st { museNotes = M.delete ref (museNotes st) } 812 return $ B.note contents' 813 814whitespace :: PandocMonad m => MuseParser m (F Inlines) 815whitespace = try $ pure B.space <$ skipMany1 spaceChar <* updateLastSpacePos 816 817-- | Parse @\<br>@ tag. 818br :: PandocMonad m => MuseParser m (F Inlines) 819br = try $ pure B.linebreak <$ string "<br>" 820 821emphasisBetween :: (PandocMonad m, Show a) 822 => MuseParser m a 823 -> MuseParser m (F Inlines) 824emphasisBetween p = try $ trimInlinesF . mconcat 825 <$ atStart 826 <* p 827 <* notFollowedBy space 828 <*> many1Till inline (try $ noSpaceBefore *> p <* notFollowedBy alphaNum) 829 830-- | Parse an inline tag, such as @\<em>@ and @\<strong>@. 831inlineTag :: PandocMonad m 832 => Text -- ^ Tag name 833 -> MuseParser m (F Inlines) 834inlineTag tag = try $ mconcat 835 <$ openTag tag 836 <*> manyTill inline (closeTag tag) 837 838-- | Parse strong emphasis inline markup, indicated by @***@. 839strongEmph :: PandocMonad m => MuseParser m (F Inlines) 840strongEmph = fmap (B.strong . B.emph) <$> emphasisBetween (string "***" <* notFollowedBy (char '*')) 841 842-- | Parse strong inline markup, indicated by @**@. 843strong :: PandocMonad m => MuseParser m (F Inlines) 844strong = fmap B.strong <$> emphasisBetween (string "**" <* notFollowedBy (char '*')) 845 846-- | Parse emphasis inline markup, indicated by @*@. 847emph :: PandocMonad m => MuseParser m (F Inlines) 848emph = fmap B.emph <$> emphasisBetween (char '*' <* notFollowedBy (char '*')) 849 850-- | Parse underline inline markup, indicated by @_@. 851-- Supported only in Emacs Muse mode, not Text::Amuse. 852underlined :: PandocMonad m => MuseParser m (F Inlines) 853underlined = fmap underline 854 <$ guardDisabled Ext_amuse -- Supported only by Emacs Muse 855 <*> emphasisBetween (char '_') 856 857-- | Parse @\<strong>@ tag. 858strongTag :: PandocMonad m => MuseParser m (F Inlines) 859strongTag = fmap B.strong <$> inlineTag "strong" 860 861-- | Parse @\<em>@ tag. 862emphTag :: PandocMonad m => MuseParser m (F Inlines) 863emphTag = fmap B.emph <$> inlineTag "em" 864 865-- | Parse @\<sup>@ tag. 866superscriptTag :: PandocMonad m => MuseParser m (F Inlines) 867superscriptTag = fmap B.superscript <$> inlineTag "sup" 868 869-- | Parse @\<sub>@ tag. 870subscriptTag :: PandocMonad m => MuseParser m (F Inlines) 871subscriptTag = fmap B.subscript <$> inlineTag "sub" 872 873-- | Parse @\<del>@ tag. 874strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) 875strikeoutTag = fmap B.strikeout <$> inlineTag "del" 876 877-- | Parse @\<verbatim>@ tag. 878verbatimTag :: PandocMonad m => MuseParser m (F Inlines) 879verbatimTag = return . B.text 880 <$ openTag "verbatim" 881 <*> manyTillChar anyChar (closeTag "verbatim") 882 883-- | Parse @\<class>@ tag. 884classTag :: PandocMonad m => MuseParser m (F Inlines) 885classTag = do 886 classes <- maybe [] T.words . lookup "name" <$> openTag "class" 887 fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") 888 889-- | Parse @\<\<\<RTL>>>@ text. 890inlineRtl :: PandocMonad m => MuseParser m (F Inlines) 891inlineRtl = try $ 892 fmap (B.spanWith ("", [], [("dir", "rtl")])) . mconcat <$ string "<<<" <*> manyTill inline (string ">>>") 893 894-- | Parse @\<\<\<LTR>>>@ text. 895inlineLtr :: PandocMonad m => MuseParser m (F Inlines) 896inlineLtr = try $ 897 fmap (B.spanWith ("", [], [("dir", "ltr")])) . mconcat <$ string ">>>" <*> manyTill inline (string "<<<") 898 899-- | Parse "~~" as nonbreaking space. 900nbsp :: PandocMonad m => MuseParser m (F Inlines) 901nbsp = try $ pure (B.str "\160") <$ string "~~" 902 903-- | Parse code markup, indicated by @\'=\'@ characters. 904code :: PandocMonad m => MuseParser m (F Inlines) 905code = try $ fmap pure $ B.code . uncurry (<>) 906 <$ atStart 907 <* char '=' 908 <* notFollowedBy (spaceChar <|> newline) 909 <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=') 910 <* notFollowedBy alphaNum 911 912-- | Parse @\<code>@ tag. 913codeTag :: PandocMonad m => MuseParser m (F Inlines) 914codeTag = fmap pure $ B.codeWith 915 <$> (htmlAttrToPandoc <$> openTag "code") 916 <*> manyTillChar anyChar (closeTag "code") 917 918-- | Parse @\<math>@ tag. 919-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ 920mathTag :: PandocMonad m => MuseParser m (F Inlines) 921mathTag = return . B.math 922 <$ openTag "math" 923 <*> manyTillChar anyChar (closeTag "math") 924 925-- | Parse inline @\<literal>@ tag as a raw inline. 926inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) 927inlineLiteralTag = try $ fmap pure $ B.rawInline 928 <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML 929 <*> manyTillChar anyChar (closeTag "literal") 930 931str :: PandocMonad m => MuseParser m (F Inlines) 932str = return . B.str <$> many1Char alphaNum <* updateLastStrPos 933 934-- | Consume asterisks that were not used as emphasis opening. 935-- This prevents series of asterisks from being split into 936-- literal asterisk and emphasis opening. 937asterisks :: PandocMonad m => MuseParser m (F Inlines) 938asterisks = pure . B.str <$> many1Char (char '*') 939 940symbol :: PandocMonad m => MuseParser m (F Inlines) 941symbol = pure . B.str . T.singleton <$> nonspaceChar 942 943-- | Parse a link or image. 944linkOrImage :: PandocMonad m => MuseParser m (F Inlines) 945linkOrImage = try $ link "URL:" <|> image <|> link "" 946 947linkContent :: PandocMonad m => MuseParser m (F Inlines) 948linkContent = trimInlinesF . mconcat 949 <$ char '[' 950 <*> manyTill inline (char ']') 951 952-- | Parse a link starting with (possibly null) prefix 953link :: PandocMonad m => Text -> MuseParser m (F Inlines) 954link prefix = try $ do 955 inLink <- asks museInLink 956 guard $ not inLink 957 textStr $ "[[" <> prefix 958 url <- manyTillChar anyChar $ char ']' 959 content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent) 960 char ']' 961 return $ B.link url "" <$> content 962 963image :: PandocMonad m => MuseParser m (F Inlines) 964image = try $ do 965 string "[[" 966 (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']') 967 content <- option mempty linkContent 968 char ']' 969 let widthAttr = case align of 970 Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")] 971 _ -> maybeToList (("width",) . (<> "%") <$> width) 972 let alignClass = case align of 973 Just 'r' -> ["align-right"] 974 Just 'l' -> ["align-left"] 975 Just 'f' -> [] 976 _ -> [] 977 return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content 978 where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el 979 imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] 980 imageExtension = choice (try . textStr <$> imageExtensions) 981 imageExtensionAndOptions = do 982 ext <- imageExtension 983 (width, align) <- option (Nothing, Nothing) imageAttrs 984 return (ext, width, align) 985 imageAttrs = (,) 986 <$ many1 spaceChar 987 <*> optionMaybe (many1Char digit) 988 <* many spaceChar 989 <*> optionMaybe (oneOf "rlf") 990