1{-# LANGUAGE FlexibleContexts  #-}
2{-# LANGUAGE TupleSections     #-}
3{-# LANGUAGE OverloadedStrings #-}
4{- |
5   Module      : Text.Pandoc.Readers.Muse
6   Copyright   : Copyright (C) 2017-2020 Alexander Krotov
7   License     : GNU GPL, version 2 or above
8
9   Maintainer  : Alexander Krotov <ilabdsf@gmail.com>
10   Stability   : alpha
11   Portability : portable
12
13Conversion of Muse text to 'Pandoc' document.
14-}
15{-
16TODO:
17- <cite> tag
18-}
19module Text.Pandoc.Readers.Muse (readMuse) where
20
21import Control.Monad
22import Control.Monad.Reader
23import Control.Monad.Except (throwError)
24import Data.Bifunctor
25import Data.Default
26import Data.List (transpose, uncons)
27import qualified Data.Map as M
28import qualified Data.Set as Set
29import Data.Maybe (fromMaybe, isNothing, maybeToList)
30import Data.Text (Text)
31import qualified Data.Text as T
32import Text.Pandoc.Builder (Blocks, Inlines, underline)
33import qualified Text.Pandoc.Builder as B
34import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
35import Text.Pandoc.Definition
36import Text.Pandoc.Error (PandocError (PandocParsecError))
37import Text.Pandoc.Logging
38import Text.Pandoc.Options
39import Text.Pandoc.Parsing
40import Text.Pandoc.Shared (trimr, tshow)
41
42-- | Read Muse from an input string and return a Pandoc document.
43readMuse :: (PandocMonad m, ToSources a)
44         => ReaderOptions
45         -> a
46         -> m Pandoc
47readMuse opts s = do
48  let sources = toSources s
49  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
50              (initialSourceName sources) sources
51  case res of
52       Left e  -> throwError $ PandocParsecError sources e
53       Right d -> return d
54
55type F = Future MuseState
56
57data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
58                           , museOptions :: ReaderOptions
59                           , museIdentifierList :: Set.Set Text
60                           , museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed
61                           , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
62                           , museLogMessages :: [LogMessage]
63                           , museNotes :: M.Map Text (SourcePos, F Blocks)
64                           }
65
66instance Default MuseState where
67  def = MuseState { museMeta = return nullMeta
68                  , museOptions = def
69                  , museIdentifierList = Set.empty
70                  , museLastStrPos = Nothing
71                  , museLastSpacePos = Nothing
72                  , museLogMessages = []
73                  , museNotes = M.empty
74                  }
75
76data MuseEnv =
77  MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
78          , museInPara :: Bool -- ^ True when parsing paragraph is not allowed
79          }
80
81instance Default MuseEnv where
82  def = MuseEnv { museInLink = False
83                , museInPara = False
84                }
85
86type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
87
88instance HasReaderOptions MuseState where
89  extractReaderOptions = museOptions
90
91instance HasIdentifierList MuseState where
92  extractIdentifierList     = museIdentifierList
93  updateIdentifierList f st = st{ museIdentifierList = f $ museIdentifierList st }
94
95instance HasLastStrPosition MuseState where
96  setLastStrPos pos st = st{ museLastStrPos = pos }
97  getLastStrPos st     = museLastStrPos st
98
99instance HasLogMessages MuseState where
100  addLogMessage m s = s{ museLogMessages = m : museLogMessages s }
101  getLogMessages = reverse . museLogMessages
102
103updateLastSpacePos :: Monad m => MuseParser m ()
104updateLastSpacePos = getPosition >>= \pos ->
105  updateState $ \s -> s { museLastSpacePos = Just pos }
106
107-- | Parse Muse document
108parseMuse :: PandocMonad m => MuseParser m Pandoc
109parseMuse = do
110  many directive
111  blocks <- (:) <$> parseBlocks <*> many parseSection
112  eof
113  st <- getState
114  runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st <$ reportLogMessages
115
116-- * Utility functions
117
118-- | Trim up to one newline from the beginning of the string.
119lchop :: Text -> Text
120lchop s = case T.uncons s of
121  Just ('\n', xs) -> xs
122  _               -> s
123
124-- | Trim up to one newline from the end of the string.
125rchop :: Text -> Text
126rchop s = case T.unsnoc s of
127  Just (xs, '\n') -> xs
128  _               -> s
129
130unindent :: Text -> Text
131unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop
132
133dropSpacePrefix :: [Text] -> [Text]
134dropSpacePrefix lns = T.drop maxIndent <$> lns
135  where isSpaceChar c = c == ' ' || c == '\t'
136        maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns
137        same t = case T.uncons t of
138          Just (c, cs) -> T.all (== c) cs
139          Nothing      -> True
140
141atStart :: PandocMonad m => MuseParser m ()
142atStart = do
143  pos <- getPosition
144  st <- getState
145  guard $ museLastStrPos st /= Just pos
146
147noSpaceBefore :: PandocMonad m => MuseParser m ()
148noSpaceBefore = do
149  pos <- getPosition
150  st <- getState
151  guard $ museLastSpacePos st /= Just pos
152
153firstColumn :: PandocMonad m => MuseParser m ()
154firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
155
156-- * Parsers
157
158-- | Parse end-of-line, which can be either a newline or end-of-file.
159eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
160eol = void newline <|> eof
161
162getIndent :: PandocMonad m
163          => MuseParser m Int
164getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
165
166-- ** HTML parsers
167
168openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)]
169openTag tag = try $
170  char '<' *> textStr tag *> manyTill attr (char '>')
171  where
172    attr = try $ (,)
173      <$  many1 spaceChar
174      <*> many1Char (noneOf "=\n")
175      <*  string "=\""
176      <*> manyTillChar (noneOf "\"") (char '"')
177
178closeTag :: PandocMonad m => Text -> MuseParser m ()
179closeTag tag = try $ string "</" *> textStr tag *> void (char '>')
180
181-- | Convert HTML attributes to Pandoc 'Attr'
182htmlAttrToPandoc :: [(Text, Text)] -> Attr
183htmlAttrToPandoc attrs = (ident, classes, keyvals)
184  where
185    ident   = fromMaybe "" $ lookup "id" attrs
186    classes = maybe [] T.words $ lookup "class" attrs
187    keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"]
188
189parseHtmlContent :: PandocMonad m
190                 => Text -- ^ Tag name
191                 -> MuseParser m (Attr, F Blocks)
192parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
193  <$> fmap htmlAttrToPandoc (openTag tag)
194  <*  manyTill spaceChar eol
195  <*> allowPara (parseBlocksTill (try $ indentWith indent *> closeTag tag))
196  <*  manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
197
198-- ** Directive parsers
199
200-- While not documented, Emacs Muse allows "-" in directive name
201parseDirectiveKey :: PandocMonad m => MuseParser m Text
202parseDirectiveKey = char '#' *> manyChar (letter <|> char '-')
203
204parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
205parseEmacsDirective = (,)
206  <$> parseDirectiveKey
207  <*  spaceChar
208  <*> (trimInlinesF . mconcat <$> manyTill inline' eol)
209
210parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
211parseAmuseDirective = (,)
212  <$> parseDirectiveKey
213  <*  many1 spaceChar
214  <*> (trimInlinesF . mconcat <$> many1Till inline endOfDirective)
215  <*  many blankline
216  where
217    endOfDirective = lookAhead $ eof <|> try (newline *> (void blankline <|> void parseDirectiveKey))
218
219directive :: PandocMonad m => MuseParser m ()
220directive = do
221  ext <- getOption readerExtensions
222  (key, value) <- if extensionEnabled Ext_amuse ext then parseAmuseDirective else parseEmacsDirective
223  updateState $ \st -> st { museMeta = B.setMeta (translateKey key) <$> value <*> museMeta st }
224  where translateKey "cover" = "cover-image"
225        translateKey x = x
226
227-- ** Block parsers
228
229allowPara :: MonadReader MuseEnv m => m a -> m a
230allowPara p = local (\s -> s { museInPara = False }) p
231
232-- | Parse section contents until EOF or next header
233parseBlocks :: PandocMonad m
234            => MuseParser m (F Blocks)
235parseBlocks =
236  try (parseEnd <|>
237       nextSection <|>
238       listStart <|>
239       blockStart <|>
240       paraStart)
241  where
242    nextSection = mempty <$ lookAhead headingStart
243    parseEnd = mempty <$ eof
244    blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock)
245                        <*> allowPara parseBlocks
246    listStart =
247      uncurry (B.<>) <$> allowPara (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
248    paraStart = do
249      indent <- length <$> many spaceChar
250      uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks
251      where p indent = if indent >= 2 && indent < 6 then fmap B.blockQuote else id
252
253-- | Parse section that starts with a header
254parseSection :: PandocMonad m
255             => MuseParser m (F Blocks)
256parseSection =
257  ((B.<>) <$> emacsHeading <*> parseBlocks) <|>
258  (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
259
260parseBlocksTill :: PandocMonad m
261                => MuseParser m a
262                -> MuseParser m (F Blocks)
263parseBlocksTill end = continuation
264  where
265    parseEnd = mempty <$ end
266    blockStart = (B.<>) <$> blockElements <*> allowPara continuation
267    listStart = uncurry (B.<>) <$> allowPara (anyListUntil (parseEnd <|> continuation))
268    paraStart = uncurry (B.<>) <$> paraUntil (parseEnd <|> continuation)
269    continuation = try $ parseEnd <|> listStart <|> blockStart <|> paraStart
270
271listItemContentsUntil :: PandocMonad m
272                      => Int
273                      -> MuseParser m a
274                      -> MuseParser m a
275                      -> MuseParser m (F Blocks, a)
276listItemContentsUntil col pre end = p
277  where
278    p = try listStart <|> try blockStart <|> try paraStart
279    parsePre = (mempty,) <$> pre
280    parseEnd = (mempty,) <$> end
281    paraStart = do
282      (f, (r, e)) <- paraUntil (parsePre <|> continuation <|> parseEnd)
283      return (f B.<> r, e)
284    blockStart = first <$> ((B.<>) <$> blockElements)
285                       <*> allowPara (parsePre <|> continuation <|> parseEnd)
286    listStart = do
287      (f, (r, e)) <- allowPara $ anyListUntil (parsePre <|> continuation <|> parseEnd)
288      return (f B.<> r, e)
289    continuation = try $ do blank <- optionMaybe blankline
290                            skipMany blankline
291                            indentWith col
292                            local (\s -> s { museInPara = museInPara s && isNothing blank }) p
293
294parseBlock :: PandocMonad m => MuseParser m (F Blocks)
295parseBlock = do
296  res <- blockElements <|> para
297  trace (T.take 60 $ tshow $ B.toList $ runF res def)
298  return res
299  where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))
300
301blockElements :: PandocMonad m => MuseParser m (F Blocks)
302blockElements = (mempty <$ blankline)
303            <|> comment
304            <|> separator
305            <|> pagebreak
306            <|> example
307            <|> exampleTag
308            <|> literalTag
309            <|> centerTag
310            <|> rightTag
311            <|> quoteTag
312            <|> divTag
313            <|> biblioTag
314            <|> playTag
315            <|> verseTag
316            <|> lineBlock
317            <|> museGridTable
318            <|> table
319            <|> commentTag
320
321-- | Parse a line comment, starting with @;@ in the first column.
322comment :: PandocMonad m => MuseParser m (F Blocks)
323comment = try $ mempty
324  <$ firstColumn
325  <* char ';'
326  <* optional (spaceChar *> many (noneOf "\n"))
327  <* eol
328
329-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters.
330separator :: PandocMonad m => MuseParser m (F Blocks)
331separator = try $ pure B.horizontalRule
332  <$ string "----"
333  <* many (char '-')
334  <* many spaceChar
335  <* eol
336
337-- | Parse a page break
338pagebreak :: PandocMonad m => MuseParser m (F Blocks)
339pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always;")]) mempty)
340  <$ count 6 spaceChar
341  <* many spaceChar
342  <* string "* * * * *"
343  <* manyTill spaceChar eol
344
345headingStart :: PandocMonad m => MuseParser m (Text, Int)
346headingStart = try $ (,)
347  <$> option "" (try (parseAnchor <* manyTill spaceChar eol))
348  <*  firstColumn
349  <*> fmap length (many1 $ char '*')
350  <*  spaceChar
351
352-- | Parse a single-line heading.
353emacsHeading :: PandocMonad m => MuseParser m (F Blocks)
354emacsHeading = try $ do
355  guardDisabled Ext_amuse
356  (anchorId, level) <- headingStart
357  content <- trimInlinesF . mconcat <$> manyTill inline eol
358  attr <- registerHeader (anchorId, [], []) (runF content def)
359  return $ B.headerWith attr level <$> content
360
361-- | Parse a multi-line heading.
362-- It is a Text::Amuse extension, Emacs Muse does not allow heading to span multiple lines.
363amuseHeadingUntil :: PandocMonad m
364                  => MuseParser m a -- ^ Terminator parser
365                  -> MuseParser m (F Blocks, a)
366amuseHeadingUntil end = try $ do
367  guardEnabled Ext_amuse
368  (anchorId, level) <- headingStart
369  (content, e) <- paraContentsUntil end
370  attr <- registerHeader (anchorId, [], []) (runF content def)
371  return (B.headerWith attr level <$> content, e)
372
373-- | Parse an example between @{{{@ and @}}}@.
374-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation.
375example :: PandocMonad m => MuseParser m (F Blocks)
376example = try $ pure . B.codeBlock
377  <$  string "{{{"
378  <*  many spaceChar
379  <*> (unindent <$> manyTillChar anyChar (string "}}}"))
380
381-- | Parse an @\<example>@ tag.
382exampleTag :: PandocMonad m => MuseParser m (F Blocks)
383exampleTag = try $ fmap pure $ B.codeBlockWith
384  <$  many spaceChar
385  <*> (htmlAttrToPandoc <$> openTag "example")
386  <*> (unindent <$> manyTillChar anyChar (closeTag "example"))
387  <*  manyTill spaceChar eol
388
389-- | Parse a @\<literal>@ tag as a raw block.
390-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'.
391literalTag :: PandocMonad m => MuseParser m (F Blocks)
392literalTag = try $ fmap pure $ B.rawBlock
393  <$  many spaceChar
394  <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
395  <*  manyTill spaceChar eol
396  <*> (unindent <$> manyTillChar anyChar (closeTag "literal"))
397  <*  manyTill spaceChar eol
398
399-- | Parse @\<center>@ tag.
400-- Currently it is ignored as Pandoc cannot represent centered blocks.
401centerTag :: PandocMonad m => MuseParser m (F Blocks)
402centerTag = snd <$> parseHtmlContent "center"
403
404-- | Parse @\<right>@ tag.
405-- Currently it is ignored as Pandoc cannot represent centered blocks.
406rightTag :: PandocMonad m => MuseParser m (F Blocks)
407rightTag = snd <$> parseHtmlContent "right"
408
409-- | Parse @\<quote>@ tag.
410quoteTag :: PandocMonad m => MuseParser m (F Blocks)
411quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote"
412
413-- | Parse @\<div>@ tag.
414-- @\<div>@ tag is supported by Emacs Muse, but not Amusewiki 2.025.
415divTag :: PandocMonad m => MuseParser m (F Blocks)
416divTag = do
417  (attrs, content) <- parseHtmlContent "div"
418  return $ B.divWith attrs <$> content
419
420-- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@.
421-- @\<biblio>@ tag is supported only in Text::Amuse mode.
422biblioTag :: PandocMonad m => MuseParser m (F Blocks)
423biblioTag = fmap (B.divWith ("", ["biblio"], [])) . snd
424  <$  guardEnabled Ext_amuse
425  <*> parseHtmlContent "biblio"
426
427-- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@.
428-- @\<play>@ tag is supported only in Text::Amuse mode.
429playTag :: PandocMonad m => MuseParser m (F Blocks)
430playTag = do
431  guardEnabled Ext_amuse
432  fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play"
433
434verseLine :: PandocMonad m => MuseParser m (F Inlines)
435verseLine = (<>)
436  <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' ')))
437  <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
438
439-- | Parse @\<verse>@ tag.
440verseTag :: PandocMonad m => MuseParser m (F Blocks)
441verseTag = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence
442  <$  openTag "verse"
443  <*  manyTill spaceChar eol
444  <*> manyTill (indentWith indent *> verseLine) (try $ indentWith indent *> closeTag "verse")
445  <*  manyTill spaceChar eol
446
447-- | Parse @\<comment>@ tag.
448commentTag :: PandocMonad m => MuseParser m (F Blocks)
449commentTag = try $ mempty
450  <$ many spaceChar
451  <* openTag "comment"
452  <* manyTill anyChar (closeTag "comment")
453  <* manyTill spaceChar eol
454
455-- | Parse paragraph contents.
456paraContentsUntil :: PandocMonad m
457                  => MuseParser m a -- ^ Terminator parser
458                  -> MuseParser m (F Inlines, a)
459paraContentsUntil end = first (trimInlinesF . mconcat)
460  <$> manyUntil inline (try (manyTill spaceChar eol *> local (\s -> s { museInPara = True}) end))
461
462-- | Parse a paragraph.
463paraUntil :: PandocMonad m
464          => MuseParser m a -- ^ Terminator parser
465          -> MuseParser m (F Blocks, a)
466paraUntil end = do
467  inPara <- asks museInPara
468  guard $ not inPara
469  first (fmap B.para) <$> paraContentsUntil end
470
471noteMarker' :: PandocMonad m
472            => Char
473            -> Char
474            -> MuseParser m Text
475noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r])
476  <$ char l
477  <*> oneOf "123456789"
478  <*> manyTill digit (char r)
479
480noteMarker :: PandocMonad m => MuseParser m Text
481noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}'
482
483addNote :: PandocMonad m
484        => Text
485        -> SourcePos
486        -> F Blocks
487        -> MuseParser m ()
488addNote ref pos content = do
489  oldnotes <- museNotes <$> getState
490  when (M.member ref oldnotes)
491    (logMessage $ DuplicateNoteReference ref pos)
492  updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
493
494-- Amusewiki version of note
495-- Parsing is similar to list item, except that note marker is used instead of list marker
496amuseNoteBlockUntil :: PandocMonad m
497                    => MuseParser m a
498                    -> MuseParser m (F Blocks, a)
499amuseNoteBlockUntil end = try $ do
500  guardEnabled Ext_amuse
501  ref <- noteMarker
502  pos <- getPosition
503  void spaceChar <|> lookAhead eol
504  (content, e) <- allowPara $ listItemContentsUntil (sourceColumn pos) (Prelude.fail "x") end
505  addNote ref pos content
506  return (mempty, e)
507
508-- Emacs version of note
509-- Notes are allowed only at the end of text, no indentation is required.
510emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks)
511emacsNoteBlock = try $ do
512  guardDisabled Ext_amuse
513  ref <- noteMarker
514  pos <- getPosition
515  content <- fmap mconcat blocksTillNote
516  addNote ref pos content
517  return mempty
518  where
519    blocksTillNote =
520      many1Till parseBlock (eof <|> () <$ lookAhead noteMarker)
521
522--
523-- Verse markup
524--
525
526-- | Parse a line block indicated by @\'>\'@ characters.
527lineBlock :: PandocMonad m => MuseParser m (F Blocks)
528lineBlock = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence
529  <$> (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent)
530  where
531    blankVerseLine = try $ mempty <$ char '>' <* blankline
532    nonblankVerseLine = try (string "> ") *> verseLine
533
534-- *** List parsers
535
536bulletListItemsUntil :: PandocMonad m
537                     => Int -- ^ Indentation
538                     -> MuseParser m a -- ^ Terminator parser
539                     -> MuseParser m ([F Blocks], a)
540bulletListItemsUntil indent end = try $ do
541  char '-'
542  void spaceChar <|> lookAhead eol
543  (x, (xs, e)) <- allowPara $ listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end)
544  return (x:xs, e)
545
546-- | Parse a bullet list.
547bulletListUntil :: PandocMonad m
548                => MuseParser m a
549                -> MuseParser m (F Blocks, a)
550bulletListUntil end = try $ do
551  indent <- getIndent
552  guard $ indent /= 0
553  first (fmap B.bulletList . sequence) <$> bulletListItemsUntil indent end
554
555museOrderedListMarker :: PandocMonad m
556                      => ListNumberStyle
557                      -> MuseParser m Int
558museOrderedListMarker style =
559  snd <$> p <* char '.'
560  where p = case style of
561              Decimal    -> decimal
562              UpperRoman -> upperRoman
563              LowerRoman -> lowerRoman
564              UpperAlpha -> upperAlpha
565              LowerAlpha -> lowerAlpha
566              _          -> Prelude.fail "Unhandled case"
567
568orderedListItemsUntil :: PandocMonad m
569                      => Int
570                      -> ListNumberStyle
571                      -> MuseParser m a
572                      -> MuseParser m ([F Blocks], a)
573orderedListItemsUntil indent style end =
574  continuation
575  where
576    continuation = try $ do
577      pos <- getPosition
578      void spaceChar <|> lookAhead eol
579      (x, (xs, e)) <- allowPara $ listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end)
580      return (x:xs, e)
581
582-- | Parse an ordered list.
583orderedListUntil :: PandocMonad m
584                 => MuseParser m a
585                 -> MuseParser m (F Blocks, a)
586orderedListUntil end = try $ do
587  indent <- getIndent
588  guard $ indent /= 0
589  (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
590  char '.'
591  first (fmap (B.orderedListWith (start, style, Period)) . sequence)
592    <$> orderedListItemsUntil indent style end
593
594descriptionsUntil :: PandocMonad m
595                  => Int
596                  -> MuseParser m a
597                  -> MuseParser m ([F Blocks], a)
598descriptionsUntil indent end = do
599  void spaceChar <|> lookAhead eol
600  (x, (xs, e)) <- allowPara $ listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end)
601  return (x:xs, e)
602
603definitionListItemsUntil :: PandocMonad m
604                         => Int
605                         -> MuseParser m a
606                         -> MuseParser m ([F (Inlines, [Blocks])], a)
607definitionListItemsUntil indent end =
608  continuation
609  where
610    continuation = try $ do
611      pos <- getPosition
612      term <- trimInlinesF . mconcat <$> manyTill inline' (try $ string "::")
613      (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> continuation) <|> (([],) <$> end))
614      let xx = (,) <$> term <*> sequence x
615      return (xx:xs, e)
616
617-- | Parse a definition list.
618definitionListUntil :: PandocMonad m
619                    => MuseParser m a -- ^ Terminator parser
620                    -> MuseParser m (F Blocks, a)
621definitionListUntil end = try $ do
622  indent <- getIndent
623  guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse
624  first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end
625
626anyListUntil :: PandocMonad m
627             => MuseParser m a -- ^ Terminator parser
628             -> MuseParser m (F Blocks, a)
629anyListUntil end =
630  bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end
631
632-- *** Table parsers
633
634-- | Internal Muse table representation.
635data MuseTable = MuseTable
636  { museTableCaption :: Inlines
637  , museTableHeaders :: [[Blocks]]
638  , museTableRows    :: [[Blocks]]
639  , museTableFooters :: [[Blocks]]
640  }
641
642data MuseTableElement = MuseHeaderRow [Blocks]
643                      | MuseBodyRow [Blocks]
644                      | MuseFooterRow [Blocks]
645                      | MuseCaption Inlines
646
647museToPandocTable :: MuseTable -> Blocks
648museToPandocTable (MuseTable caption headers body footers) =
649  B.table (B.simpleCaption $ B.plain caption)
650          attrs
651          (TableHead nullAttr $ toHeaderRow headRow)
652          [TableBody nullAttr 0 [] $ map toRow $ rows ++ body ++ footers]
653          (TableFoot nullAttr [])
654  where attrs = (AlignDefault, ColWidthDefault) <$ transpose (headers ++ body ++ footers)
655        (headRow, rows) = fromMaybe ([], []) $ uncons headers
656        toRow = Row nullAttr . map B.simpleCell
657        toHeaderRow l = [toRow l | not (null l)]
658
659museAppendElement :: MuseTableElement
660                  -> MuseTable
661                  -> MuseTable
662museAppendElement element tbl =
663  case element of
664    MuseHeaderRow row -> tbl{ museTableHeaders = row : museTableHeaders tbl }
665    MuseBodyRow row -> tbl{ museTableRows = row : museTableRows tbl }
666    MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl }
667    MuseCaption inlines -> tbl{ museTableCaption = inlines }
668
669tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement])
670tableElements = sequence <$> many1 tableParseElement
671
672elementsToTable :: [MuseTableElement] -> MuseTable
673elementsToTable = foldr museAppendElement emptyTable
674  where emptyTable = MuseTable mempty mempty mempty mempty
675
676museGridPart :: PandocMonad m => MuseParser m Int
677museGridPart = try $ length <$> many1 (char '-') <* char '+'
678
679museGridTableHeader :: PandocMonad m => MuseParser m [Int]
680museGridTableHeader = try $ char '+' *> many1 museGridPart <* manyTill spaceChar eol
681
682museGridTableRow :: PandocMonad m
683                 => Int
684                 -> [Int]
685                 -> MuseParser m (F [Blocks])
686museGridTableRow indent indices = try $ do
687  lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices)
688  let cols = map (T.unlines . map trimr) $ transpose lns
689  indentWith indent *> museGridTableHeader
690  sequence <$> mapM (parseFromString' parseBlocks) cols
691
692museGridTableRawLine :: PandocMonad m
693                     => [Int]
694                     -> MuseParser m [Text]
695museGridTableRawLine indices =
696  char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol
697
698museGridTable :: PandocMonad m => MuseParser m (F Blocks)
699museGridTable = try $ do
700  indent <- getIndent
701  indices <- museGridTableHeader
702  fmap rowsToTable . sequence <$> many1 (museGridTableRow indent indices)
703  where rowsToTable rows = B.table B.emptyCaption
704                                   attrs
705                                   (TableHead nullAttr [])
706                                   [TableBody nullAttr 0 [] $ map toRow rows]
707                                   (TableFoot nullAttr [])
708                           where attrs = (AlignDefault, ColWidthDefault) <$ transpose rows
709                                 toRow = Row nullAttr . map B.simpleCell
710
711-- | Parse a table.
712table :: PandocMonad m => MuseParser m (F Blocks)
713table = try $ fmap (museToPandocTable . elementsToTable) <$> tableElements
714
715tableParseElement :: PandocMonad m => MuseParser m (F MuseTableElement)
716tableParseElement = tableParseHeader
717                <|> tableParseBody
718                <|> tableParseFooter
719                <|> tableParseCaption
720
721tableParseRow :: PandocMonad m
722              => Int -- ^ Number of separator characters
723              -> MuseParser m (F [Blocks])
724tableParseRow n = try $ sequence <$> tableCells
725  where tableCells = (:) <$> tableCell sep <*> (tableCells <|> fmap pure (tableCell eol))
726        tableCell p = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline' p
727        sep = try $ many1 spaceChar *> count n (char '|') *> lookAhead (void (many1 spaceChar) <|> void eol)
728
729-- | Parse a table header row.
730tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement)
731tableParseHeader = fmap MuseHeaderRow <$> tableParseRow 2
732
733-- | Parse a table body row.
734tableParseBody :: PandocMonad m => MuseParser m (F MuseTableElement)
735tableParseBody = fmap MuseBodyRow <$> tableParseRow 1
736
737-- | Parse a table footer row.
738tableParseFooter :: PandocMonad m => MuseParser m (F MuseTableElement)
739tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3
740
741-- | Parse table caption.
742tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement)
743tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat
744  <$  many spaceChar
745  <*  string "|+"
746  <*> many1Till inline (try $ string "+|" *> eol)
747
748-- ** Inline parsers
749
750inline' :: PandocMonad m => MuseParser m (F Inlines)
751inline' = whitespace
752      <|> br
753      <|> anchor
754      <|> footnote
755      <|> strongEmph
756      <|> strong
757      <|> strongTag
758      <|> emph
759      <|> emphTag
760      <|> underlined
761      <|> superscriptTag
762      <|> subscriptTag
763      <|> strikeoutTag
764      <|> verbatimTag
765      <|> classTag
766      <|> inlineRtl
767      <|> inlineLtr
768      <|> nbsp
769      <|> linkOrImage
770      <|> code
771      <|> codeTag
772      <|> mathTag
773      <|> inlineLiteralTag
774      <|> str
775      <|> asterisks
776      <|> symbol
777      <?> "inline"
778
779inline :: PandocMonad m => MuseParser m (F Inlines)
780inline = endline <|> inline'
781
782-- | Parse a soft break.
783endline :: PandocMonad m => MuseParser m (F Inlines)
784endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos
785
786parseAnchor :: PandocMonad m => MuseParser m Text
787parseAnchor = try $ T.cons
788  <$  firstColumn
789  <*  char '#'
790  <*> letter
791  <*> manyChar (letter <|> digit <|> char '-')
792
793anchor :: PandocMonad m => MuseParser m (F Inlines)
794anchor = try $ do
795  anchorId <- parseAnchor
796  skipMany spaceChar <|> void newline
797  return $ return $ B.spanWith (anchorId, [], []) mempty
798
799-- | Parse a footnote reference.
800footnote :: PandocMonad m => MuseParser m (F Inlines)
801footnote = try $ do
802  inLink <- asks museInLink
803  guard $ not inLink
804  ref <- noteMarker
805  return $ do
806    notes <- asksF museNotes
807    case M.lookup ref notes of
808      Nothing -> return $ B.str ref
809      Just (_pos, contents) -> do
810        st <- askF
811        let contents' = runF contents st { museNotes = M.delete ref (museNotes st) }
812        return $ B.note contents'
813
814whitespace :: PandocMonad m => MuseParser m (F Inlines)
815whitespace = try $ pure B.space <$ skipMany1 spaceChar <* updateLastSpacePos
816
817-- | Parse @\<br>@ tag.
818br :: PandocMonad m => MuseParser m (F Inlines)
819br = try $ pure B.linebreak <$ string "<br>"
820
821emphasisBetween :: (PandocMonad m, Show a)
822                => MuseParser m a
823                -> MuseParser m (F Inlines)
824emphasisBetween p = try $ trimInlinesF . mconcat
825  <$  atStart
826  <*  p
827  <*  notFollowedBy space
828  <*> many1Till inline (try $ noSpaceBefore *> p <* notFollowedBy alphaNum)
829
830-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
831inlineTag :: PandocMonad m
832          => Text -- ^ Tag name
833          -> MuseParser m (F Inlines)
834inlineTag tag = try $ mconcat
835  <$  openTag tag
836  <*> manyTill inline (closeTag tag)
837
838-- | Parse strong emphasis inline markup, indicated by @***@.
839strongEmph :: PandocMonad m => MuseParser m (F Inlines)
840strongEmph = fmap (B.strong . B.emph) <$> emphasisBetween (string "***" <* notFollowedBy (char '*'))
841
842-- | Parse strong inline markup, indicated by @**@.
843strong :: PandocMonad m => MuseParser m (F Inlines)
844strong = fmap B.strong <$> emphasisBetween (string "**" <* notFollowedBy (char '*'))
845
846-- | Parse emphasis inline markup, indicated by @*@.
847emph :: PandocMonad m => MuseParser m (F Inlines)
848emph = fmap B.emph <$> emphasisBetween (char '*' <* notFollowedBy (char '*'))
849
850-- | Parse underline inline markup, indicated by @_@.
851-- Supported only in Emacs Muse mode, not Text::Amuse.
852underlined :: PandocMonad m => MuseParser m (F Inlines)
853underlined = fmap underline
854  <$  guardDisabled Ext_amuse -- Supported only by Emacs Muse
855  <*> emphasisBetween (char '_')
856
857-- | Parse @\<strong>@ tag.
858strongTag :: PandocMonad m => MuseParser m (F Inlines)
859strongTag = fmap B.strong <$> inlineTag "strong"
860
861-- | Parse @\<em>@ tag.
862emphTag :: PandocMonad m => MuseParser m (F Inlines)
863emphTag = fmap B.emph <$> inlineTag "em"
864
865-- | Parse @\<sup>@ tag.
866superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
867superscriptTag = fmap B.superscript <$> inlineTag "sup"
868
869-- | Parse @\<sub>@ tag.
870subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
871subscriptTag = fmap B.subscript <$> inlineTag "sub"
872
873-- | Parse @\<del>@ tag.
874strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
875strikeoutTag = fmap B.strikeout <$> inlineTag "del"
876
877-- | Parse @\<verbatim>@ tag.
878verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
879verbatimTag = return . B.text
880  <$  openTag "verbatim"
881  <*> manyTillChar anyChar (closeTag "verbatim")
882
883-- | Parse @\<class>@ tag.
884classTag :: PandocMonad m => MuseParser m (F Inlines)
885classTag = do
886  classes <- maybe [] T.words . lookup "name" <$> openTag "class"
887  fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class")
888
889-- | Parse @\<\<\<RTL>>>@ text.
890inlineRtl :: PandocMonad m => MuseParser m (F Inlines)
891inlineRtl = try $
892  fmap (B.spanWith ("", [], [("dir", "rtl")])) . mconcat <$ string "<<<" <*> manyTill inline (string ">>>")
893
894-- | Parse @\<\<\<LTR>>>@ text.
895inlineLtr :: PandocMonad m => MuseParser m (F Inlines)
896inlineLtr = try $
897  fmap (B.spanWith ("", [], [("dir", "ltr")])) . mconcat <$ string ">>>" <*> manyTill inline (string "<<<")
898
899-- | Parse "~~" as nonbreaking space.
900nbsp :: PandocMonad m => MuseParser m (F Inlines)
901nbsp = try $ pure (B.str "\160") <$ string "~~"
902
903-- | Parse code markup, indicated by @\'=\'@ characters.
904code :: PandocMonad m => MuseParser m (F Inlines)
905code = try $ fmap pure $ B.code . uncurry (<>)
906  <$  atStart
907  <*  char '='
908  <*  notFollowedBy (spaceChar <|> newline)
909  <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=')
910  <*  notFollowedBy alphaNum
911
912-- | Parse @\<code>@ tag.
913codeTag :: PandocMonad m => MuseParser m (F Inlines)
914codeTag = fmap pure $ B.codeWith
915  <$> (htmlAttrToPandoc <$> openTag "code")
916  <*> manyTillChar anyChar (closeTag "code")
917
918-- | Parse @\<math>@ tag.
919-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
920mathTag :: PandocMonad m => MuseParser m (F Inlines)
921mathTag = return . B.math
922  <$  openTag "math"
923  <*> manyTillChar anyChar (closeTag "math")
924
925-- | Parse inline @\<literal>@ tag as a raw inline.
926inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
927inlineLiteralTag = try $ fmap pure $ B.rawInline
928  <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
929  <*> manyTillChar anyChar (closeTag "literal")
930
931str :: PandocMonad m => MuseParser m (F Inlines)
932str = return . B.str <$> many1Char alphaNum <* updateLastStrPos
933
934-- | Consume asterisks that were not used as emphasis opening.
935-- This prevents series of asterisks from being split into
936-- literal asterisk and emphasis opening.
937asterisks :: PandocMonad m => MuseParser m (F Inlines)
938asterisks = pure . B.str <$> many1Char (char '*')
939
940symbol :: PandocMonad m => MuseParser m (F Inlines)
941symbol = pure . B.str . T.singleton <$> nonspaceChar
942
943-- | Parse a link or image.
944linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
945linkOrImage = try $ link "URL:" <|> image <|> link ""
946
947linkContent :: PandocMonad m => MuseParser m (F Inlines)
948linkContent = trimInlinesF . mconcat
949  <$  char '['
950  <*> manyTill inline (char ']')
951
952-- | Parse a link starting with (possibly null) prefix
953link :: PandocMonad m => Text -> MuseParser m (F Inlines)
954link prefix = try $ do
955  inLink <- asks museInLink
956  guard $ not inLink
957  textStr $ "[[" <> prefix
958  url <- manyTillChar anyChar $ char ']'
959  content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent)
960  char ']'
961  return $ B.link url "" <$> content
962
963image :: PandocMonad m => MuseParser m (F Inlines)
964image = try $ do
965  string "[["
966  (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']')
967  content <- option mempty linkContent
968  char ']'
969  let widthAttr = case align of
970                    Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")]
971                    _ -> maybeToList (("width",) . (<> "%") <$> width)
972  let alignClass = case align of
973                     Just 'r' -> ["align-right"]
974                     Just 'l' -> ["align-left"]
975                     Just 'f' -> []
976                     _        -> []
977  return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content
978  where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
979        imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
980        imageExtension = choice (try . textStr <$> imageExtensions)
981        imageExtensionAndOptions = do
982          ext <- imageExtension
983          (width, align) <- option (Nothing, Nothing) imageAttrs
984          return (ext, width, align)
985        imageAttrs = (,)
986          <$  many1 spaceChar
987          <*> optionMaybe (many1Char digit)
988          <*  many spaceChar
989          <*> optionMaybe (oneOf "rlf")
990