1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE ViewPatterns      #-}
3-- |
4-- Module      :  Documentation.Haddock.Parser
5-- Copyright   :  (c) Mateusz Kowalczyk 2013-2014,
6--                    Simon Hengel      2013
7-- License     :  BSD-like
8--
9-- Maintainer  :  haddock@projects.haskell.org
10-- Stability   :  experimental
11-- Portability :  portable
12--
13-- Parser used for Haddock comments. For external users of this
14-- library, the most commonly used combination of functions is going
15-- to be
16--
17-- @'toRegular' . '_doc' . 'parseParas'@
18module Documentation.Haddock.Parser (
19  parseString,
20  parseParas,
21  overIdentifier,
22  toRegular,
23  Identifier
24) where
25
26import           Control.Applicative
27import           Control.Arrow (first)
28import           Control.Monad
29import           Data.Char (chr, isUpper, isAlpha, isSpace)
30import           Data.List (intercalate, unfoldr, elemIndex)
31import           Data.Maybe (fromMaybe, mapMaybe)
32import           Data.Monoid
33import qualified Data.Set as Set
34import           Documentation.Haddock.Doc
35import           Documentation.Haddock.Markup ( markup, plainMarkup )
36import           Documentation.Haddock.Parser.Monad
37import           Documentation.Haddock.Parser.Util
38import           Documentation.Haddock.Parser.Identifier
39import           Documentation.Haddock.Types
40import           Prelude hiding (takeWhile)
41import qualified Prelude as P
42
43import qualified Text.Parsec as Parsec
44import           Text.Parsec (try)
45
46import qualified Data.Text as T
47import           Data.Text (Text)
48
49
50-- $setup
51-- >>> :set -XOverloadedStrings
52
53-- | Drops the quotes/backticks around all identifiers, as if they
54-- were valid but still 'String's.
55toRegular :: DocH mod Identifier -> DocH mod String
56toRegular = fmap (\(Identifier _ _ x _) -> x)
57
58-- | Maps over 'DocIdentifier's over 'String' with potentially failing
59-- conversion using user-supplied function. If the conversion fails,
60-- the identifier is deemed to not be valid and is treated as a
61-- regular string.
62overIdentifier :: (Namespace -> String -> Maybe a)
63               -> DocH mod Identifier
64               -> DocH mod a
65overIdentifier f d = g d
66  where
67    g (DocIdentifier (Identifier ns o x e)) = case f ns x of
68      Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e]
69      Just x' -> DocIdentifier x'
70    g DocEmpty = DocEmpty
71    g (DocAppend x x') = DocAppend (g x) (g x')
72    g (DocString x) = DocString x
73    g (DocParagraph x) = DocParagraph $ g x
74    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
75    g (DocModule (ModLink m x)) = DocModule (ModLink m (fmap g x))
76    g (DocWarning x) = DocWarning $ g x
77    g (DocEmphasis x) = DocEmphasis $ g x
78    g (DocMonospaced x) = DocMonospaced $ g x
79    g (DocBold x) = DocBold $ g x
80    g (DocUnorderedList x) = DocUnorderedList $ fmap g x
81    g (DocOrderedList x) = DocOrderedList $ fmap g x
82    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
83    g (DocCodeBlock x) = DocCodeBlock $ g x
84    g (DocHyperlink (Hyperlink u x)) = DocHyperlink (Hyperlink u (fmap g x))
85    g (DocPic x) = DocPic x
86    g (DocMathInline x) = DocMathInline x
87    g (DocMathDisplay x) = DocMathDisplay x
88    g (DocAName x) = DocAName x
89    g (DocProperty x) = DocProperty x
90    g (DocExamples x) = DocExamples x
91    g (DocHeader (Header l x)) = DocHeader . Header l $ g x
92    g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b))
93
94
95choice' :: [Parser a] -> Parser a
96choice' [] = empty
97choice' [p] = p
98choice' (p : ps) = try p <|> choice' ps
99
100parse :: Parser a -> Text -> (ParserState, a)
101parse p = either err id . parseOnly (p <* Parsec.eof)
102  where
103    err = error . ("Haddock.Parser.parse: " ++)
104
105-- | Main entry point to the parser. Appends the newline character
106-- to the input string.
107parseParas :: Maybe Package
108           -> String -- ^ String to parse
109           -> MetaDoc mod Identifier
110parseParas pkg input = case parseParasState input of
111  (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state
112                                       , _package = pkg
113                                       }
114                        , _doc = a
115                        }
116
117parseParasState :: String -> (ParserState, DocH mod Identifier)
118parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r')
119  where
120    p :: Parser (DocH mod Identifier)
121    p = docConcat <$> many (paragraph <* emptyLines)
122
123    emptyLines :: Parser ()
124    emptyLines = void $ many (try (skipHorizontalSpace *> "\n"))
125
126parseParagraphs :: String -> Parser (DocH mod Identifier)
127parseParagraphs input = case parseParasState input of
128  (state, a) -> Parsec.putState state *> pure a
129
130-- | Variant of 'parseText' for 'String' instead of 'Text'
131parseString :: String -> DocH mod Identifier
132parseString = parseText . T.pack
133
134-- | Parse a text paragraph. Actually just a wrapper over 'parseParagraph' which
135-- drops leading whitespace.
136parseText :: Text -> DocH mod Identifier
137parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r')
138
139parseParagraph :: Text -> DocH mod Identifier
140parseParagraph = snd . parse p
141  where
142    p :: Parser (DocH mod Identifier)
143    p = docConcat <$> many (choice' [ monospace
144                                    , anchor
145                                    , identifier
146                                    , moduleName
147                                    , picture
148                                    , mathDisplay
149                                    , mathInline
150                                    , markdownImage
151                                    , markdownLink
152                                    , hyperlink
153                                    , bold
154                                    , emphasis
155                                    , encodedChar
156                                    , string'
157                                    , skipSpecialChar
158                                    ])
159
160-- | Parses and processes
161-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
162--
163-- >>> parseString "&#65;"
164-- DocString "A"
165encodedChar :: Parser (DocH mod a)
166encodedChar = "&#" *> c <* ";"
167  where
168    c = DocString . return . chr <$> num
169    num = hex <|> decimal
170    hex = ("x" <|> "X") *> hexadecimal
171
172-- | List of characters that we use to delimit any special markup.
173-- Once we have checked for any of these and tried to parse the
174-- relevant markup, we can assume they are used as regular text.
175specialChar :: [Char]
176specialChar = "_/<@\"&'`# "
177
178-- | Plain, regular parser for text. Called as one of the last parsers
179-- to ensure that we have already given a chance to more meaningful parsers
180-- before capturing their characers.
181string' :: Parser (DocH mod a)
182string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar)
183  where
184    unescape "" = ""
185    unescape ('\\':x:xs) = x : unescape xs
186    unescape (x:xs) = x : unescape xs
187
188-- | Skips a single special character and treats it as a plain string.
189-- This is done to skip over any special characters belonging to other
190-- elements but which were not deemed meaningful at their positions.
191skipSpecialChar :: Parser (DocH mod a)
192skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar
193
194-- | Emphasis parser.
195--
196-- >>> parseString "/Hello world/"
197-- DocEmphasis (DocString "Hello world")
198emphasis :: Parser (DocH mod Identifier)
199emphasis = DocEmphasis . parseParagraph <$>
200  disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
201
202-- | Bold parser.
203--
204-- >>> parseString "__Hello world__"
205-- DocBold (DocString "Hello world")
206bold :: Parser (DocH mod Identifier)
207bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__")
208
209disallowNewline :: Parser Text -> Parser Text
210disallowNewline = mfilter (T.all (/= '\n'))
211
212-- | Like `takeWhile`, but unconditionally take escaped characters.
213takeWhile_ :: (Char -> Bool) -> Parser Text
214takeWhile_ p = scan p_ False
215  where
216    p_ escaped c
217      | escaped = Just False
218      | not $ p c = Nothing
219      | otherwise = Just (c == '\\')
220
221-- | Like 'takeWhile1', but unconditionally take escaped characters.
222takeWhile1_ :: (Char -> Bool) -> Parser Text
223takeWhile1_ = mfilter (not . T.null) . takeWhile_
224
225-- | Text anchors to allow for jumping around the generated documentation.
226--
227-- >>> parseString "#Hello world#"
228-- DocAName "Hello world"
229anchor :: Parser (DocH mod a)
230anchor = DocAName . T.unpack <$>
231         ("#" *> takeWhile1_ (\x -> x /= '#' && not (isSpace x)) <* "#")
232
233-- | Monospaced strings.
234--
235-- >>> parseString "@cruel@"
236-- DocMonospaced (DocString "cruel")
237monospace :: Parser (DocH mod Identifier)
238monospace = DocMonospaced . parseParagraph
239            <$> ("@" *> takeWhile1_ (/= '@') <* "@")
240
241-- | Module names.
242--
243-- Note that we allow '#' and '\' to support anchors (old style anchors are of
244-- the form "SomeModule\#anchor").
245moduleName :: Parser (DocH mod a)
246moduleName = DocModule . flip ModLink Nothing <$> ("\"" *> moduleNameString <* "\"")
247
248-- | A module name, optionally with an anchor
249--
250moduleNameString :: Parser String
251moduleNameString = modid `maybeFollowedBy` anchor_
252  where
253    modid = intercalate "." <$> conid `Parsec.sepBy1` "."
254    anchor_ = (++)
255      <$> (Parsec.string "#" <|> Parsec.string "\\#")
256      <*> many (Parsec.satisfy (\c -> c /= '"' && not (isSpace c)))
257
258    maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
259    conid :: Parser String
260    conid = (:)
261      <$> Parsec.satisfy (\c -> isAlpha c && isUpper c)
262      <*> many conChar
263
264    conChar = Parsec.alphaNum <|> Parsec.char '_'
265
266-- | A labeled link to an indentifier, module or url using markdown
267-- syntax.
268markdownLink :: Parser (DocH mod Identifier)
269markdownLink = do
270  lbl <- markdownLinkText
271  choice' [ markdownModuleName lbl, markdownURL lbl ]
272  where
273    markdownModuleName lbl = do
274      mn <- "(" *> skipHorizontalSpace *>
275            "\"" *> moduleNameString <* "\""
276            <* skipHorizontalSpace <* ")"
277      pure $ DocModule (ModLink mn (Just lbl))
278
279    markdownURL lbl = do
280      target <- markdownLinkTarget
281      pure $ DocHyperlink $ Hyperlink target (Just lbl)
282
283-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
284-- a title for the picture.
285--
286-- >>> parseString "<<hello.png>>"
287-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing})
288-- >>> parseString "<<hello.png world>>"
289-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})
290picture :: Parser (DocH mod a)
291picture = DocPic . makeLabeled Picture
292          <$> disallowNewline ("<<" *> takeUntil ">>")
293
294-- | Inline math parser, surrounded by \\( and \\).
295--
296-- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)"
297-- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
298mathInline :: Parser (DocH mod a)
299mathInline = DocMathInline . T.unpack
300             <$> disallowNewline  ("\\(" *> takeUntil "\\)")
301
302-- | Display math parser, surrounded by \\[ and \\].
303--
304-- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]"
305-- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
306mathDisplay :: Parser (DocH mod a)
307mathDisplay = DocMathDisplay . T.unpack
308              <$> ("\\[" *> takeUntil "\\]")
309
310-- | Markdown image parser. As per the commonmark reference recommendation, the
311-- description text for an image converted to its a plain string representation.
312--
313-- >>> parseString "![some /emphasis/ in a description](www.site.com)"
314-- DocPic (Picture "www.site.com" (Just "some emphasis in a description"))
315markdownImage :: Parser (DocH mod Identifier)
316markdownImage = do
317  text <- markup stringMarkup <$> ("!" *> markdownLinkText)
318  url <- markdownLinkTarget
319  pure $ DocPic (Picture url (Just text))
320  where
321    stringMarkup = plainMarkup (const "") renderIdent
322    renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r]
323
324-- | Paragraph parser, called by 'parseParas'.
325paragraph :: Parser (DocH mod Identifier)
326paragraph = choice' [ examples
327                    , table
328                    , do indent <- takeIndent
329                         choice' [ since
330                                 , unorderedList indent
331                                 , orderedList indent
332                                 , birdtracks
333                                 , codeblock
334                                 , property
335                                 , header
336                                 , textParagraphThatStartsWithMarkdownLink
337                                 , definitionList indent
338                                 , docParagraph <$> textParagraph
339                                 ]
340                    ]
341
342-- | Provides support for grid tables.
343--
344-- Tables are composed by an optional header and body. The header is composed by
345-- a single row. The body is composed by a non-empty list of rows.
346--
347-- Example table with header:
348--
349-- > +----------+----------+
350-- > | /32bit/  |   64bit  |
351-- > +==========+==========+
352-- > |  0x0000  | @0x0000@ |
353-- > +----------+----------+
354--
355-- Algorithms loosely follows ideas in
356-- http://docutils.sourceforge.net/docutils/parsers/rst/tableparser.py
357--
358table :: Parser (DocH mod Identifier)
359table = do
360    -- first we parse the first row, which determines the width of the table
361    firstRow <- parseFirstRow
362    let len = T.length firstRow
363
364    -- then we parse all consequtive rows starting and ending with + or |,
365    -- of the width `len`.
366    restRows <- many (try (parseRestRows len))
367
368    -- Now we gathered the table block, the next step is to split the block
369    -- into cells.
370    DocTable <$> tableStepTwo len (firstRow : restRows)
371  where
372    parseFirstRow :: Parser Text
373    parseFirstRow = do
374        skipHorizontalSpace
375        cs <- takeWhile (\c -> c == '-' || c == '+')
376
377        -- upper-left and upper-right corners are `+`
378        guard (T.length cs >= 2 &&
379               T.head cs == '+' &&
380               T.last cs == '+')
381
382        -- trailing space
383        skipHorizontalSpace
384        _ <- Parsec.newline
385
386        return cs
387
388    parseRestRows :: Int -> Parser Text
389    parseRestRows l = do
390        skipHorizontalSpace
391        bs <- scan predicate l
392
393        -- Left and right edges are `|` or `+`
394        guard (T.length bs >= 2 &&
395               (T.head bs == '|' || T.head bs == '+') &&
396               (T.last bs == '|' || T.last bs == '+'))
397
398        -- trailing space
399        skipHorizontalSpace
400        _ <- Parsec.newline
401
402        return bs
403      where
404        predicate n c
405            | n <= 0    = Nothing
406            | c == '\n' = Nothing
407            | otherwise = Just (n - 1)
408
409-- Second step searchs for row of '+' and '=' characters, records it's index
410-- and changes to '=' to '-'.
411tableStepTwo
412    :: Int              -- ^ width
413    -> [Text]           -- ^ rows
414    -> Parser (Table (DocH mod Identifier))
415tableStepTwo width = go 0 [] where
416    go _ left [] = tableStepThree width (reverse left) Nothing
417    go n left (r : rs)
418        | T.all (`elem` ['+', '=']) r =
419            tableStepThree width (reverse left ++ r' : rs) (Just n)
420        | otherwise =
421            go (n + 1) (r :  left) rs
422      where
423        r' = T.map (\c -> if c == '=' then '-' else c) r
424
425-- Third step recognises cells in the table area, returning a list of TC, cells.
426tableStepThree
427    :: Int              -- ^ width
428    -> [Text]           -- ^ rows
429    -> Maybe Int        -- ^ index of header separator
430    -> Parser (Table (DocH mod Identifier))
431tableStepThree width rs hdrIndex = do
432    cells <- loop (Set.singleton (0, 0))
433    tableStepFour rs hdrIndex cells
434  where
435    height = length rs
436
437    loop :: Set.Set (Int, Int) -> Parser [TC]
438    loop queue = case Set.minView queue of
439        Nothing -> return []
440        Just ((y, x), queue')
441            | y + 1 >= height || x + 1 >= width -> loop queue'
442            | otherwise -> case scanRight x y of
443                Nothing -> loop queue'
444                Just (x2, y2) -> do
445                    let tc = TC y x y2 x2
446                    fmap (tc :) $ loop $ queue' `Set.union` Set.fromList
447                        [(y, x2), (y2, x), (y2, x2)]
448
449    -- scan right looking for +, then try scan down
450    --
451    -- do we need to record + saw on the way left and down?
452    scanRight :: Int -> Int -> Maybe (Int, Int)
453    scanRight x y = go (x + 1) where
454        bs = rs !! y
455        go x' | x' >= width          = fail "overflow right "
456              | T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1)
457              | T.index bs x' == '-' = go (x' + 1)
458              | otherwise            = fail $ "not a border (right) " ++ show (x,y,x')
459
460    -- scan down looking for +
461    scanDown :: Int -> Int -> Int -> Maybe (Int, Int)
462    scanDown x y x2 = go (y + 1) where
463        go y' | y' >= height                 = fail "overflow down"
464              | T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1)
465              | T.index (rs !! y') x2 == '|' = go (y' + 1)
466              | otherwise                    = fail $ "not a border (down) " ++ show (x,y,x2,y')
467
468    -- check that at y2 x..x2 characters are '+' or '-'
469    scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
470    scanLeft x y x2 y2
471        | all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2
472        | otherwise                                            = fail $ "not a border (left) " ++ show (x,y,x2,y2)
473      where
474        bs = rs !! y2
475
476    -- check that at y2 x..x2 characters are '+' or '-'
477    scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
478    scanUp x y x2 y2
479        | all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2)
480        | otherwise                                                   = fail $ "not a border (up) " ++ show (x,y,x2,y2)
481
482-- | table cell: top left bottom right
483data TC = TC !Int !Int !Int !Int
484  deriving Show
485
486tcXS :: TC -> [Int]
487tcXS (TC _ x _ x2) = [x, x2]
488
489tcYS :: TC -> [Int]
490tcYS (TC y _ y2 _) = [y, y2]
491
492-- | Fourth step. Given the locations of cells, forms 'Table' structure.
493tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier))
494tableStepFour rs hdrIndex cells =  case hdrIndex of
495    Nothing -> return $ Table [] rowsDoc
496    Just i  -> case elemIndex i yTabStops of
497        Nothing -> return $ Table [] rowsDoc
498        Just i' -> return $ uncurry Table $ splitAt i' rowsDoc
499  where
500    xTabStops = sortNub $ concatMap tcXS cells
501    yTabStops = sortNub $ concatMap tcYS cells
502
503    sortNub :: Ord a => [a] -> [a]
504    sortNub = Set.toList . Set.fromList
505
506    init' :: [a] -> [a]
507    init' []       = []
508    init' [_]      = []
509    init' (x : xs) = x : init' xs
510
511    rowsDoc = (fmap . fmap) parseParagraph rows
512
513    rows = map makeRow (init' yTabStops)
514      where
515        makeRow y = TableRow $ mapMaybe (makeCell y) cells
516        makeCell y (TC y' x y2 x2)
517            | y /= y' = Nothing
518            | otherwise = Just $ TableCell xts yts (extract (x + 1) (y + 1) (x2 - 1) (y2 - 1))
519          where
520            xts = length $ P.takeWhile (< x2) $ dropWhile (< x) xTabStops
521            yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops
522
523    -- extract cell contents given boundaries
524    extract :: Int -> Int -> Int -> Int -> Text
525    extract x y x2 y2 = T.intercalate "\n"
526        [ T.stripEnd $ T.stripStart $ T.take (x2 - x + 1) $ T.drop x $ rs !! y'
527        | y' <- [y .. y2]
528        ]
529
530-- | Parse \@since annotations.
531since :: Parser (DocH mod a)
532since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
533  where
534    version = decimal `Parsec.sepBy1` "."
535
536-- | Headers inside the comment denoted with @=@ signs, up to 6 levels
537-- deep.
538--
539-- >>> snd <$> parseOnly header "= Hello"
540-- Right (DocHeader (Header {headerLevel = 1, headerTitle = DocString "Hello"}))
541-- >>> snd <$> parseOnly header "== World"
542-- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"}))
543header :: Parser (DocH mod Identifier)
544header = do
545  let psers = map (string . flip T.replicate "=") [6, 5 .. 1]
546      pser = Parsec.choice psers
547  depth <- T.length <$> pser
548  line <- parseText <$> (skipHorizontalSpace *> nonEmptyLine)
549  rest <- try paragraph <|> return DocEmpty
550  return $ DocHeader (Header depth line) `docAppend` rest
551
552textParagraph :: Parser (DocH mod Identifier)
553textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine
554
555textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
556textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
557  where
558    optionalTextParagraph :: Parser (DocH mod Identifier)
559    optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph
560                                    , pure DocEmpty ]
561
562    whitespace :: Parser (DocH mod a)
563    whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
564      where
565        f :: Text -> Maybe Text -> String
566        f xs (fromMaybe "" -> x)
567          | T.null (xs <> x) = ""
568          | otherwise = " "
569
570-- | Parses unordered (bullet) lists.
571unorderedList :: Text -> Parser (DocH mod Identifier)
572unorderedList indent = DocUnorderedList <$> p
573  where
574    p = ("*" <|> "-") *> innerList indent p
575
576-- | Parses ordered lists (numbered or dashed).
577orderedList :: Text -> Parser (DocH mod Identifier)
578orderedList indent = DocOrderedList <$> p
579  where
580    p = (paren <|> dot) *> innerList indent p
581    dot = (decimal :: Parser Int) <* "."
582    paren = "(" *> decimal <* ")"
583
584-- | Generic function collecting any further lines belonging to the
585-- list entry and recursively collecting any further lists in the
586-- same paragraph. Usually used as
587--
588-- > someListFunction = listBeginning *> innerList someListFunction
589innerList :: Text -> Parser [DocH mod Identifier]
590          -> Parser [DocH mod Identifier]
591innerList indent item = do
592  c <- takeLine
593  (cs, items) <- more indent item
594  let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs
595  return $ case items of
596    Left p -> [contents `docAppend` p]
597    Right i -> contents : i
598
599-- | Parses definition lists.
600definitionList :: Text -> Parser (DocH mod Identifier)
601definitionList indent = DocDefList <$> p
602  where
603    p = do
604      label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
605      c <- takeLine
606      (cs, items) <- more indent p
607      let contents = parseText . dropNLs . T.unlines $ c : cs
608      return $ case items of
609        Left x -> [(label, contents `docAppend` x)]
610        Right i -> (label, contents) : i
611
612-- | Drops all trailing newlines.
613dropNLs :: Text -> Text
614dropNLs = T.dropWhileEnd (== '\n')
615
616-- | Main worker for 'innerList' and 'definitionList'.
617-- We need the 'Either' here to be able to tell in the respective functions
618-- whether we're dealing with the next list or a nested paragraph.
619more :: Monoid a => Text -> Parser a
620     -> Parser ([Text], Either (DocH mod Identifier) a)
621more indent item = choice' [ innerParagraphs indent
622                           , moreListItems indent item
623                           , moreContent indent item
624                           , pure ([], Right mempty)
625                           ]
626
627-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs.
628innerParagraphs :: Text
629                -> Parser ([Text], Either (DocH mod Identifier) a)
630innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)
631
632-- | Attempts to fetch the next list if possibly. Used by 'innerList' and
633-- 'definitionList' to recursively grab lists that aren't separated by a whole
634-- paragraph.
635moreListItems :: Text -> Parser a
636              -> Parser ([Text], Either (DocH mod Identifier) a)
637moreListItems indent item = (,) [] . Right <$> indentedItem
638  where
639    indentedItem = string indent *> Parsec.spaces *> item
640
641-- | Helper for 'innerList' and 'definitionList' which simply takes
642-- a line of text and attempts to parse more list content with 'more'.
643moreContent :: Monoid a => Text -> Parser a
644            -> Parser ([Text], Either (DocH mod Identifier) a)
645moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item
646
647-- | Parses an indented paragraph.
648-- The indentation is 4 spaces.
649indentedParagraphs :: Text -> Parser (DocH mod Identifier)
650indentedParagraphs indent =
651    (T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs
652  where
653    indent' = string $ indent <> "    "
654
655-- | Grab as many fully indented paragraphs as we can.
656dropFrontOfPara :: Parser Text -> Parser [Text]
657dropFrontOfPara sp = do
658  currentParagraph <- some (try (sp *> takeNonEmptyLine))
659  followingParagraphs <-
660    choice' [ skipHorizontalSpace *> nextPar -- we have more paragraphs to take
661            , skipHorizontalSpace *> nlList -- end of the ride, remember the newline
662            , Parsec.eof *> return []       -- nothing more to take at all
663            ]
664  return (currentParagraph ++ followingParagraphs)
665  where
666    nextPar = (++) <$> nlList <*> dropFrontOfPara sp
667    nlList = "\n" *> return ["\n"]
668
669nonSpace :: Text -> Parser Text
670nonSpace xs
671  | T.all isSpace xs = fail "empty line"
672  | otherwise = return xs
673
674-- | Takes a non-empty, not fully whitespace line.
675--
676--  Doesn't discard the trailing newline.
677takeNonEmptyLine :: Parser Text
678takeNonEmptyLine = do
679    l <- takeWhile1 (/= '\n') >>= nonSpace
680    _ <- "\n"
681    pure (l <> "\n")
682
683-- | Takes indentation of first non-empty line.
684--
685-- More precisely: skips all whitespace-only lines and returns indentation
686-- (horizontal space, might be empty) of that non-empty line.
687takeIndent :: Parser Text
688takeIndent = do
689  indent <- takeHorizontalSpace
690  choice' [ "\n" *> takeIndent
691          , return indent
692          ]
693
694-- | Blocks of text of the form:
695--
696-- >> foo
697-- >> bar
698-- >> baz
699--
700birdtracks :: Parser (DocH mod a)
701birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line
702  where
703    line = try (skipHorizontalSpace *> ">" *> takeLine)
704
705stripSpace :: [Text] -> [Text]
706stripSpace = fromMaybe <*> mapM strip'
707  where
708    strip' t = case T.uncons t of
709                 Nothing -> Just ""
710                 Just (' ',t') -> Just t'
711                 _ -> Nothing
712
713-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
714-- Consecutive examples are accepted.
715examples :: Parser (DocH mod a)
716examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go)
717  where
718    go :: Parser [Example]
719    go = do
720      prefix <- takeHorizontalSpace <* ">>>"
721      expr <- takeLine
722      (rs, es) <- resultAndMoreExamples
723      return (makeExample prefix expr rs : es)
724      where
725        resultAndMoreExamples :: Parser ([Text], [Example])
726        resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ]
727          where
728            moreExamples :: Parser ([Text], [Example])
729            moreExamples = (,) [] <$> go
730
731            result :: Parser ([Text], [Example])
732            result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
733
734    makeExample :: Text -> Text -> [Text] -> Example
735    makeExample prefix expression res =
736      Example (T.unpack (T.strip expression)) result
737      where
738        result = map (T.unpack . substituteBlankLine . tryStripPrefix) res
739
740        tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs)
741
742        substituteBlankLine "<BLANKLINE>" = ""
743        substituteBlankLine xs = xs
744
745nonEmptyLine :: Parser Text
746nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)
747
748takeLine :: Parser Text
749takeLine = try (takeWhile (/= '\n') <* endOfLine)
750
751endOfLine :: Parser ()
752endOfLine = void "\n" <|> Parsec.eof
753
754-- | Property parser.
755--
756-- >>> snd <$> parseOnly property "prop> hello world"
757-- Right (DocProperty "hello world")
758property :: Parser (DocH mod a)
759property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n'))
760
761-- |
762-- Paragraph level codeblock. Anything between the two delimiting \@ is parsed
763-- for markup.
764codeblock :: Parser (DocH mod Identifier)
765codeblock =
766  DocCodeBlock . parseParagraph . dropSpaces
767  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
768  where
769    dropSpaces xs =
770      case splitByNl xs of
771        [] -> xs
772        ys -> case T.uncons (last ys) of
773          Just (' ',_) -> case mapM dropSpace ys of
774                            Nothing -> xs
775                            Just zs -> T.intercalate "\n" zs
776          _ -> xs
777
778    -- This is necessary because ‘lines’ swallows up a trailing newline
779    -- and we lose information about whether the last line belongs to @ or to
780    -- text which we need to decide whether we actually want to be dropping
781    -- anything at all.
782    splitByNl = unfoldr (\x -> case T.uncons x of
783                                 Just ('\n',x') -> Just (T.span (/= '\n') x')
784                                 _ -> Nothing)
785                . ("\n" <>)
786
787    dropSpace t = case T.uncons t of
788                    Nothing -> Just ""
789                    Just (' ',t') -> Just t'
790                    _ -> Nothing
791
792    block' = scan p False
793      where
794        p isNewline c
795          | isNewline && c == '@' = Nothing
796          | isNewline && isSpace c = Just isNewline
797          | otherwise = Just $ c == '\n'
798
799hyperlink :: Parser (DocH mod Identifier)
800hyperlink = choice' [ angleBracketLink, autoUrl ]
801
802angleBracketLink :: Parser (DocH mod a)
803angleBracketLink =
804    DocHyperlink . makeLabeled (\s -> Hyperlink s . fmap DocString)
805    <$> disallowNewline ("<" *> takeUntil ">")
806
807-- | The text for a markdown link, enclosed in square brackets.
808markdownLinkText :: Parser (DocH mod Identifier)
809markdownLinkText = parseParagraph . T.strip <$> ("[" *> takeUntil "]")
810
811-- | The target for a markdown link, enclosed in parenthesis.
812markdownLinkTarget :: Parser String
813markdownLinkTarget = whitespace *> url
814  where
815    whitespace :: Parser ()
816    whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
817
818    url :: Parser String
819    url = rejectWhitespace (decode <$> ("(" *> takeUntil ")"))
820
821    rejectWhitespace :: MonadPlus m => m String -> m String
822    rejectWhitespace = mfilter (all (not . isSpace))
823
824    decode :: Text -> String
825    decode = T.unpack . removeEscapes
826
827-- | Looks for URL-like things to automatically hyperlink even if they
828-- weren't marked as links.
829autoUrl :: Parser (DocH mod a)
830autoUrl = mkLink <$> url
831  where
832    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace)
833
834    mkLink :: Text -> DocH mod a
835    mkLink s = case T.unsnoc s of
836      Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x]
837      _ -> DocHyperlink (mkHyperlink s)
838
839    mkHyperlink :: Text -> Hyperlink (DocH mod a)
840    mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing
841
842
843-- | Parses identifiers with help of 'parseValid'.
844identifier :: Parser (DocH mod Identifier)
845identifier = DocIdentifier <$> parseValid
846