1{-# LANGUAGE OverloadedStrings #-} 2{-# LANGUAGE ViewPatterns #-} 3-- | 4-- Module : Documentation.Haddock.Parser 5-- Copyright : (c) Mateusz Kowalczyk 2013-2014, 6-- Simon Hengel 2013 7-- License : BSD-like 8-- 9-- Maintainer : haddock@projects.haskell.org 10-- Stability : experimental 11-- Portability : portable 12-- 13-- Parser used for Haddock comments. For external users of this 14-- library, the most commonly used combination of functions is going 15-- to be 16-- 17-- @'toRegular' . '_doc' . 'parseParas'@ 18module Documentation.Haddock.Parser ( 19 parseString, 20 parseParas, 21 overIdentifier, 22 toRegular, 23 Identifier 24) where 25 26import Control.Applicative 27import Control.Arrow (first) 28import Control.Monad 29import Data.Char (chr, isUpper, isAlpha, isSpace) 30import Data.List (intercalate, unfoldr, elemIndex) 31import Data.Maybe (fromMaybe, mapMaybe) 32import Data.Monoid 33import qualified Data.Set as Set 34import Documentation.Haddock.Doc 35import Documentation.Haddock.Markup ( markup, plainMarkup ) 36import Documentation.Haddock.Parser.Monad 37import Documentation.Haddock.Parser.Util 38import Documentation.Haddock.Parser.Identifier 39import Documentation.Haddock.Types 40import Prelude hiding (takeWhile) 41import qualified Prelude as P 42 43import qualified Text.Parsec as Parsec 44import Text.Parsec (try) 45 46import qualified Data.Text as T 47import Data.Text (Text) 48 49 50-- $setup 51-- >>> :set -XOverloadedStrings 52 53-- | Drops the quotes/backticks around all identifiers, as if they 54-- were valid but still 'String's. 55toRegular :: DocH mod Identifier -> DocH mod String 56toRegular = fmap (\(Identifier _ _ x _) -> x) 57 58-- | Maps over 'DocIdentifier's over 'String' with potentially failing 59-- conversion using user-supplied function. If the conversion fails, 60-- the identifier is deemed to not be valid and is treated as a 61-- regular string. 62overIdentifier :: (Namespace -> String -> Maybe a) 63 -> DocH mod Identifier 64 -> DocH mod a 65overIdentifier f d = g d 66 where 67 g (DocIdentifier (Identifier ns o x e)) = case f ns x of 68 Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e] 69 Just x' -> DocIdentifier x' 70 g DocEmpty = DocEmpty 71 g (DocAppend x x') = DocAppend (g x) (g x') 72 g (DocString x) = DocString x 73 g (DocParagraph x) = DocParagraph $ g x 74 g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x 75 g (DocModule (ModLink m x)) = DocModule (ModLink m (fmap g x)) 76 g (DocWarning x) = DocWarning $ g x 77 g (DocEmphasis x) = DocEmphasis $ g x 78 g (DocMonospaced x) = DocMonospaced $ g x 79 g (DocBold x) = DocBold $ g x 80 g (DocUnorderedList x) = DocUnorderedList $ fmap g x 81 g (DocOrderedList x) = DocOrderedList $ fmap g x 82 g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x 83 g (DocCodeBlock x) = DocCodeBlock $ g x 84 g (DocHyperlink (Hyperlink u x)) = DocHyperlink (Hyperlink u (fmap g x)) 85 g (DocPic x) = DocPic x 86 g (DocMathInline x) = DocMathInline x 87 g (DocMathDisplay x) = DocMathDisplay x 88 g (DocAName x) = DocAName x 89 g (DocProperty x) = DocProperty x 90 g (DocExamples x) = DocExamples x 91 g (DocHeader (Header l x)) = DocHeader . Header l $ g x 92 g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b)) 93 94 95choice' :: [Parser a] -> Parser a 96choice' [] = empty 97choice' [p] = p 98choice' (p : ps) = try p <|> choice' ps 99 100parse :: Parser a -> Text -> (ParserState, a) 101parse p = either err id . parseOnly (p <* Parsec.eof) 102 where 103 err = error . ("Haddock.Parser.parse: " ++) 104 105-- | Main entry point to the parser. Appends the newline character 106-- to the input string. 107parseParas :: Maybe Package 108 -> String -- ^ String to parse 109 -> MetaDoc mod Identifier 110parseParas pkg input = case parseParasState input of 111 (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state 112 , _package = pkg 113 } 114 , _doc = a 115 } 116 117parseParasState :: String -> (ParserState, DocH mod Identifier) 118parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r') 119 where 120 p :: Parser (DocH mod Identifier) 121 p = docConcat <$> many (paragraph <* emptyLines) 122 123 emptyLines :: Parser () 124 emptyLines = void $ many (try (skipHorizontalSpace *> "\n")) 125 126parseParagraphs :: String -> Parser (DocH mod Identifier) 127parseParagraphs input = case parseParasState input of 128 (state, a) -> Parsec.putState state *> pure a 129 130-- | Variant of 'parseText' for 'String' instead of 'Text' 131parseString :: String -> DocH mod Identifier 132parseString = parseText . T.pack 133 134-- | Parse a text paragraph. Actually just a wrapper over 'parseParagraph' which 135-- drops leading whitespace. 136parseText :: Text -> DocH mod Identifier 137parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r') 138 139parseParagraph :: Text -> DocH mod Identifier 140parseParagraph = snd . parse p 141 where 142 p :: Parser (DocH mod Identifier) 143 p = docConcat <$> many (choice' [ monospace 144 , anchor 145 , identifier 146 , moduleName 147 , picture 148 , mathDisplay 149 , mathInline 150 , markdownImage 151 , markdownLink 152 , hyperlink 153 , bold 154 , emphasis 155 , encodedChar 156 , string' 157 , skipSpecialChar 158 ]) 159 160-- | Parses and processes 161-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> 162-- 163-- >>> parseString "A" 164-- DocString "A" 165encodedChar :: Parser (DocH mod a) 166encodedChar = "&#" *> c <* ";" 167 where 168 c = DocString . return . chr <$> num 169 num = hex <|> decimal 170 hex = ("x" <|> "X") *> hexadecimal 171 172-- | List of characters that we use to delimit any special markup. 173-- Once we have checked for any of these and tried to parse the 174-- relevant markup, we can assume they are used as regular text. 175specialChar :: [Char] 176specialChar = "_/<@\"&'`# " 177 178-- | Plain, regular parser for text. Called as one of the last parsers 179-- to ensure that we have already given a chance to more meaningful parsers 180-- before capturing their characers. 181string' :: Parser (DocH mod a) 182string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar) 183 where 184 unescape "" = "" 185 unescape ('\\':x:xs) = x : unescape xs 186 unescape (x:xs) = x : unescape xs 187 188-- | Skips a single special character and treats it as a plain string. 189-- This is done to skip over any special characters belonging to other 190-- elements but which were not deemed meaningful at their positions. 191skipSpecialChar :: Parser (DocH mod a) 192skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar 193 194-- | Emphasis parser. 195-- 196-- >>> parseString "/Hello world/" 197-- DocEmphasis (DocString "Hello world") 198emphasis :: Parser (DocH mod Identifier) 199emphasis = DocEmphasis . parseParagraph <$> 200 disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/") 201 202-- | Bold parser. 203-- 204-- >>> parseString "__Hello world__" 205-- DocBold (DocString "Hello world") 206bold :: Parser (DocH mod Identifier) 207bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__") 208 209disallowNewline :: Parser Text -> Parser Text 210disallowNewline = mfilter (T.all (/= '\n')) 211 212-- | Like `takeWhile`, but unconditionally take escaped characters. 213takeWhile_ :: (Char -> Bool) -> Parser Text 214takeWhile_ p = scan p_ False 215 where 216 p_ escaped c 217 | escaped = Just False 218 | not $ p c = Nothing 219 | otherwise = Just (c == '\\') 220 221-- | Like 'takeWhile1', but unconditionally take escaped characters. 222takeWhile1_ :: (Char -> Bool) -> Parser Text 223takeWhile1_ = mfilter (not . T.null) . takeWhile_ 224 225-- | Text anchors to allow for jumping around the generated documentation. 226-- 227-- >>> parseString "#Hello world#" 228-- DocAName "Hello world" 229anchor :: Parser (DocH mod a) 230anchor = DocAName . T.unpack <$> 231 ("#" *> takeWhile1_ (\x -> x /= '#' && not (isSpace x)) <* "#") 232 233-- | Monospaced strings. 234-- 235-- >>> parseString "@cruel@" 236-- DocMonospaced (DocString "cruel") 237monospace :: Parser (DocH mod Identifier) 238monospace = DocMonospaced . parseParagraph 239 <$> ("@" *> takeWhile1_ (/= '@') <* "@") 240 241-- | Module names. 242-- 243-- Note that we allow '#' and '\' to support anchors (old style anchors are of 244-- the form "SomeModule\#anchor"). 245moduleName :: Parser (DocH mod a) 246moduleName = DocModule . flip ModLink Nothing <$> ("\"" *> moduleNameString <* "\"") 247 248-- | A module name, optionally with an anchor 249-- 250moduleNameString :: Parser String 251moduleNameString = modid `maybeFollowedBy` anchor_ 252 where 253 modid = intercalate "." <$> conid `Parsec.sepBy1` "." 254 anchor_ = (++) 255 <$> (Parsec.string "#" <|> Parsec.string "\\#") 256 <*> many (Parsec.satisfy (\c -> c /= '"' && not (isSpace c))) 257 258 maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf 259 conid :: Parser String 260 conid = (:) 261 <$> Parsec.satisfy (\c -> isAlpha c && isUpper c) 262 <*> many conChar 263 264 conChar = Parsec.alphaNum <|> Parsec.char '_' 265 266-- | A labeled link to an indentifier, module or url using markdown 267-- syntax. 268markdownLink :: Parser (DocH mod Identifier) 269markdownLink = do 270 lbl <- markdownLinkText 271 choice' [ markdownModuleName lbl, markdownURL lbl ] 272 where 273 markdownModuleName lbl = do 274 mn <- "(" *> skipHorizontalSpace *> 275 "\"" *> moduleNameString <* "\"" 276 <* skipHorizontalSpace <* ")" 277 pure $ DocModule (ModLink mn (Just lbl)) 278 279 markdownURL lbl = do 280 target <- markdownLinkTarget 281 pure $ DocHyperlink $ Hyperlink target (Just lbl) 282 283-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify 284-- a title for the picture. 285-- 286-- >>> parseString "<<hello.png>>" 287-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing}) 288-- >>> parseString "<<hello.png world>>" 289-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"}) 290picture :: Parser (DocH mod a) 291picture = DocPic . makeLabeled Picture 292 <$> disallowNewline ("<<" *> takeUntil ">>") 293 294-- | Inline math parser, surrounded by \\( and \\). 295-- 296-- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)" 297-- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}" 298mathInline :: Parser (DocH mod a) 299mathInline = DocMathInline . T.unpack 300 <$> disallowNewline ("\\(" *> takeUntil "\\)") 301 302-- | Display math parser, surrounded by \\[ and \\]. 303-- 304-- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]" 305-- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}" 306mathDisplay :: Parser (DocH mod a) 307mathDisplay = DocMathDisplay . T.unpack 308 <$> ("\\[" *> takeUntil "\\]") 309 310-- | Markdown image parser. As per the commonmark reference recommendation, the 311-- description text for an image converted to its a plain string representation. 312-- 313-- >>> parseString "![some /emphasis/ in a description](www.site.com)" 314-- DocPic (Picture "www.site.com" (Just "some emphasis in a description")) 315markdownImage :: Parser (DocH mod Identifier) 316markdownImage = do 317 text <- markup stringMarkup <$> ("!" *> markdownLinkText) 318 url <- markdownLinkTarget 319 pure $ DocPic (Picture url (Just text)) 320 where 321 stringMarkup = plainMarkup (const "") renderIdent 322 renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r] 323 324-- | Paragraph parser, called by 'parseParas'. 325paragraph :: Parser (DocH mod Identifier) 326paragraph = choice' [ examples 327 , table 328 , do indent <- takeIndent 329 choice' [ since 330 , unorderedList indent 331 , orderedList indent 332 , birdtracks 333 , codeblock 334 , property 335 , header 336 , textParagraphThatStartsWithMarkdownLink 337 , definitionList indent 338 , docParagraph <$> textParagraph 339 ] 340 ] 341 342-- | Provides support for grid tables. 343-- 344-- Tables are composed by an optional header and body. The header is composed by 345-- a single row. The body is composed by a non-empty list of rows. 346-- 347-- Example table with header: 348-- 349-- > +----------+----------+ 350-- > | /32bit/ | 64bit | 351-- > +==========+==========+ 352-- > | 0x0000 | @0x0000@ | 353-- > +----------+----------+ 354-- 355-- Algorithms loosely follows ideas in 356-- http://docutils.sourceforge.net/docutils/parsers/rst/tableparser.py 357-- 358table :: Parser (DocH mod Identifier) 359table = do 360 -- first we parse the first row, which determines the width of the table 361 firstRow <- parseFirstRow 362 let len = T.length firstRow 363 364 -- then we parse all consequtive rows starting and ending with + or |, 365 -- of the width `len`. 366 restRows <- many (try (parseRestRows len)) 367 368 -- Now we gathered the table block, the next step is to split the block 369 -- into cells. 370 DocTable <$> tableStepTwo len (firstRow : restRows) 371 where 372 parseFirstRow :: Parser Text 373 parseFirstRow = do 374 skipHorizontalSpace 375 cs <- takeWhile (\c -> c == '-' || c == '+') 376 377 -- upper-left and upper-right corners are `+` 378 guard (T.length cs >= 2 && 379 T.head cs == '+' && 380 T.last cs == '+') 381 382 -- trailing space 383 skipHorizontalSpace 384 _ <- Parsec.newline 385 386 return cs 387 388 parseRestRows :: Int -> Parser Text 389 parseRestRows l = do 390 skipHorizontalSpace 391 bs <- scan predicate l 392 393 -- Left and right edges are `|` or `+` 394 guard (T.length bs >= 2 && 395 (T.head bs == '|' || T.head bs == '+') && 396 (T.last bs == '|' || T.last bs == '+')) 397 398 -- trailing space 399 skipHorizontalSpace 400 _ <- Parsec.newline 401 402 return bs 403 where 404 predicate n c 405 | n <= 0 = Nothing 406 | c == '\n' = Nothing 407 | otherwise = Just (n - 1) 408 409-- Second step searchs for row of '+' and '=' characters, records it's index 410-- and changes to '=' to '-'. 411tableStepTwo 412 :: Int -- ^ width 413 -> [Text] -- ^ rows 414 -> Parser (Table (DocH mod Identifier)) 415tableStepTwo width = go 0 [] where 416 go _ left [] = tableStepThree width (reverse left) Nothing 417 go n left (r : rs) 418 | T.all (`elem` ['+', '=']) r = 419 tableStepThree width (reverse left ++ r' : rs) (Just n) 420 | otherwise = 421 go (n + 1) (r : left) rs 422 where 423 r' = T.map (\c -> if c == '=' then '-' else c) r 424 425-- Third step recognises cells in the table area, returning a list of TC, cells. 426tableStepThree 427 :: Int -- ^ width 428 -> [Text] -- ^ rows 429 -> Maybe Int -- ^ index of header separator 430 -> Parser (Table (DocH mod Identifier)) 431tableStepThree width rs hdrIndex = do 432 cells <- loop (Set.singleton (0, 0)) 433 tableStepFour rs hdrIndex cells 434 where 435 height = length rs 436 437 loop :: Set.Set (Int, Int) -> Parser [TC] 438 loop queue = case Set.minView queue of 439 Nothing -> return [] 440 Just ((y, x), queue') 441 | y + 1 >= height || x + 1 >= width -> loop queue' 442 | otherwise -> case scanRight x y of 443 Nothing -> loop queue' 444 Just (x2, y2) -> do 445 let tc = TC y x y2 x2 446 fmap (tc :) $ loop $ queue' `Set.union` Set.fromList 447 [(y, x2), (y2, x), (y2, x2)] 448 449 -- scan right looking for +, then try scan down 450 -- 451 -- do we need to record + saw on the way left and down? 452 scanRight :: Int -> Int -> Maybe (Int, Int) 453 scanRight x y = go (x + 1) where 454 bs = rs !! y 455 go x' | x' >= width = fail "overflow right " 456 | T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1) 457 | T.index bs x' == '-' = go (x' + 1) 458 | otherwise = fail $ "not a border (right) " ++ show (x,y,x') 459 460 -- scan down looking for + 461 scanDown :: Int -> Int -> Int -> Maybe (Int, Int) 462 scanDown x y x2 = go (y + 1) where 463 go y' | y' >= height = fail "overflow down" 464 | T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1) 465 | T.index (rs !! y') x2 == '|' = go (y' + 1) 466 | otherwise = fail $ "not a border (down) " ++ show (x,y,x2,y') 467 468 -- check that at y2 x..x2 characters are '+' or '-' 469 scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int) 470 scanLeft x y x2 y2 471 | all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2 472 | otherwise = fail $ "not a border (left) " ++ show (x,y,x2,y2) 473 where 474 bs = rs !! y2 475 476 -- check that at y2 x..x2 characters are '+' or '-' 477 scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int) 478 scanUp x y x2 y2 479 | all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2) 480 | otherwise = fail $ "not a border (up) " ++ show (x,y,x2,y2) 481 482-- | table cell: top left bottom right 483data TC = TC !Int !Int !Int !Int 484 deriving Show 485 486tcXS :: TC -> [Int] 487tcXS (TC _ x _ x2) = [x, x2] 488 489tcYS :: TC -> [Int] 490tcYS (TC y _ y2 _) = [y, y2] 491 492-- | Fourth step. Given the locations of cells, forms 'Table' structure. 493tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier)) 494tableStepFour rs hdrIndex cells = case hdrIndex of 495 Nothing -> return $ Table [] rowsDoc 496 Just i -> case elemIndex i yTabStops of 497 Nothing -> return $ Table [] rowsDoc 498 Just i' -> return $ uncurry Table $ splitAt i' rowsDoc 499 where 500 xTabStops = sortNub $ concatMap tcXS cells 501 yTabStops = sortNub $ concatMap tcYS cells 502 503 sortNub :: Ord a => [a] -> [a] 504 sortNub = Set.toList . Set.fromList 505 506 init' :: [a] -> [a] 507 init' [] = [] 508 init' [_] = [] 509 init' (x : xs) = x : init' xs 510 511 rowsDoc = (fmap . fmap) parseParagraph rows 512 513 rows = map makeRow (init' yTabStops) 514 where 515 makeRow y = TableRow $ mapMaybe (makeCell y) cells 516 makeCell y (TC y' x y2 x2) 517 | y /= y' = Nothing 518 | otherwise = Just $ TableCell xts yts (extract (x + 1) (y + 1) (x2 - 1) (y2 - 1)) 519 where 520 xts = length $ P.takeWhile (< x2) $ dropWhile (< x) xTabStops 521 yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops 522 523 -- extract cell contents given boundaries 524 extract :: Int -> Int -> Int -> Int -> Text 525 extract x y x2 y2 = T.intercalate "\n" 526 [ T.stripEnd $ T.stripStart $ T.take (x2 - x + 1) $ T.drop x $ rs !! y' 527 | y' <- [y .. y2] 528 ] 529 530-- | Parse \@since annotations. 531since :: Parser (DocH mod a) 532since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty 533 where 534 version = decimal `Parsec.sepBy1` "." 535 536-- | Headers inside the comment denoted with @=@ signs, up to 6 levels 537-- deep. 538-- 539-- >>> snd <$> parseOnly header "= Hello" 540-- Right (DocHeader (Header {headerLevel = 1, headerTitle = DocString "Hello"})) 541-- >>> snd <$> parseOnly header "== World" 542-- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"})) 543header :: Parser (DocH mod Identifier) 544header = do 545 let psers = map (string . flip T.replicate "=") [6, 5 .. 1] 546 pser = Parsec.choice psers 547 depth <- T.length <$> pser 548 line <- parseText <$> (skipHorizontalSpace *> nonEmptyLine) 549 rest <- try paragraph <|> return DocEmpty 550 return $ DocHeader (Header depth line) `docAppend` rest 551 552textParagraph :: Parser (DocH mod Identifier) 553textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine 554 555textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier) 556textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph) 557 where 558 optionalTextParagraph :: Parser (DocH mod Identifier) 559 optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph 560 , pure DocEmpty ] 561 562 whitespace :: Parser (DocH mod a) 563 whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n") 564 where 565 f :: Text -> Maybe Text -> String 566 f xs (fromMaybe "" -> x) 567 | T.null (xs <> x) = "" 568 | otherwise = " " 569 570-- | Parses unordered (bullet) lists. 571unorderedList :: Text -> Parser (DocH mod Identifier) 572unorderedList indent = DocUnorderedList <$> p 573 where 574 p = ("*" <|> "-") *> innerList indent p 575 576-- | Parses ordered lists (numbered or dashed). 577orderedList :: Text -> Parser (DocH mod Identifier) 578orderedList indent = DocOrderedList <$> p 579 where 580 p = (paren <|> dot) *> innerList indent p 581 dot = (decimal :: Parser Int) <* "." 582 paren = "(" *> decimal <* ")" 583 584-- | Generic function collecting any further lines belonging to the 585-- list entry and recursively collecting any further lists in the 586-- same paragraph. Usually used as 587-- 588-- > someListFunction = listBeginning *> innerList someListFunction 589innerList :: Text -> Parser [DocH mod Identifier] 590 -> Parser [DocH mod Identifier] 591innerList indent item = do 592 c <- takeLine 593 (cs, items) <- more indent item 594 let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs 595 return $ case items of 596 Left p -> [contents `docAppend` p] 597 Right i -> contents : i 598 599-- | Parses definition lists. 600definitionList :: Text -> Parser (DocH mod Identifier) 601definitionList indent = DocDefList <$> p 602 where 603 p = do 604 label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":") 605 c <- takeLine 606 (cs, items) <- more indent p 607 let contents = parseText . dropNLs . T.unlines $ c : cs 608 return $ case items of 609 Left x -> [(label, contents `docAppend` x)] 610 Right i -> (label, contents) : i 611 612-- | Drops all trailing newlines. 613dropNLs :: Text -> Text 614dropNLs = T.dropWhileEnd (== '\n') 615 616-- | Main worker for 'innerList' and 'definitionList'. 617-- We need the 'Either' here to be able to tell in the respective functions 618-- whether we're dealing with the next list or a nested paragraph. 619more :: Monoid a => Text -> Parser a 620 -> Parser ([Text], Either (DocH mod Identifier) a) 621more indent item = choice' [ innerParagraphs indent 622 , moreListItems indent item 623 , moreContent indent item 624 , pure ([], Right mempty) 625 ] 626 627-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs. 628innerParagraphs :: Text 629 -> Parser ([Text], Either (DocH mod Identifier) a) 630innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent) 631 632-- | Attempts to fetch the next list if possibly. Used by 'innerList' and 633-- 'definitionList' to recursively grab lists that aren't separated by a whole 634-- paragraph. 635moreListItems :: Text -> Parser a 636 -> Parser ([Text], Either (DocH mod Identifier) a) 637moreListItems indent item = (,) [] . Right <$> indentedItem 638 where 639 indentedItem = string indent *> Parsec.spaces *> item 640 641-- | Helper for 'innerList' and 'definitionList' which simply takes 642-- a line of text and attempts to parse more list content with 'more'. 643moreContent :: Monoid a => Text -> Parser a 644 -> Parser ([Text], Either (DocH mod Identifier) a) 645moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item 646 647-- | Parses an indented paragraph. 648-- The indentation is 4 spaces. 649indentedParagraphs :: Text -> Parser (DocH mod Identifier) 650indentedParagraphs indent = 651 (T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs 652 where 653 indent' = string $ indent <> " " 654 655-- | Grab as many fully indented paragraphs as we can. 656dropFrontOfPara :: Parser Text -> Parser [Text] 657dropFrontOfPara sp = do 658 currentParagraph <- some (try (sp *> takeNonEmptyLine)) 659 followingParagraphs <- 660 choice' [ skipHorizontalSpace *> nextPar -- we have more paragraphs to take 661 , skipHorizontalSpace *> nlList -- end of the ride, remember the newline 662 , Parsec.eof *> return [] -- nothing more to take at all 663 ] 664 return (currentParagraph ++ followingParagraphs) 665 where 666 nextPar = (++) <$> nlList <*> dropFrontOfPara sp 667 nlList = "\n" *> return ["\n"] 668 669nonSpace :: Text -> Parser Text 670nonSpace xs 671 | T.all isSpace xs = fail "empty line" 672 | otherwise = return xs 673 674-- | Takes a non-empty, not fully whitespace line. 675-- 676-- Doesn't discard the trailing newline. 677takeNonEmptyLine :: Parser Text 678takeNonEmptyLine = do 679 l <- takeWhile1 (/= '\n') >>= nonSpace 680 _ <- "\n" 681 pure (l <> "\n") 682 683-- | Takes indentation of first non-empty line. 684-- 685-- More precisely: skips all whitespace-only lines and returns indentation 686-- (horizontal space, might be empty) of that non-empty line. 687takeIndent :: Parser Text 688takeIndent = do 689 indent <- takeHorizontalSpace 690 choice' [ "\n" *> takeIndent 691 , return indent 692 ] 693 694-- | Blocks of text of the form: 695-- 696-- >> foo 697-- >> bar 698-- >> baz 699-- 700birdtracks :: Parser (DocH mod a) 701birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line 702 where 703 line = try (skipHorizontalSpace *> ">" *> takeLine) 704 705stripSpace :: [Text] -> [Text] 706stripSpace = fromMaybe <*> mapM strip' 707 where 708 strip' t = case T.uncons t of 709 Nothing -> Just "" 710 Just (' ',t') -> Just t' 711 _ -> Nothing 712 713-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). 714-- Consecutive examples are accepted. 715examples :: Parser (DocH mod a) 716examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go) 717 where 718 go :: Parser [Example] 719 go = do 720 prefix <- takeHorizontalSpace <* ">>>" 721 expr <- takeLine 722 (rs, es) <- resultAndMoreExamples 723 return (makeExample prefix expr rs : es) 724 where 725 resultAndMoreExamples :: Parser ([Text], [Example]) 726 resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ] 727 where 728 moreExamples :: Parser ([Text], [Example]) 729 moreExamples = (,) [] <$> go 730 731 result :: Parser ([Text], [Example]) 732 result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples 733 734 makeExample :: Text -> Text -> [Text] -> Example 735 makeExample prefix expression res = 736 Example (T.unpack (T.strip expression)) result 737 where 738 result = map (T.unpack . substituteBlankLine . tryStripPrefix) res 739 740 tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs) 741 742 substituteBlankLine "<BLANKLINE>" = "" 743 substituteBlankLine xs = xs 744 745nonEmptyLine :: Parser Text 746nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine) 747 748takeLine :: Parser Text 749takeLine = try (takeWhile (/= '\n') <* endOfLine) 750 751endOfLine :: Parser () 752endOfLine = void "\n" <|> Parsec.eof 753 754-- | Property parser. 755-- 756-- >>> snd <$> parseOnly property "prop> hello world" 757-- Right (DocProperty "hello world") 758property :: Parser (DocH mod a) 759property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n')) 760 761-- | 762-- Paragraph level codeblock. Anything between the two delimiting \@ is parsed 763-- for markup. 764codeblock :: Parser (DocH mod Identifier) 765codeblock = 766 DocCodeBlock . parseParagraph . dropSpaces 767 <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") 768 where 769 dropSpaces xs = 770 case splitByNl xs of 771 [] -> xs 772 ys -> case T.uncons (last ys) of 773 Just (' ',_) -> case mapM dropSpace ys of 774 Nothing -> xs 775 Just zs -> T.intercalate "\n" zs 776 _ -> xs 777 778 -- This is necessary because ‘lines’ swallows up a trailing newline 779 -- and we lose information about whether the last line belongs to @ or to 780 -- text which we need to decide whether we actually want to be dropping 781 -- anything at all. 782 splitByNl = unfoldr (\x -> case T.uncons x of 783 Just ('\n',x') -> Just (T.span (/= '\n') x') 784 _ -> Nothing) 785 . ("\n" <>) 786 787 dropSpace t = case T.uncons t of 788 Nothing -> Just "" 789 Just (' ',t') -> Just t' 790 _ -> Nothing 791 792 block' = scan p False 793 where 794 p isNewline c 795 | isNewline && c == '@' = Nothing 796 | isNewline && isSpace c = Just isNewline 797 | otherwise = Just $ c == '\n' 798 799hyperlink :: Parser (DocH mod Identifier) 800hyperlink = choice' [ angleBracketLink, autoUrl ] 801 802angleBracketLink :: Parser (DocH mod a) 803angleBracketLink = 804 DocHyperlink . makeLabeled (\s -> Hyperlink s . fmap DocString) 805 <$> disallowNewline ("<" *> takeUntil ">") 806 807-- | The text for a markdown link, enclosed in square brackets. 808markdownLinkText :: Parser (DocH mod Identifier) 809markdownLinkText = parseParagraph . T.strip <$> ("[" *> takeUntil "]") 810 811-- | The target for a markdown link, enclosed in parenthesis. 812markdownLinkTarget :: Parser String 813markdownLinkTarget = whitespace *> url 814 where 815 whitespace :: Parser () 816 whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) 817 818 url :: Parser String 819 url = rejectWhitespace (decode <$> ("(" *> takeUntil ")")) 820 821 rejectWhitespace :: MonadPlus m => m String -> m String 822 rejectWhitespace = mfilter (all (not . isSpace)) 823 824 decode :: Text -> String 825 decode = T.unpack . removeEscapes 826 827-- | Looks for URL-like things to automatically hyperlink even if they 828-- weren't marked as links. 829autoUrl :: Parser (DocH mod a) 830autoUrl = mkLink <$> url 831 where 832 url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace) 833 834 mkLink :: Text -> DocH mod a 835 mkLink s = case T.unsnoc s of 836 Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x] 837 _ -> DocHyperlink (mkHyperlink s) 838 839 mkHyperlink :: Text -> Hyperlink (DocH mod a) 840 mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing 841 842 843-- | Parses identifiers with help of 'parseValid'. 844identifier :: Parser (DocH mod Identifier) 845identifier = DocIdentifier <$> parseValid 846