1{-# LANGUAGE GeneralizedNewtypeDeriving #-} 2{-# LANGUAGE FlexibleContexts #-} 3{-# LANGUAGE FlexibleInstances #-} 4{-# LANGUAGE TypeFamilies #-} 5{-# LANGUAGE BangPatterns #-} 6{-# LANGUAGE TypeSynonymInstances #-} 7-- | 8-- Module : Documentation.Haddock.Parser.Monad 9-- Copyright : (c) Alec Theriault 2018-2019, 10-- License : BSD-like 11-- 12-- Maintainer : haddock@projects.haskell.org 13-- Stability : experimental 14-- Portability : portable 15-- 16-- Defines the Parsec monad over which all parsing is done and also provides 17-- more efficient versions of the usual parsec combinator functions (but 18-- specialized to 'Text'). 19 20module Documentation.Haddock.Parser.Monad where 21 22import qualified Text.Parsec.Char as Parsec 23import qualified Text.Parsec as Parsec 24import Text.Parsec.Pos ( updatePosChar ) 25import Text.Parsec ( State(..) 26 , getParserState, setParserState ) 27 28import qualified Data.Text as T 29import Data.Text ( Text ) 30 31import Control.Monad ( mfilter ) 32import Data.String ( IsString(..) ) 33import Data.Bits ( Bits(..) ) 34import Data.Char ( ord ) 35import Data.List ( foldl' ) 36import Control.Applicative as App 37 38import Documentation.Haddock.Types ( Version ) 39 40import Prelude hiding (takeWhile) 41import CompatPrelude 42 43-- | The only bit of information we really care about truding along with us 44-- through parsing is the version attached to a @\@since@ annotation - if 45-- the doc even contained one. 46newtype ParserState = ParserState { 47 parserStateSince :: Maybe Version 48} deriving (Eq, Show) 49 50initialParserState :: ParserState 51initialParserState = ParserState Nothing 52 53setSince :: Version -> Parser () 54setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since }) 55 56type Parser = Parsec.Parsec Text ParserState 57 58instance (a ~ Text) => IsString (Parser a) where 59 fromString = fmap T.pack . Parsec.string 60 61parseOnly :: Parser a -> Text -> Either String (ParserState, a) 62parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of 63 Left e -> Left (show e) 64 Right (x,s) -> Right (s,x) 65 where p' = (,) <$> p <*> Parsec.getState 66 67-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not 68-- consume input. 69-- 70-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but 71-- more efficient. 72peekChar :: Parser (Maybe Char) 73peekChar = headOpt . stateInput <$> getParserState 74 where headOpt t | T.null t = Nothing 75 | otherwise = Just (T.head t) 76{-# INLINE peekChar #-} 77 78-- | Fails if at the end of input. Does not consume input. 79-- 80-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient. 81peekChar' :: Parser Char 82peekChar' = headFail . stateInput =<< getParserState 83 where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF" 84 | otherwise = App.pure (T.head t) 85{-# INLINE peekChar' #-} 86 87-- | Parses the given string. Returns the parsed string. 88-- 89-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient. 90string :: Text -> Parser Text 91string t = do 92 s@State{ stateInput = inp, statePos = pos } <- getParserState 93 case T.stripPrefix t inp of 94 Nothing -> Parsec.parserFail "string: Failed to match the input string" 95 Just inp' -> 96 let pos' = T.foldl updatePosChar pos t 97 s' = s{ stateInput = inp', statePos = pos' } 98 in setParserState s' $> t 99 100-- | Keep matching characters as long as the predicate function holds (and 101-- return them). 102-- 103-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient. 104takeWhile :: (Char -> Bool) -> Parser Text 105takeWhile f = do 106 s@State{ stateInput = inp, statePos = pos } <- getParserState 107 let (t, inp') = T.span f inp 108 pos' = T.foldl updatePosChar pos t 109 s' = s{ stateInput = inp', statePos = pos' } 110 setParserState s' $> t 111 112-- | Like 'takeWhile', but fails if no characters matched. 113-- 114-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. 115takeWhile1 :: (Char -> Bool) -> Parser Text 116takeWhile1 = mfilter (not . T.null) . takeWhile 117 118-- | Scan the input text, accumulating characters as long as the scanning 119-- function returns true. 120scan :: (s -> Char -> Maybe s) -- ^ scan function 121 -> s -- ^ initial state 122 -> Parser Text 123scan f st = do 124 s@State{ stateInput = inp, statePos = pos } <- getParserState 125 go inp st pos 0 $ \inp' pos' n -> 126 let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' } 127 in setParserState s' $> T.take n inp 128 where 129 go inp s !pos !n cont 130 = case T.uncons inp of 131 Nothing -> cont inp pos n -- ran out of input 132 Just (c, inp') -> 133 case f s c of 134 Nothing -> cont inp pos n -- scan function failed 135 Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont 136 137 138-- | Parse a decimal number. 139decimal :: Integral a => Parser a 140decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit 141 where step a c = a * 10 + fromIntegral (ord c - 48) 142 143-- | Parse a hexadecimal number. 144hexadecimal :: (Integral a, Bits a) => Parser a 145hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit 146 where 147 step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) 148 | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) 149 | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) 150 where w = ord c 151