1{-# LANGUAGE GeneralizedNewtypeDeriving #-}
2{-# LANGUAGE FlexibleContexts #-}
3{-# LANGUAGE FlexibleInstances #-}
4{-# LANGUAGE TypeFamilies #-}
5{-# LANGUAGE BangPatterns #-}
6{-# LANGUAGE TypeSynonymInstances #-}
7-- |
8-- Module      :  Documentation.Haddock.Parser.Monad
9-- Copyright   :  (c) Alec Theriault 2018-2019,
10-- License     :  BSD-like
11--
12-- Maintainer  :  haddock@projects.haskell.org
13-- Stability   :  experimental
14-- Portability :  portable
15--
16-- Defines the Parsec monad over which all parsing is done and also provides
17-- more efficient versions of the usual parsec combinator functions (but
18-- specialized to 'Text').
19
20module Documentation.Haddock.Parser.Monad where
21
22import qualified Text.Parsec.Char as Parsec
23import qualified Text.Parsec as Parsec
24import           Text.Parsec.Pos             ( updatePosChar )
25import           Text.Parsec                 ( State(..)
26                                             , getParserState, setParserState )
27
28import qualified Data.Text as T
29import           Data.Text                   ( Text )
30
31import           Control.Monad               ( mfilter )
32import           Data.String                 ( IsString(..) )
33import           Data.Bits                   ( Bits(..) )
34import           Data.Char                   ( ord )
35import           Data.List                   ( foldl' )
36import           Control.Applicative as App
37
38import           Documentation.Haddock.Types ( Version )
39
40import           Prelude hiding (takeWhile)
41import           CompatPrelude
42
43-- | The only bit of information we really care about truding along with us
44-- through parsing is the version attached to a @\@since@ annotation - if
45-- the doc even contained one.
46newtype ParserState = ParserState {
47  parserStateSince :: Maybe Version
48} deriving (Eq, Show)
49
50initialParserState :: ParserState
51initialParserState = ParserState Nothing
52
53setSince :: Version -> Parser ()
54setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })
55
56type Parser = Parsec.Parsec Text ParserState
57
58instance (a ~ Text) => IsString (Parser a) where
59  fromString = fmap T.pack . Parsec.string
60
61parseOnly :: Parser a -> Text -> Either String (ParserState, a)
62parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
63                  Left e -> Left (show e)
64                  Right (x,s) -> Right (s,x)
65  where p' = (,) <$> p <*> Parsec.getState
66
67-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
68-- consume input.
69--
70-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
71-- more efficient.
72peekChar :: Parser (Maybe Char)
73peekChar = headOpt . stateInput <$> getParserState
74  where headOpt t | T.null t = Nothing
75                  | otherwise = Just (T.head t)
76{-# INLINE peekChar #-}
77
78-- | Fails if at the end of input. Does not consume input.
79--
80-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
81peekChar' :: Parser Char
82peekChar' = headFail . stateInput =<< getParserState
83  where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF"
84                   | otherwise = App.pure (T.head t)
85{-# INLINE peekChar' #-}
86
87-- | Parses the given string. Returns the parsed string.
88--
89-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
90string :: Text -> Parser Text
91string t = do
92  s@State{ stateInput = inp, statePos = pos } <- getParserState
93  case T.stripPrefix t inp of
94    Nothing -> Parsec.parserFail "string: Failed to match the input string"
95    Just inp' ->
96      let pos' = T.foldl updatePosChar pos t
97          s' = s{ stateInput = inp', statePos = pos' }
98      in setParserState s' $> t
99
100-- | Keep matching characters as long as the predicate function holds (and
101-- return them).
102--
103-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
104takeWhile :: (Char -> Bool) -> Parser Text
105takeWhile f = do
106  s@State{ stateInput = inp, statePos = pos } <- getParserState
107  let (t, inp') = T.span f inp
108      pos' = T.foldl updatePosChar pos t
109      s' = s{ stateInput = inp', statePos = pos' }
110  setParserState s' $> t
111
112-- | Like 'takeWhile', but fails if no characters matched.
113--
114-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
115takeWhile1 :: (Char -> Bool) -> Parser Text
116takeWhile1 = mfilter (not . T.null) . takeWhile
117
118-- | Scan the input text, accumulating characters as long as the scanning
119-- function returns true.
120scan :: (s -> Char -> Maybe s) -- ^ scan function
121     -> s                      -- ^ initial state
122     -> Parser Text
123scan f st = do
124  s@State{ stateInput = inp, statePos = pos } <- getParserState
125  go inp st pos 0 $ \inp' pos' n ->
126    let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' }
127    in setParserState s' $> T.take n inp
128  where
129    go inp s !pos !n cont
130      = case T.uncons inp of
131          Nothing -> cont inp pos n        -- ran out of input
132          Just (c, inp') ->
133             case f s c of
134               Nothing -> cont inp pos n   -- scan function failed
135               Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont
136
137
138-- | Parse a decimal number.
139decimal :: Integral a => Parser a
140decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
141  where step a c = a * 10 + fromIntegral (ord c - 48)
142
143-- | Parse a hexadecimal number.
144hexadecimal :: (Integral a, Bits a) => Parser a
145hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit
146  where
147  step a c | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
148           | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
149           | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
150    where w = ord c
151