1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE BangPatterns #-}
3module Commonmark.TokParsers
4  ( satisfyTok
5  , satisfyWord
6  , anyTok
7  , anySymbol
8  , symbol
9  , whitespace
10  , lineEnd
11  , spaceTok
12  , oneOfToks
13  , noneOfToks
14  , gobbleSpaces
15  , gobbleUpToSpaces
16  , withRaw
17  , hasType
18  , textIs
19  , blankLine
20  , restOfLine
21  , isOneOfCI
22  , nonindentSpaces
23  , skipManyTill
24  , skipWhile
25  )
26  where
27import           Control.Monad   (mzero, void)
28import           Data.Text       (Text)
29import qualified Data.Text       as T
30import           Text.Parsec
31import           Text.Parsec.Pos (updatePosString)
32import           Commonmark.Tokens
33
34-- | Parses a single 'Tok' satisfying a predicate.
35satisfyTok :: Monad m => (Tok -> Bool) -> ParsecT [Tok] s m Tok
36satisfyTok f = tokenPrim (T.unpack . tokContents) updatePos matcher
37  where matcher t | f t       = Just t
38                  | otherwise = Nothing
39        updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
40        updatePos _spos _ (Tok _ !pos _ : _) = pos
41        updatePos !spos (Tok _ _pos !t) []    =
42          updatePosString spos (T.unpack t)
43{-# INLINE satisfyTok #-}
44
45-- | Parses any 'Tok'.
46anyTok :: Monad m => ParsecT [Tok] s m Tok
47anyTok = satisfyTok (const True)
48{-# INLINE anyTok #-}
49
50-- | Parses any 'Symbol' 'Tok'.
51anySymbol :: Monad m => ParsecT [Tok] s m Tok
52anySymbol = satisfyTok (\t -> case tokType t of
53                                    Symbol _ -> True
54                                    _        -> False)
55{-# INLINE anySymbol #-}
56
57-- | Parses a 'Symbol' with character @c@.
58symbol ::  Monad m => Char -> ParsecT [Tok] s m Tok
59symbol c = satisfyTok (hasType (Symbol c))
60{-# INLINE symbol #-}
61
62-- | Parses a 'Tok' with one of the listed types.
63oneOfToks ::  Monad m => [TokType] -> ParsecT [Tok] s m Tok
64oneOfToks toktypes = satisfyTok (hasTypeIn toktypes)
65{-# INLINE oneOfToks #-}
66
67-- | Parses a 'Tok' with none of the listed types.
68noneOfToks ::  Monad m => [TokType] -> ParsecT [Tok] s m Tok
69noneOfToks toktypes = satisfyTok (not . hasTypeIn toktypes)
70{-# INLINE noneOfToks #-}
71
72-- | Parses one or more whitespace 'Tok's.
73whitespace ::  Monad m => ParsecT [Tok] s m [Tok]
74whitespace = many1 $ satisfyTok (\t -> case tokType t of
75                                         Spaces  -> True
76                                         LineEnd -> True
77                                         _       -> False)
78{-# INLINE whitespace #-}
79
80-- | Parses a 'LineEnd' token.
81lineEnd ::  Monad m => ParsecT [Tok] s m Tok
82lineEnd = satisfyTok (hasType LineEnd)
83{-# INLINE lineEnd #-}
84
85-- | Parses a 'Spaces' token.
86spaceTok :: Monad m => ParsecT [Tok] s m Tok
87spaceTok = satisfyTok (hasType Spaces)
88{-# INLINE spaceTok #-}
89
90-- | Parses a 'WordChars' token matching a predicate.
91satisfyWord ::  Monad m => (Text -> Bool) -> ParsecT [Tok] s m Tok
92satisfyWord f = satisfyTok (\t -> hasType WordChars t && textIs f t)
93{-# INLINE satisfyWord #-}
94
95-- | Parses exactly @n@ spaces. If tabs are encountered,
96-- they are split into spaces before being consumed; so
97-- a tab may be partially consumed by this parser.
98gobbleSpaces :: Monad m => Int -> ParsecT [Tok] u m Int
99gobbleSpaces 0 = return 0
100gobbleSpaces n = try $ gobble' True n
101{-# INLINE gobbleSpaces #-}
102
103-- | Parses up to @n@ spaces.
104gobbleUpToSpaces :: Monad m => Int -> ParsecT [Tok] u m Int
105gobbleUpToSpaces 0 = return 0
106gobbleUpToSpaces n = gobble' False n
107{-# INLINE gobbleUpToSpaces #-}
108
109gobble' :: Monad m => Bool -> Int -> ParsecT [Tok] u m Int
110gobble' requireAll numspaces
111  | numspaces >= 1 = (do
112    Tok Spaces pos _ <- satisfyTok (hasType Spaces)
113    pos' <- getPosition
114    case sourceColumn pos' - sourceColumn pos of
115         n | n < numspaces  -> (+ n) <$> gobble' requireAll (numspaces - n)
116           | n == numspaces -> return $! n
117           | otherwise      -> do
118               let newpos = incSourceColumn pos numspaces
119               let newtok = Tok Spaces newpos
120                      (T.replicate (n - numspaces) " ")
121               getInput >>= setInput . (newtok:)
122               setPosition $ newpos
123               return $! numspaces)
124    <|> if requireAll
125           then mzero
126           else return 0
127  | otherwise     = return 0
128{-# INLINE gobble' #-}
129
130-- | Applies a parser and returns its value (if successful)
131-- plus a list of the raw tokens parsed.
132withRaw :: Monad m => ParsecT [Tok] s m a -> ParsecT [Tok] s m (a, [Tok])
133withRaw parser = do
134  toks <- getInput
135  res <- parser
136  newpos <- getPosition
137  let getrawtoks (t:ts)
138        | tokPos t < newpos = t : getrawtoks ts
139      getrawtoks _ = []
140  let rawtoks = getrawtoks toks
141  return (res, rawtoks)
142{-# INLINE withRaw #-}
143
144-- | Filters tokens of a certain type.
145hasType :: TokType -> Tok -> Bool
146hasType ty (Tok ty' _ _) = ty == ty'
147{-# INLINE hasType #-}
148
149hasTypeIn :: [TokType] -> Tok -> Bool
150hasTypeIn tys (Tok ty' _ _) = ty' `elem` tys
151
152-- | Filters tokens with certain contents.
153textIs :: (Text -> Bool) -> Tok -> Bool
154textIs f (Tok _ _ t) = f t
155{-# INLINE textIs #-}
156
157-- | Gobble up to 3 spaces (may be part of a tab).
158nonindentSpaces :: Monad m => ParsecT [Tok] u m ()
159nonindentSpaces = void $ gobbleUpToSpaces 3
160{-# INLINE nonindentSpaces #-}
161
162-- | Case-insensitive membership in a list of 'Text's.
163isOneOfCI :: [Text] -> Text -> Bool
164isOneOfCI ts t = T.toLower t `elem` ts
165{-# INLINE isOneOfCI #-}
166
167-- | Apply @p@ many times until @stop@ succeeds, discarding results.
168skipManyTill :: ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m ()
169skipManyTill p stop = scan
170    where scan = (() <$ stop) <|> (p >> scan)
171{-# INLINE skipManyTill #-}
172
173-- | Efficiently skip 'Tok's satisfying a certain condition.
174skipWhile :: Monad m => (Tok -> Bool) -> ParsecT [Tok] u m ()
175skipWhile f = skipMany (satisfyTok f)
176{-# INLINE skipWhile #-}
177
178-- | Parse optional spaces and an endline.
179blankLine :: Monad m => ParsecT [Tok] s m ()
180blankLine = try $ do
181  skipWhile (hasType Spaces)
182  void lineEnd
183{-# INLINE blankLine #-}
184
185-- | Efficiently parse the remaining tokens on a line,
186-- including the LineEnd (if any).
187restOfLine :: Monad m => ParsecT [Tok] s m [Tok]
188restOfLine = go
189  where
190   go = option [] $ do
191     !tok <- anyTok
192     case tokType tok of
193       LineEnd -> return [tok]
194       _       -> (tok:) <$> go
195{-# INLINE restOfLine #-}
196