1{-# LANGUAGE FlexibleInstances #-}
2{-# LANGUAGE FunctionalDependencies #-}
3{-# LANGUAGE MultiParamTypeClasses #-}
4{-# LANGUAGE Safe #-}
5{-# LANGUAGE TupleSections #-}
6{-# LANGUAGE UndecidableInstances #-}
7
8-- |
9-- Module      :  Text.Megaparsec.Class
10-- Copyright   :  © 2015–present Megaparsec contributors
11--                © 2007 Paolo Martini
12--                © 1999–2001 Daan Leijen
13-- License     :  FreeBSD
14--
15-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
16-- Stability   :  experimental
17-- Portability :  portable
18--
19-- Definition of 'MonadParsec'—type class describing monads that implement
20-- the full set of primitive parsers.
21--
22-- @since 6.5.0
23module Text.Megaparsec.Class
24  ( MonadParsec (..),
25  )
26where
27
28import Control.Monad
29import Control.Monad.Identity
30import qualified Control.Monad.RWS.Lazy as L
31import qualified Control.Monad.RWS.Strict as S
32import Control.Monad.Trans
33import qualified Control.Monad.Trans.Reader as L
34import qualified Control.Monad.Trans.State.Lazy as L
35import qualified Control.Monad.Trans.State.Strict as S
36import qualified Control.Monad.Trans.Writer.Lazy as L
37import qualified Control.Monad.Trans.Writer.Strict as S
38import Data.Set (Set)
39import Text.Megaparsec.Error
40import Text.Megaparsec.State
41import Text.Megaparsec.Stream
42
43-- | Type class describing monads that implement the full set of primitive
44-- parsers.
45--
46-- __Note__ that the following primitives are “fast” and should be taken
47-- advantage of as much as possible if your aim is a fast parser: 'tokens',
48-- 'takeWhileP', 'takeWhile1P', and 'takeP'.
49class (Stream s, MonadPlus m) => MonadParsec e s m | m -> e s where
50  -- | Stop parsing and report the 'ParseError'. This is the only way to
51  -- control position of the error without manipulating parser state
52  -- manually.
53  --
54  -- @since 8.0.0
55  parseError :: ParseError s e -> m a
56
57  -- | The parser @'label' name p@ behaves as parser @p@, but whenever the
58  -- parser @p@ fails /without consuming any input/, it replaces names of
59  -- “expected” tokens with the name @name@.
60  label :: String -> m a -> m a
61
62  -- | @'hidden' p@ behaves just like parser @p@, but it doesn't show any
63  -- “expected” tokens in error message when @p@ fails.
64  --
65  -- Please use 'hidden' instead of the old @'label' ""@ idiom.
66  hidden :: m a -> m a
67  hidden = label ""
68
69  -- | The parser @'try' p@ behaves like parser @p@, except that it
70  -- backtracks the parser state when @p@ fails (either consuming input or
71  -- not).
72  --
73  -- This combinator is used whenever arbitrary look ahead is needed. Since
74  -- it pretends that it hasn't consumed any input when @p@ fails, the
75  -- ('A.<|>') combinator will try its second alternative even if the first
76  -- parser failed while consuming input.
77  --
78  -- For example, here is a parser that is supposed to parse the word “let”
79  -- or the word “lexical”:
80  --
81  -- >>> parseTest (string "let" <|> string "lexical") "lexical"
82  -- 1:1:
83  -- unexpected "lex"
84  -- expecting "let"
85  --
86  -- What happens here? The first parser consumes “le” and fails (because it
87  -- doesn't see a “t”). The second parser, however, isn't tried, since the
88  -- first parser has already consumed some input! 'try' fixes this behavior
89  -- and allows backtracking to work:
90  --
91  -- >>> parseTest (try (string "let") <|> string "lexical") "lexical"
92  -- "lexical"
93  --
94  -- 'try' also improves error messages in case of overlapping alternatives,
95  -- because Megaparsec's hint system can be used:
96  --
97  -- >>> parseTest (try (string "let") <|> string "lexical") "le"
98  -- 1:1:
99  -- unexpected "le"
100  -- expecting "let" or "lexical"
101  --
102  -- __Note__ that as of Megaparsec 4.4.0, 'Text.Megaparsec.Char.string'
103  -- backtracks automatically (see 'tokens'), so it does not need 'try'.
104  -- However, the examples above demonstrate the idea behind 'try' so well
105  -- that it was decided to keep them. You still need to use 'try' when your
106  -- alternatives are complex, composite parsers.
107  try :: m a -> m a
108
109  -- | If @p@ in @'lookAhead' p@ succeeds (either consuming input or not)
110  -- the whole parser behaves like @p@ succeeded without consuming anything
111  -- (parser state is not updated as well). If @p@ fails, 'lookAhead' has no
112  -- effect, i.e. it will fail consuming input if @p@ fails consuming input.
113  -- Combine with 'try' if this is undesirable.
114  lookAhead :: m a -> m a
115
116  -- | @'notFollowedBy' p@ only succeeds when the parser @p@ fails. This
117  -- parser /never consumes/ any input and /never modifies/ parser state. It
118  -- can be used to implement the “longest match” rule.
119  notFollowedBy :: m a -> m ()
120
121  -- | @'withRecovery' r p@ allows continue parsing even if parser @p@
122  -- fails. In this case @r@ is called with the actual 'ParseError' as its
123  -- argument. Typical usage is to return a value signifying failure to
124  -- parse this particular object and to consume some part of the input up
125  -- to the point where the next object starts.
126  --
127  -- Note that if @r@ fails, original error message is reported as if
128  -- without 'withRecovery'. In no way recovering parser @r@ can influence
129  -- error messages.
130  --
131  -- @since 4.4.0
132  withRecovery ::
133    -- | How to recover from failure
134    (ParseError s e -> m a) ->
135    -- | Original parser
136    m a ->
137    -- | Parser that can recover from failures
138    m a
139
140  -- | @'observing' p@ allows to “observe” failure of the @p@ parser, should
141  -- it happen, without actually ending parsing but instead getting the
142  -- 'ParseError' in 'Left'. On success parsed value is returned in 'Right'
143  -- as usual. Note that this primitive just allows you to observe parse
144  -- errors as they happen, it does not backtrack or change how the @p@
145  -- parser works in any way.
146  --
147  -- @since 5.1.0
148  observing ::
149    -- | The parser to run
150    m a ->
151    m (Either (ParseError s e) a)
152
153  -- | This parser only succeeds at the end of input.
154  eof :: m ()
155
156  -- | The parser @'token' test expected@ accepts a token @t@ with result
157  -- @x@ when the function @test t@ returns @'Just' x@. @expected@ specifies
158  -- the collection of expected items to report in error messages.
159  --
160  -- This is the most primitive combinator for accepting tokens. For
161  -- example, the 'Text.Megaparsec.satisfy' parser is implemented as:
162  --
163  -- > satisfy f = token testToken E.empty
164  -- >   where
165  -- >     testToken x = if f x then Just x else Nothing
166  --
167  -- __Note__: type signature of this primitive was changed in the version
168  -- /7.0.0/.
169  token ::
170    -- | Matching function for the token to parse
171    (Token s -> Maybe a) ->
172    -- | Expected items (in case of an error)
173    Set (ErrorItem (Token s)) ->
174    m a
175
176  -- | The parser @'tokens' test chk@ parses a chunk of input @chk@ and
177  -- returns it. The supplied predicate @test@ is used to check equality of
178  -- given and parsed chunks after a candidate chunk of correct length is
179  -- fetched from the stream.
180  --
181  -- This can be used for example to write 'Text.Megaparsec.chunk':
182  --
183  -- > chunk = tokens (==)
184  --
185  -- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
186  -- primitive, which means that if it fails, it never consumes any input.
187  -- This is done to make its consumption model match how error messages for
188  -- this primitive are reported (which becomes an important thing as user
189  -- gets more control with primitives like 'withRecovery'):
190  --
191  -- >>> parseTest (string "abc") "abd"
192  -- 1:1:
193  -- unexpected "abd"
194  -- expecting "abc"
195  --
196  -- This means, in particular, that it's no longer necessary to use 'try'
197  -- with 'tokens'-based parsers, such as 'Text.Megaparsec.Char.string' and
198  -- 'Text.Megaparsec.Char.string''. This feature /does not/ affect
199  -- performance in any way.
200  tokens ::
201    -- | Predicate to check equality of chunks
202    (Tokens s -> Tokens s -> Bool) ->
203    -- | Chunk of input to match against
204    Tokens s ->
205    m (Tokens s)
206
207  -- | Parse /zero/ or more tokens for which the supplied predicate holds.
208  -- Try to use this as much as possible because for many streams the
209  -- combinator is much faster than parsers built with
210  -- 'Control.Monad.Combinators.many' and 'Text.Megaparsec.satisfy'.
211  --
212  -- The following equations should clarify the behavior:
213  --
214  -- > takeWhileP (Just "foo") f = many (satisfy f <?> "foo")
215  -- > takeWhileP Nothing      f = many (satisfy f)
216  --
217  -- The combinator never fails, although it may parse the empty chunk.
218  --
219  -- @since 6.0.0
220  takeWhileP ::
221    -- | Name for a single token in the row
222    Maybe String ->
223    -- | Predicate to use to test tokens
224    (Token s -> Bool) ->
225    -- | A chunk of matching tokens
226    m (Tokens s)
227
228  -- | Similar to 'takeWhileP', but fails if it can't parse at least one
229  -- token. Note that the combinator either succeeds or fails without
230  -- consuming any input, so 'try' is not necessary with it.
231  --
232  -- @since 6.0.0
233  takeWhile1P ::
234    -- | Name for a single token in the row
235    Maybe String ->
236    -- | Predicate to use to test tokens
237    (Token s -> Bool) ->
238    -- | A chunk of matching tokens
239    m (Tokens s)
240
241  -- | Extract the specified number of tokens from the input stream and
242  -- return them packed as a chunk of stream. If there is not enough tokens
243  -- in the stream, a parse error will be signaled. It's guaranteed that if
244  -- the parser succeeds, the requested number of tokens will be returned.
245  --
246  -- The parser is roughly equivalent to:
247  --
248  -- > takeP (Just "foo") n = count n (anySingle <?> "foo")
249  -- > takeP Nothing      n = count n anySingle
250  --
251  -- Note that if the combinator fails due to insufficient number of tokens
252  -- in the input stream, it backtracks automatically. No 'try' is necessary
253  -- with 'takeP'.
254  --
255  -- @since 6.0.0
256  takeP ::
257    -- | Name for a single token in the row
258    Maybe String ->
259    -- | How many tokens to extract
260    Int ->
261    -- | A chunk of matching tokens
262    m (Tokens s)
263
264  -- | Return the full parser state as a 'State' record.
265  getParserState :: m (State s e)
266
267  -- | @'updateParserState' f@ applies the function @f@ to the parser state.
268  updateParserState :: (State s e -> State s e) -> m ()
269
270----------------------------------------------------------------------------
271-- Lifting through MTL
272
273instance MonadParsec e s m => MonadParsec e s (L.StateT st m) where
274  parseError e = lift (parseError e)
275  label n (L.StateT m) = L.StateT $ label n . m
276  try (L.StateT m) = L.StateT $ try . m
277  lookAhead (L.StateT m) = L.StateT $ \s ->
278    (,s) . fst <$> lookAhead (m s)
279  notFollowedBy (L.StateT m) = L.StateT $ \s ->
280    notFollowedBy (fst <$> m s) >> return ((), s)
281  withRecovery r (L.StateT m) = L.StateT $ \s ->
282    withRecovery (\e -> L.runStateT (r e) s) (m s)
283  observing (L.StateT m) = L.StateT $ \s ->
284    fixs s <$> observing (m s)
285  eof = lift eof
286  token test mt = lift (token test mt)
287  tokens e ts = lift (tokens e ts)
288  takeWhileP l f = lift (takeWhileP l f)
289  takeWhile1P l f = lift (takeWhile1P l f)
290  takeP l n = lift (takeP l n)
291  getParserState = lift getParserState
292  updateParserState f = lift (updateParserState f)
293
294instance MonadParsec e s m => MonadParsec e s (S.StateT st m) where
295  parseError e = lift (parseError e)
296  label n (S.StateT m) = S.StateT $ label n . m
297  try (S.StateT m) = S.StateT $ try . m
298  lookAhead (S.StateT m) = S.StateT $ \s ->
299    (,s) . fst <$> lookAhead (m s)
300  notFollowedBy (S.StateT m) = S.StateT $ \s ->
301    notFollowedBy (fst <$> m s) >> return ((), s)
302  withRecovery r (S.StateT m) = S.StateT $ \s ->
303    withRecovery (\e -> S.runStateT (r e) s) (m s)
304  observing (S.StateT m) = S.StateT $ \s ->
305    fixs s <$> observing (m s)
306  eof = lift eof
307  token test mt = lift (token test mt)
308  tokens e ts = lift (tokens e ts)
309  takeWhileP l f = lift (takeWhileP l f)
310  takeWhile1P l f = lift (takeWhile1P l f)
311  takeP l n = lift (takeP l n)
312  getParserState = lift getParserState
313  updateParserState f = lift (updateParserState f)
314
315instance MonadParsec e s m => MonadParsec e s (L.ReaderT r m) where
316  parseError e = lift (parseError e)
317  label n (L.ReaderT m) = L.ReaderT $ label n . m
318  try (L.ReaderT m) = L.ReaderT $ try . m
319  lookAhead (L.ReaderT m) = L.ReaderT $ lookAhead . m
320  notFollowedBy (L.ReaderT m) = L.ReaderT $ notFollowedBy . m
321  withRecovery r (L.ReaderT m) = L.ReaderT $ \s ->
322    withRecovery (\e -> L.runReaderT (r e) s) (m s)
323  observing (L.ReaderT m) = L.ReaderT $ observing . m
324  eof = lift eof
325  token test mt = lift (token test mt)
326  tokens e ts = lift (tokens e ts)
327  takeWhileP l f = lift (takeWhileP l f)
328  takeWhile1P l f = lift (takeWhile1P l f)
329  takeP l n = lift (takeP l n)
330  getParserState = lift getParserState
331  updateParserState f = lift (updateParserState f)
332
333instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.WriterT w m) where
334  parseError e = lift (parseError e)
335  label n (L.WriterT m) = L.WriterT $ label n m
336  try (L.WriterT m) = L.WriterT $ try m
337  lookAhead (L.WriterT m) =
338    L.WriterT $
339      (,mempty) . fst <$> lookAhead m
340  notFollowedBy (L.WriterT m) =
341    L.WriterT $
342      (,mempty) <$> notFollowedBy (fst <$> m)
343  withRecovery r (L.WriterT m) =
344    L.WriterT $
345      withRecovery (L.runWriterT . r) m
346  observing (L.WriterT m) =
347    L.WriterT $
348      fixs mempty <$> observing m
349  eof = lift eof
350  token test mt = lift (token test mt)
351  tokens e ts = lift (tokens e ts)
352  takeWhileP l f = lift (takeWhileP l f)
353  takeWhile1P l f = lift (takeWhile1P l f)
354  takeP l n = lift (takeP l n)
355  getParserState = lift getParserState
356  updateParserState f = lift (updateParserState f)
357
358instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.WriterT w m) where
359  parseError e = lift (parseError e)
360  label n (S.WriterT m) = S.WriterT $ label n m
361  try (S.WriterT m) = S.WriterT $ try m
362  lookAhead (S.WriterT m) =
363    S.WriterT $
364      (,mempty) . fst <$> lookAhead m
365  notFollowedBy (S.WriterT m) =
366    S.WriterT $
367      (,mempty) <$> notFollowedBy (fst <$> m)
368  withRecovery r (S.WriterT m) =
369    S.WriterT $
370      withRecovery (S.runWriterT . r) m
371  observing (S.WriterT m) =
372    S.WriterT $
373      fixs mempty <$> observing m
374  eof = lift eof
375  token test mt = lift (token test mt)
376  tokens e ts = lift (tokens e ts)
377  takeWhileP l f = lift (takeWhileP l f)
378  takeWhile1P l f = lift (takeWhile1P l f)
379  takeP l n = lift (takeP l n)
380  getParserState = lift getParserState
381  updateParserState f = lift (updateParserState f)
382
383-- | @since 5.2.0
384instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.RWST r w st m) where
385  parseError e = lift (parseError e)
386  label n (L.RWST m) = L.RWST $ \r s -> label n (m r s)
387  try (L.RWST m) = L.RWST $ \r s -> try (m r s)
388  lookAhead (L.RWST m) = L.RWST $ \r s -> do
389    (x, _, _) <- lookAhead (m r s)
390    return (x, s, mempty)
391  notFollowedBy (L.RWST m) = L.RWST $ \r s -> do
392    notFollowedBy (void $ m r s)
393    return ((), s, mempty)
394  withRecovery n (L.RWST m) = L.RWST $ \r s ->
395    withRecovery (\e -> L.runRWST (n e) r s) (m r s)
396  observing (L.RWST m) = L.RWST $ \r s ->
397    fixs' s <$> observing (m r s)
398  eof = lift eof
399  token test mt = lift (token test mt)
400  tokens e ts = lift (tokens e ts)
401  takeWhileP l f = lift (takeWhileP l f)
402  takeWhile1P l f = lift (takeWhile1P l f)
403  takeP l n = lift (takeP l n)
404  getParserState = lift getParserState
405  updateParserState f = lift (updateParserState f)
406
407-- | @since 5.2.0
408instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.RWST r w st m) where
409  parseError e = lift (parseError e)
410  label n (S.RWST m) = S.RWST $ \r s -> label n (m r s)
411  try (S.RWST m) = S.RWST $ \r s -> try (m r s)
412  lookAhead (S.RWST m) = S.RWST $ \r s -> do
413    (x, _, _) <- lookAhead (m r s)
414    return (x, s, mempty)
415  notFollowedBy (S.RWST m) = S.RWST $ \r s -> do
416    notFollowedBy (void $ m r s)
417    return ((), s, mempty)
418  withRecovery n (S.RWST m) = S.RWST $ \r s ->
419    withRecovery (\e -> S.runRWST (n e) r s) (m r s)
420  observing (S.RWST m) = S.RWST $ \r s ->
421    fixs' s <$> observing (m r s)
422  eof = lift eof
423  token test mt = lift (token test mt)
424  tokens e ts = lift (tokens e ts)
425  takeWhileP l f = lift (takeWhileP l f)
426  takeWhile1P l f = lift (takeWhile1P l f)
427  takeP l n = lift (takeP l n)
428  getParserState = lift getParserState
429  updateParserState f = lift (updateParserState f)
430
431instance MonadParsec e s m => MonadParsec e s (IdentityT m) where
432  parseError e = lift (parseError e)
433  label n (IdentityT m) = IdentityT $ label n m
434  try = IdentityT . try . runIdentityT
435  lookAhead (IdentityT m) = IdentityT $ lookAhead m
436  notFollowedBy (IdentityT m) = IdentityT $ notFollowedBy m
437  withRecovery r (IdentityT m) =
438    IdentityT $
439      withRecovery (runIdentityT . r) m
440  observing (IdentityT m) = IdentityT $ observing m
441  eof = lift eof
442  token test mt = lift (token test mt)
443  tokens e ts = lift $ tokens e ts
444  takeWhileP l f = lift (takeWhileP l f)
445  takeWhile1P l f = lift (takeWhile1P l f)
446  takeP l n = lift (takeP l n)
447  getParserState = lift getParserState
448  updateParserState f = lift $ updateParserState f
449
450fixs :: s -> Either a (b, s) -> (Either a b, s)
451fixs s (Left a) = (Left a, s)
452fixs _ (Right (b, s)) = (Right b, s)
453{-# INLINE fixs #-}
454
455fixs' :: Monoid w => s -> Either a (b, s, w) -> (Either a b, s, w)
456fixs' s (Left a) = (Left a, s, mempty)
457fixs' _ (Right (b, s, w)) = (Right b, s, w)
458{-# INLINE fixs' #-}
459