1-- |
2-- Module      :  Text.Megaparsec
3-- Copyright   :  © 2015–present Megaparsec contributors
4--                © 2007 Paolo Martini
5--                © 1999–2001 Daan Leijen
6-- License     :  FreeBSD
7--
8-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
9-- Stability   :  experimental
10-- Portability :  portable
11--
12-- This module includes everything you need to get started writing a parser.
13-- If you are new to Megaparsec and don't know where to begin, take a look
14-- at the tutorials
15-- <https://markkarpov.com/learn-haskell.html#megaparsec-tutorials>.
16--
17-- In addition to the "Text.Megaparsec" module, which exports and re-exports
18-- most everything that you may need, we advise to import
19-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens
20-- or "Text.Megaparsec.Byte" if you intend to parse binary data.
21--
22-- It is common to start working with the library by defining a type synonym
23-- like this:
24--
25-- > type Parser = Parsec Void Text
26-- >                      ^    ^
27-- >                      |    |
28-- > Custom error component    Input stream type
29--
30-- Then you can write type signatures like @Parser 'Int'@—for a parser that
31-- returns an 'Int' for example.
32--
33-- Similarly (since it's known to cause confusion), you should use
34-- 'ParseErrorBundle' type parametrized like this:
35--
36-- > ParseErrorBundle Text Void
37-- >                  ^    ^
38-- >                  |    |
39-- >  Input stream type    Custom error component (the same you used in Parser)
40--
41-- Megaparsec uses some type-level machinery to provide flexibility without
42-- compromising on type safety. Thus type signatures are sometimes necessary
43-- to avoid ambiguous types. If you're seeing an error message that reads
44-- like “Type variable @e0@ is ambiguous …”, you need to give an explicit
45-- signature to your parser to resolve the ambiguity. It's a good idea to
46-- provide type signatures for all top-level definitions.
47
48{-# LANGUAGE FlexibleContexts      #-}
49{-# LANGUAGE FlexibleInstances     #-}
50{-# LANGUAGE MultiParamTypeClasses #-}
51{-# LANGUAGE RankNTypes            #-}
52{-# LANGUAGE ScopedTypeVariables   #-}
53{-# LANGUAGE TypeFamilies          #-}
54{-# LANGUAGE UndecidableInstances  #-}
55
56module Text.Megaparsec
57  ( -- * Re-exports
58    -- $reexports
59    module Text.Megaparsec.Pos
60  , module Text.Megaparsec.Error
61  , module Text.Megaparsec.Stream
62  , module Control.Monad.Combinators
63    -- * Data types
64  , State (..)
65  , PosState (..)
66  , Parsec
67  , ParsecT
68    -- * Running parser
69  , parse
70  , parseMaybe
71  , parseTest
72  , runParser
73  , runParser'
74  , runParserT
75  , runParserT'
76    -- * Primitive combinators
77  , MonadParsec (..)
78    -- * Signaling parse errors
79    -- $parse-errors
80  , failure
81  , fancyFailure
82  , unexpected
83  , customFailure
84  , region
85  , registerParseError
86  , registerFailure
87  , registerFancyFailure
88    -- * Derivatives of primitive combinators
89  , single
90  , satisfy
91  , anySingle
92  , anySingleBut
93  , oneOf
94  , noneOf
95  , chunk
96  , (<?>)
97  , match
98  , takeRest
99  , atEnd
100    -- * Parser state combinators
101  , getInput
102  , setInput
103  , getSourcePos
104  , getOffset
105  , setOffset
106  , setParserState )
107where
108
109import Control.Monad.Combinators
110import Control.Monad.Identity
111import Data.List.NonEmpty (NonEmpty (..))
112import Data.Maybe (fromJust)
113import Data.Set (Set)
114import Text.Megaparsec.Class
115import Text.Megaparsec.Error
116import Text.Megaparsec.Internal
117import Text.Megaparsec.Pos
118import Text.Megaparsec.State
119import Text.Megaparsec.Stream
120import qualified Data.List.NonEmpty as NE
121import qualified Data.Set as E
122
123-- $reexports
124--
125-- Note that we re-export monadic combinators from
126-- "Control.Monad.Combinators" because these are more efficient than
127-- 'Applicative'-based ones. Thus 'many' and 'some' may clash with the
128-- functions from "Control.Applicative". You need to hide the functions like
129-- this:
130--
131-- > import Control.Applicative hiding (many, some)
132--
133-- Also note that you can import "Control.Monad.Combinators.NonEmpty" if you
134-- wish that combinators like 'some' return 'NonEmpty' lists. The module
135-- lives in the @parser-combinators@ package (you need at least version
136-- /0.4.0/).
137--
138-- This module is intended to be imported qualified:
139--
140-- > import qualified Control.Monad.Combinators.NonEmpty as NE
141--
142-- Other modules of interest are:
143--
144--     * "Control.Monad.Combinators.Expr" for parsing of expressions.
145--     * "Control.Applicative.Permutations" for parsing of permutations
146--       phrases.
147
148----------------------------------------------------------------------------
149-- Data types
150
151-- | 'Parsec' is a non-transformer variant of the more general 'ParsecT'
152-- monad transformer.
153
154type Parsec e s = ParsecT e s Identity
155
156----------------------------------------------------------------------------
157-- Running a parser
158
159-- | @'parse' p file input@ runs parser @p@ over 'Identity' (see
160-- 'runParserT' if you're using the 'ParsecT' monad transformer; 'parse'
161-- itself is just a synonym for 'runParser'). It returns either a
162-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
163-- 'errorBundlePretty' can be used to turn 'ParseErrorBundle' into the
164-- string representation of the error message. See "Text.Megaparsec.Error"
165-- if you need to do more advanced error analysis.
166--
167-- > main = case parse numbers "" "11,2,43" of
168-- >          Left bundle -> putStr (errorBundlePretty bundle)
169-- >          Right xs -> print (sum xs)
170-- >
171-- > numbers = decimal `sepBy` char ','
172
173parse
174  :: Parsec e s a -- ^ Parser to run
175  -> String       -- ^ Name of source file
176  -> s            -- ^ Input for parser
177  -> Either (ParseErrorBundle s e) a
178parse = runParser
179
180-- | @'parseMaybe' p input@ runs the parser @p@ on @input@ and returns the
181-- result inside 'Just' on success and 'Nothing' on failure. This function
182-- also parses 'eof', so if the parser doesn't consume all of its input, it
183-- will fail.
184--
185-- The function is supposed to be useful for lightweight parsing, where
186-- error messages (and thus file names) are not important and entire input
187-- should be parsed. For example, it can be used when parsing of a single
188-- number according to a specification of its format is desired.
189
190parseMaybe :: (Ord e, Stream s) => Parsec e s a -> s -> Maybe a
191parseMaybe p s =
192  case parse (p <* eof) "" s of
193    Left  _ -> Nothing
194    Right x -> Just x
195
196-- | The expression @'parseTest' p input@ applies the parser @p@ against the
197-- input @input@ and prints the result to stdout. Useful for testing.
198
199parseTest :: ( ShowErrorComponent e
200             , Show a
201             , Stream s
202             )
203  => Parsec e s a -- ^ Parser to run
204  -> s            -- ^ Input for parser
205  -> IO ()
206parseTest p input =
207  case parse p "" input of
208    Left  e -> putStr (errorBundlePretty e)
209    Right x -> print x
210
211-- | @'runParser' p file input@ runs parser @p@ on the input stream of
212-- tokens @input@, obtained from source @file@. The @file@ is only used in
213-- error messages and may be the empty string. Returns either a
214-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
215--
216-- > parseFromFile p file = runParser p file <$> readFile file
217
218runParser
219  :: Parsec e s a -- ^ Parser to run
220  -> String     -- ^ Name of source file
221  -> s          -- ^ Input for parser
222  -> Either (ParseErrorBundle s e) a
223runParser p name s = snd $ runParser' p (initialState name s)
224
225-- | The function is similar to 'runParser' with the difference that it
226-- accepts and returns parser state. This allows to specify arbitrary
227-- textual position at the beginning of parsing, for example. This is the
228-- most general way to run a parser over the 'Identity' monad.
229--
230-- @since 4.2.0
231
232runParser'
233  :: Parsec e s a -- ^ Parser to run
234  -> State s e    -- ^ Initial state
235  -> (State s e, Either (ParseErrorBundle s e) a)
236runParser' p = runIdentity . runParserT' p
237
238-- | @'runParserT' p file input@ runs parser @p@ on the input list of tokens
239-- @input@, obtained from source @file@. The @file@ is only used in error
240-- messages and may be the empty string. Returns a computation in the
241-- underlying monad @m@ that returns either a 'ParseErrorBundle' ('Left') or
242-- a value of type @a@ ('Right').
243
244runParserT :: Monad m
245  => ParsecT e s m a -- ^ Parser to run
246  -> String        -- ^ Name of source file
247  -> s             -- ^ Input for parser
248  -> m (Either (ParseErrorBundle s e) a)
249runParserT p name s = snd <$> runParserT' p (initialState name s)
250
251-- | This function is similar to 'runParserT', but like 'runParser'' it
252-- accepts and returns parser state. This is thus the most general way to
253-- run a parser.
254--
255-- @since 4.2.0
256
257runParserT' :: Monad m
258  => ParsecT e s m a -- ^ Parser to run
259  -> State s e     -- ^ Initial state
260  -> m (State s e, Either (ParseErrorBundle s e) a)
261runParserT' p s = do
262  (Reply s' _ result) <- runParsecT p s
263  let toBundle es = ParseErrorBundle
264        { bundleErrors =
265            NE.sortWith errorOffset es
266        , bundlePosState = statePosState s
267        }
268  return $ case result of
269    OK x ->
270      case NE.nonEmpty (stateParseErrors s') of
271        Nothing -> (s', Right x)
272        Just de -> (s', Left (toBundle de))
273    Error e ->
274      (s', Left (toBundle (e :| stateParseErrors s')))
275
276-- | Given name of source file and input construct initial state for parser.
277
278initialState :: String -> s -> State s e
279initialState name s = State
280  { stateInput  = s
281  , stateOffset = 0
282  , statePosState = PosState
283    { pstateInput = s
284    , pstateOffset = 0
285    , pstateSourcePos = initialPos name
286    , pstateTabWidth = defaultTabWidth
287    , pstateLinePrefix = ""
288    }
289  , stateParseErrors = []
290  }
291
292----------------------------------------------------------------------------
293-- Signaling parse errors
294
295-- $parse-errors
296--
297-- The most general function to fail and end parsing is 'parseError'. These
298-- are built on top of it. The section also includes functions starting with
299-- the @register@ prefix which allow users to register “delayed”
300-- 'ParseError's.
301
302-- | Stop parsing and report a trivial 'ParseError'.
303--
304-- @since 6.0.0
305
306failure
307  :: MonadParsec e s m
308  => Maybe (ErrorItem (Token s)) -- ^ Unexpected item (if any)
309  -> Set (ErrorItem (Token s)) -- ^ Expected items
310  -> m a
311failure us ps = do
312  o <- getOffset
313  parseError (TrivialError o us ps)
314{-# INLINE failure #-}
315
316-- | Stop parsing and report a fancy 'ParseError'. To report a single custom
317-- parse error, see 'Text.Megaparsec.customFailure'.
318--
319-- @since 6.0.0
320
321fancyFailure
322  :: MonadParsec e s m
323  => Set (ErrorFancy e) -- ^ Fancy error components
324  -> m a
325fancyFailure xs = do
326  o <- getOffset
327  parseError (FancyError o xs)
328{-# INLINE fancyFailure #-}
329
330-- | The parser @'unexpected' item@ fails with an error message telling
331-- about unexpected item @item@ without consuming any input.
332--
333-- > unexpected item = failure (Just item) Set.empty
334
335unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
336unexpected item = failure (Just item) E.empty
337{-# INLINE unexpected #-}
338
339-- | Report a custom parse error. For a more general version, see
340-- 'fancyFailure'.
341--
342-- > customFailure = fancyFailure . Set.singleton . ErrorCustom
343--
344-- @since 6.3.0
345
346customFailure :: MonadParsec e s m => e -> m a
347customFailure = fancyFailure . E.singleton . ErrorCustom
348{-# INLINE customFailure #-}
349
350-- | Specify how to process 'ParseError's that happen inside of this
351-- wrapper. This applies to both normal and delayed 'ParseError's.
352--
353-- As a side-effect of the implementation the inner computation will start
354-- with empty collection of delayed errors and they will be updated and
355-- “restored” on the way out of 'region'.
356--
357-- @since 5.3.0
358
359region :: MonadParsec e s m
360  => (ParseError s e -> ParseError s e)
361     -- ^ How to process 'ParseError's
362  -> m a               -- ^ The “region” that the processing applies to
363  -> m a
364region f m = do
365  deSoFar <- stateParseErrors <$> getParserState
366  updateParserState $ \s ->
367    s { stateParseErrors = [] }
368  r <- observing m
369  updateParserState $ \s ->
370    s { stateParseErrors = (f <$> stateParseErrors s) ++ deSoFar }
371  case r of
372    Left err -> parseError (f err)
373    Right x -> return x
374{-# INLINEABLE region #-}
375
376-- | Register a 'ParseError' for later reporting. This action does not end
377-- parsing and has no effect except for adding the given 'ParseError' to the
378-- collection of “delayed” 'ParseError's which will be taken into
379-- consideration at the end of parsing. Only if this collection is empty
380-- parser will succeed. This is the main way to report several parse errors
381-- at once.
382--
383-- @since 8.0.0
384
385registerParseError :: MonadParsec e s m => ParseError s e -> m ()
386registerParseError e = updateParserState $ \s ->
387  s { stateParseErrors = e : stateParseErrors s }
388{-# INLINE registerParseError #-}
389
390-- | Like 'failure', but for delayed 'ParseError's.
391--
392-- @since 8.0.0
393
394registerFailure
395  :: MonadParsec e s m
396  => Maybe (ErrorItem (Token s)) -- ^ Unexpected item (if any)
397  -> Set (ErrorItem (Token s)) -- ^ Expected items
398  -> m ()
399registerFailure us ps = do
400  o <- getOffset
401  registerParseError (TrivialError o us ps)
402{-# INLINE registerFailure #-}
403
404-- | Like 'fancyFailure', but for delayed 'ParseError's.
405--
406-- @since 8.0.0
407
408registerFancyFailure
409  :: MonadParsec e s m
410  => Set (ErrorFancy e) -- ^ Fancy error components
411  -> m ()
412registerFancyFailure xs = do
413  o <- getOffset
414  registerParseError (FancyError o xs)
415{-# INLINE registerFancyFailure #-}
416
417----------------------------------------------------------------------------
418-- Derivatives of primitive combinators
419
420-- | @'single' t@ only matches the single token @t@.
421--
422-- > semicolon = single ';'
423--
424-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
425-- 'Text.Megaparsec.Char.char'.
426--
427-- @since 7.0.0
428
429single :: MonadParsec e s m
430  => Token s           -- ^ Token to match
431  -> m (Token s)
432single t = token testToken expected
433  where
434    testToken x = if x == t then Just x else Nothing
435    expected    = E.singleton (Tokens (t:|[]))
436{-# INLINE single #-}
437
438-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
439-- function @f@ returns 'True'.
440--
441-- > digitChar = satisfy isDigit <?> "digit"
442-- > oneOf cs  = satisfy (`elem` cs)
443--
444-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
445--
446-- @since 7.0.0
447
448satisfy :: MonadParsec e s m
449  => (Token s -> Bool) -- ^ Predicate to apply
450  -> m (Token s)
451satisfy f = token testChar E.empty
452  where
453    testChar x = if f x then Just x else Nothing
454{-# INLINE satisfy #-}
455
456-- | Parse and return a single token. It's a good idea to attach a 'label'
457-- to this parser.
458--
459-- > anySingle = satisfy (const True)
460--
461-- See also: 'satisfy', 'anySingleBut'.
462--
463-- @since 7.0.0
464
465anySingle :: MonadParsec e s m => m (Token s)
466anySingle = satisfy (const True)
467{-# INLINE anySingle #-}
468
469-- | Match any token but the given one. It's a good idea to attach a 'label'
470-- to this parser.
471--
472-- > anySingleBut t = satisfy (/= t)
473--
474-- See also: 'single', 'anySingle', 'satisfy'.
475--
476-- @since 7.0.0
477
478anySingleBut :: MonadParsec e s m
479  => Token s           -- ^ Token we should not match
480  -> m (Token s)
481anySingleBut t = satisfy (/= t)
482{-# INLINE anySingleBut #-}
483
484-- | @'oneOf' ts@ succeeds if the current token is in the supplied
485-- collection of tokens @ts@. Returns the parsed token. Note that this
486-- parser cannot automatically generate the “expected” component of error
487-- message, so usually you should label it manually with 'label' or ('<?>').
488--
489-- > oneOf cs = satisfy (`elem` cs)
490--
491-- See also: 'satisfy'.
492--
493-- > digit = oneOf ['0'..'9'] <?> "digit"
494--
495-- __Performance note__: prefer 'satisfy' when you can because it's faster
496-- when you have only a couple of tokens to compare to:
497--
498-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
499-- > quoteSlow = oneOf "'\""
500--
501-- @since 7.0.0
502
503oneOf :: (Foldable f, MonadParsec e s m)
504  => f (Token s)       -- ^ Collection of matching tokens
505  -> m (Token s)
506oneOf cs = satisfy (`elem` cs)
507{-# INLINE oneOf #-}
508
509-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
510-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
511-- Note that this parser cannot automatically generate the “expected”
512-- component of error message, so usually you should label it manually with
513-- 'label' or ('<?>').
514--
515-- > noneOf cs = satisfy (`notElem` cs)
516--
517-- See also: 'satisfy'.
518--
519-- __Performance note__: prefer 'satisfy' and 'anySingleBut' when you can
520-- because it's faster.
521--
522-- @since 7.0.0
523
524noneOf :: (Foldable f, MonadParsec e s m)
525  => f (Token s)       -- ^ Collection of taken we should not match
526  -> m (Token s)
527noneOf cs = satisfy (`notElem` cs)
528{-# INLINE noneOf #-}
529
530-- | @'chunk' chk@ only matches the chunk @chk@.
531--
532-- > divOrMod = chunk "div" <|> chunk "mod"
533--
534-- See also: 'tokens', 'Text.Megaparsec.Char.string',
535-- 'Text.Megaparsec.Byte.string'.
536--
537-- @since 7.0.0
538
539chunk :: MonadParsec e s m
540  => Tokens s          -- ^ Chunk to match
541  -> m (Tokens s)
542chunk = tokens (==)
543{-# INLINE chunk #-}
544
545-- | A synonym for 'label' in the form of an operator.
546
547infix 0 <?>
548
549(<?>) :: MonadParsec e s m => m a -> String -> m a
550(<?>) = flip label
551{-# INLINE (<?>) #-}
552
553-- | Return both the result of a parse and a chunk of input that was
554-- consumed during parsing. This relies on the change of the 'stateOffset'
555-- value to evaluate how many tokens were consumed. If you mess with it
556-- manually in the argument parser, prepare for troubles.
557--
558-- @since 5.3.0
559
560match :: MonadParsec e s m => m a -> m (Tokens s, a)
561match p = do
562  o  <- getOffset
563  s  <- getInput
564  r  <- p
565  o' <- getOffset
566  -- NOTE The 'fromJust' call here should never fail because if the stream
567  -- is empty before 'p' (the only case when 'takeN_' can return 'Nothing'
568  -- as per its invariants), (tp' - tp) won't be greater than 0, and in that
569  -- case 'Just' is guaranteed to be returned as per another invariant of
570  -- 'takeN_'.
571  return ((fst . fromJust) (takeN_ (o' - o) s), r)
572{-# INLINEABLE match #-}
573
574-- | Consume the rest of the input and return it as a chunk. This parser
575-- never fails, but may return the empty chunk.
576--
577-- > takeRest = takeWhileP Nothing (const True)
578--
579-- @since 6.0.0
580
581takeRest :: MonadParsec e s m => m (Tokens s)
582takeRest = takeWhileP Nothing (const True)
583{-# INLINE takeRest #-}
584
585-- | Return 'True' when end of input has been reached.
586--
587-- > atEnd = option False (True <$ hidden eof)
588--
589-- @since 6.0.0
590
591atEnd :: MonadParsec e s m => m Bool
592atEnd = option False (True <$ hidden eof)
593{-# INLINE atEnd #-}
594
595----------------------------------------------------------------------------
596-- Parser state combinators
597
598-- | Return the current input.
599
600getInput :: MonadParsec e s m => m s
601getInput = stateInput <$> getParserState
602{-# INLINE getInput #-}
603
604-- | @'setInput' input@ continues parsing with @input@.
605
606setInput :: MonadParsec e s m => s -> m ()
607setInput s = updateParserState (\(State _ o pst de) -> State s o pst de)
608{-# INLINE setInput #-}
609
610-- | Return the current source position. This function /is not cheap/, do
611-- not call it e.g. on matching of every token, that's a bad idea. Still you
612-- can use it to get 'SourcePos' to attach to things that you parse.
613--
614-- The function works under the assumption that we move in the input stream
615-- only forwards and never backwards, which is always true unless the user
616-- abuses the library.
617--
618-- @since 7.0.0
619
620getSourcePos :: MonadParsec e s m => m SourcePos
621getSourcePos = do
622  st <- getParserState
623  let pst = reachOffsetNoLine (stateOffset st) (statePosState st)
624  setParserState st { statePosState = pst }
625  return (pstateSourcePos pst)
626{-# INLINE getSourcePos #-}
627
628-- | Get the number of tokens processed so far.
629--
630-- See also: 'setOffset'.
631--
632-- @since 7.0.0
633
634getOffset :: MonadParsec e s m => m Int
635getOffset = stateOffset <$> getParserState
636{-# INLINE getOffset #-}
637
638-- | Set the number of tokens processed so far.
639--
640-- See also: 'getOffset'.
641--
642-- @since 7.0.0
643
644setOffset :: MonadParsec e s m => Int -> m ()
645setOffset o = updateParserState $ \(State s _ pst de) ->
646  State s o pst de
647{-# INLINE setOffset #-}
648
649-- | @'setParserState' st@ sets the parser state to @st@.
650--
651-- See also: 'getParserState', 'updateParserState'.
652
653setParserState :: MonadParsec e s m => State s e -> m ()
654setParserState st = updateParserState (const st)
655{-# INLINE setParserState #-}
656