1{-# LANGUAGE FlexibleContexts #-}
2{-# LANGUAGE FlexibleInstances #-}
3{-# LANGUAGE MultiParamTypeClasses #-}
4{-# LANGUAGE RankNTypes #-}
5{-# LANGUAGE Safe #-}
6{-# LANGUAGE ScopedTypeVariables #-}
7{-# LANGUAGE TypeFamilies #-}
8{-# LANGUAGE UndecidableInstances #-}
9
10-- |
11-- Module      :  Text.Megaparsec
12-- Copyright   :  © 2015–present Megaparsec contributors
13--                © 2007 Paolo Martini
14--                © 1999–2001 Daan Leijen
15-- License     :  FreeBSD
16--
17-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
18-- Stability   :  experimental
19-- Portability :  portable
20--
21-- This module includes everything you need to get started writing a parser.
22-- If you are new to Megaparsec and don't know where to begin, take a look
23-- at the tutorial <https://markkarpov.com/tutorial/megaparsec.html>.
24--
25-- In addition to the "Text.Megaparsec" module, which exports and re-exports
26-- most everything that you may need, we advise to import
27-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens
28-- or "Text.Megaparsec.Byte" if you intend to parse binary data.
29--
30-- It is common to start working with the library by defining a type synonym
31-- like this:
32--
33-- > type Parser = Parsec Void Text
34-- >                      ^    ^
35-- >                      |    |
36-- > Custom error component    Input stream type
37--
38-- Then you can write type signatures like @Parser 'Int'@—for a parser that
39-- returns an 'Int' for example.
40--
41-- Similarly (since it's known to cause confusion), you should use
42-- 'ParseErrorBundle' type parametrized like this:
43--
44-- > ParseErrorBundle Text Void
45-- >                  ^    ^
46-- >                  |    |
47-- >  Input stream type    Custom error component (the same you used in Parser)
48--
49-- Megaparsec uses some type-level machinery to provide flexibility without
50-- compromising on type safety. Thus type signatures are sometimes necessary
51-- to avoid ambiguous types. If you're seeing an error message that reads
52-- like “Type variable @e0@ is ambiguous …”, you need to give an explicit
53-- signature to your parser to resolve the ambiguity. It's a good idea to
54-- provide type signatures for all top-level definitions.
55module Text.Megaparsec
56  ( -- * Re-exports
57    -- $reexports
58    module Text.Megaparsec.Pos,
59    module Text.Megaparsec.Error,
60    module Text.Megaparsec.Stream,
61    module Control.Monad.Combinators,
62
63    -- * Data types
64    State (..),
65    PosState (..),
66    Parsec,
67    ParsecT,
68
69    -- * Running parser
70    parse,
71    parseMaybe,
72    parseTest,
73    runParser,
74    runParser',
75    runParserT,
76    runParserT',
77
78    -- * Primitive combinators
79    MonadParsec (..),
80
81    -- * Signaling parse errors
82    -- $parse-errors
83    failure,
84    fancyFailure,
85    unexpected,
86    customFailure,
87    region,
88    registerParseError,
89    registerFailure,
90    registerFancyFailure,
91
92    -- * Derivatives of primitive combinators
93    single,
94    satisfy,
95    anySingle,
96    anySingleBut,
97    oneOf,
98    noneOf,
99    chunk,
100    (<?>),
101    match,
102    takeRest,
103    atEnd,
104
105    -- * Parser state combinators
106    getInput,
107    setInput,
108    getSourcePos,
109    getOffset,
110    setOffset,
111    setParserState,
112  )
113where
114
115import Control.Monad.Combinators
116import Control.Monad.Identity
117import Data.List.NonEmpty (NonEmpty (..))
118import qualified Data.List.NonEmpty as NE
119import Data.Maybe (fromJust)
120import Data.Set (Set)
121import qualified Data.Set as E
122import Text.Megaparsec.Class
123import Text.Megaparsec.Error
124import Text.Megaparsec.Internal
125import Text.Megaparsec.Pos
126import Text.Megaparsec.State
127import Text.Megaparsec.Stream
128
129-- $reexports
130--
131-- Note that we re-export monadic combinators from
132-- "Control.Monad.Combinators" because these are more efficient than
133-- 'Applicative'-based ones. Thus 'many' and 'some' may clash with the
134-- functions from "Control.Applicative". You need to hide the functions like
135-- this:
136--
137-- > import Control.Applicative hiding (many, some)
138--
139-- Also note that you can import "Control.Monad.Combinators.NonEmpty" if you
140-- wish that combinators like 'some' return 'NonEmpty' lists. The module
141-- lives in the @parser-combinators@ package (you need at least version
142-- /0.4.0/).
143--
144-- This module is intended to be imported qualified:
145--
146-- > import qualified Control.Monad.Combinators.NonEmpty as NE
147--
148-- Other modules of interest are:
149--
150--     * "Control.Monad.Combinators.Expr" for parsing of expressions.
151--     * "Control.Applicative.Permutations" for parsing of permutations
152--       phrases.
153
154----------------------------------------------------------------------------
155-- Data types
156
157-- | 'Parsec' is a non-transformer variant of the more general 'ParsecT'
158-- monad transformer.
159type Parsec e s = ParsecT e s Identity
160
161----------------------------------------------------------------------------
162-- Running a parser
163
164-- | @'parse' p file input@ runs parser @p@ over 'Identity' (see
165-- 'runParserT' if you're using the 'ParsecT' monad transformer; 'parse'
166-- itself is just a synonym for 'runParser'). It returns either a
167-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
168-- 'errorBundlePretty' can be used to turn 'ParseErrorBundle' into the
169-- string representation of the error message. See "Text.Megaparsec.Error"
170-- if you need to do more advanced error analysis.
171--
172-- > main = case parse numbers "" "11,2,43" of
173-- >          Left bundle -> putStr (errorBundlePretty bundle)
174-- >          Right xs -> print (sum xs)
175-- >
176-- > numbers = decimal `sepBy` char ','
177parse ::
178  -- | Parser to run
179  Parsec e s a ->
180  -- | Name of source file
181  String ->
182  -- | Input for parser
183  s ->
184  Either (ParseErrorBundle s e) a
185parse = runParser
186
187-- | @'parseMaybe' p input@ runs the parser @p@ on @input@ and returns the
188-- result inside 'Just' on success and 'Nothing' on failure. This function
189-- also parses 'eof', so if the parser doesn't consume all of its input, it
190-- will fail.
191--
192-- The function is supposed to be useful for lightweight parsing, where
193-- error messages (and thus file names) are not important and entire input
194-- should be parsed. For example, it can be used when parsing of a single
195-- number according to a specification of its format is desired.
196parseMaybe :: (Ord e, Stream s) => Parsec e s a -> s -> Maybe a
197parseMaybe p s =
198  case parse (p <* eof) "" s of
199    Left _ -> Nothing
200    Right x -> Just x
201
202-- | The expression @'parseTest' p input@ applies the parser @p@ against the
203-- input @input@ and prints the result to stdout. Useful for testing.
204parseTest ::
205  ( ShowErrorComponent e,
206    Show a,
207    VisualStream s,
208    TraversableStream s
209  ) =>
210  -- | Parser to run
211  Parsec e s a ->
212  -- | Input for parser
213  s ->
214  IO ()
215parseTest p input =
216  case parse p "" input of
217    Left e -> putStr (errorBundlePretty e)
218    Right x -> print x
219
220-- | @'runParser' p file input@ runs parser @p@ on the input stream of
221-- tokens @input@, obtained from source @file@. The @file@ is only used in
222-- error messages and may be the empty string. Returns either a
223-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
224--
225-- > parseFromFile p file = runParser p file <$> readFile file
226runParser ::
227  -- | Parser to run
228  Parsec e s a ->
229  -- | Name of source file
230  String ->
231  -- | Input for parser
232  s ->
233  Either (ParseErrorBundle s e) a
234runParser p name s = snd $ runParser' p (initialState name s)
235
236-- | The function is similar to 'runParser' with the difference that it
237-- accepts and returns parser state. This allows to specify arbitrary
238-- textual position at the beginning of parsing, for example. This is the
239-- most general way to run a parser over the 'Identity' monad.
240--
241-- @since 4.2.0
242runParser' ::
243  -- | Parser to run
244  Parsec e s a ->
245  -- | Initial state
246  State s e ->
247  (State s e, Either (ParseErrorBundle s e) a)
248runParser' p = runIdentity . runParserT' p
249
250-- | @'runParserT' p file input@ runs parser @p@ on the input list of tokens
251-- @input@, obtained from source @file@. The @file@ is only used in error
252-- messages and may be the empty string. Returns a computation in the
253-- underlying monad @m@ that returns either a 'ParseErrorBundle' ('Left') or
254-- a value of type @a@ ('Right').
255runParserT ::
256  Monad m =>
257  -- | Parser to run
258  ParsecT e s m a ->
259  -- | Name of source file
260  String ->
261  -- | Input for parser
262  s ->
263  m (Either (ParseErrorBundle s e) a)
264runParserT p name s = snd <$> runParserT' p (initialState name s)
265
266-- | This function is similar to 'runParserT', but like 'runParser'' it
267-- accepts and returns parser state. This is thus the most general way to
268-- run a parser.
269--
270-- @since 4.2.0
271runParserT' ::
272  Monad m =>
273  -- | Parser to run
274  ParsecT e s m a ->
275  -- | Initial state
276  State s e ->
277  m (State s e, Either (ParseErrorBundle s e) a)
278runParserT' p s = do
279  (Reply s' _ result) <- runParsecT p s
280  let toBundle es =
281        ParseErrorBundle
282          { bundleErrors =
283              NE.sortWith errorOffset es,
284            bundlePosState = statePosState s
285          }
286  return $ case result of
287    OK x ->
288      case NE.nonEmpty (stateParseErrors s') of
289        Nothing -> (s', Right x)
290        Just de -> (s', Left (toBundle de))
291    Error e ->
292      (s', Left (toBundle (e :| stateParseErrors s')))
293
294-- | Given name of source file and input construct initial state for parser.
295initialState :: String -> s -> State s e
296initialState name s =
297  State
298    { stateInput = s,
299      stateOffset = 0,
300      statePosState =
301        PosState
302          { pstateInput = s,
303            pstateOffset = 0,
304            pstateSourcePos = initialPos name,
305            pstateTabWidth = defaultTabWidth,
306            pstateLinePrefix = ""
307          },
308      stateParseErrors = []
309    }
310
311----------------------------------------------------------------------------
312-- Signaling parse errors
313
314-- $parse-errors
315--
316-- The most general function to fail and end parsing is 'parseError'. These
317-- are built on top of it. The section also includes functions starting with
318-- the @register@ prefix which allow users to register “delayed”
319-- 'ParseError's.
320
321-- | Stop parsing and report a trivial 'ParseError'.
322--
323-- @since 6.0.0
324failure ::
325  MonadParsec e s m =>
326  -- | Unexpected item (if any)
327  Maybe (ErrorItem (Token s)) ->
328  -- | Expected items
329  Set (ErrorItem (Token s)) ->
330  m a
331failure us ps = do
332  o <- getOffset
333  parseError (TrivialError o us ps)
334{-# INLINE failure #-}
335
336-- | Stop parsing and report a fancy 'ParseError'. To report a single custom
337-- parse error, see 'Text.Megaparsec.customFailure'.
338--
339-- @since 6.0.0
340fancyFailure ::
341  MonadParsec e s m =>
342  -- | Fancy error components
343  Set (ErrorFancy e) ->
344  m a
345fancyFailure xs = do
346  o <- getOffset
347  parseError (FancyError o xs)
348{-# INLINE fancyFailure #-}
349
350-- | The parser @'unexpected' item@ fails with an error message telling
351-- about unexpected item @item@ without consuming any input.
352--
353-- > unexpected item = failure (Just item) Set.empty
354unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
355unexpected item = failure (Just item) E.empty
356{-# INLINE unexpected #-}
357
358-- | Report a custom parse error. For a more general version, see
359-- 'fancyFailure'.
360--
361-- > customFailure = fancyFailure . Set.singleton . ErrorCustom
362--
363-- @since 6.3.0
364customFailure :: MonadParsec e s m => e -> m a
365customFailure = fancyFailure . E.singleton . ErrorCustom
366{-# INLINE customFailure #-}
367
368-- | Specify how to process 'ParseError's that happen inside of this
369-- wrapper. This applies to both normal and delayed 'ParseError's.
370--
371-- As a side-effect of the implementation the inner computation will start
372-- with empty collection of delayed errors and they will be updated and
373-- “restored” on the way out of 'region'.
374--
375-- @since 5.3.0
376region ::
377  MonadParsec e s m =>
378  -- | How to process 'ParseError's
379  (ParseError s e -> ParseError s e) ->
380  -- | The “region” that the processing applies to
381  m a ->
382  m a
383region f m = do
384  deSoFar <- stateParseErrors <$> getParserState
385  updateParserState $ \s ->
386    s {stateParseErrors = []}
387  r <- observing m
388  updateParserState $ \s ->
389    s {stateParseErrors = (f <$> stateParseErrors s) ++ deSoFar}
390  case r of
391    Left err -> parseError (f err)
392    Right x -> return x
393{-# INLINEABLE region #-}
394
395-- | Register a 'ParseError' for later reporting. This action does not end
396-- parsing and has no effect except for adding the given 'ParseError' to the
397-- collection of “delayed” 'ParseError's which will be taken into
398-- consideration at the end of parsing. Only if this collection is empty
399-- parser will succeed. This is the main way to report several parse errors
400-- at once.
401--
402-- @since 8.0.0
403registerParseError :: MonadParsec e s m => ParseError s e -> m ()
404registerParseError e = updateParserState $ \s ->
405  s {stateParseErrors = e : stateParseErrors s}
406{-# INLINE registerParseError #-}
407
408-- | Like 'failure', but for delayed 'ParseError's.
409--
410-- @since 8.0.0
411registerFailure ::
412  MonadParsec e s m =>
413  -- | Unexpected item (if any)
414  Maybe (ErrorItem (Token s)) ->
415  -- | Expected items
416  Set (ErrorItem (Token s)) ->
417  m ()
418registerFailure us ps = do
419  o <- getOffset
420  registerParseError (TrivialError o us ps)
421{-# INLINE registerFailure #-}
422
423-- | Like 'fancyFailure', but for delayed 'ParseError's.
424--
425-- @since 8.0.0
426registerFancyFailure ::
427  MonadParsec e s m =>
428  -- | Fancy error components
429  Set (ErrorFancy e) ->
430  m ()
431registerFancyFailure xs = do
432  o <- getOffset
433  registerParseError (FancyError o xs)
434{-# INLINE registerFancyFailure #-}
435
436----------------------------------------------------------------------------
437-- Derivatives of primitive combinators
438
439-- | @'single' t@ only matches the single token @t@.
440--
441-- > semicolon = single ';'
442--
443-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
444-- 'Text.Megaparsec.Char.char'.
445--
446-- @since 7.0.0
447single ::
448  MonadParsec e s m =>
449  -- | Token to match
450  Token s ->
451  m (Token s)
452single t = token testToken expected
453  where
454    testToken x = if x == t then Just x else Nothing
455    expected = E.singleton (Tokens (t :| []))
456{-# INLINE single #-}
457
458-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
459-- function @f@ returns 'True'.
460--
461-- > digitChar = satisfy isDigit <?> "digit"
462-- > oneOf cs  = satisfy (`elem` cs)
463--
464-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
465--
466-- @since 7.0.0
467satisfy ::
468  MonadParsec e s m =>
469  -- | Predicate to apply
470  (Token s -> Bool) ->
471  m (Token s)
472satisfy f = token testChar E.empty
473  where
474    testChar x = if f x then Just x else Nothing
475{-# INLINE satisfy #-}
476
477-- | Parse and return a single token. It's a good idea to attach a 'label'
478-- to this parser.
479--
480-- > anySingle = satisfy (const True)
481--
482-- See also: 'satisfy', 'anySingleBut'.
483--
484-- @since 7.0.0
485anySingle :: MonadParsec e s m => m (Token s)
486anySingle = satisfy (const True)
487{-# INLINE anySingle #-}
488
489-- | Match any token but the given one. It's a good idea to attach a 'label'
490-- to this parser.
491--
492-- > anySingleBut t = satisfy (/= t)
493--
494-- See also: 'single', 'anySingle', 'satisfy'.
495--
496-- @since 7.0.0
497anySingleBut ::
498  MonadParsec e s m =>
499  -- | Token we should not match
500  Token s ->
501  m (Token s)
502anySingleBut t = satisfy (/= t)
503{-# INLINE anySingleBut #-}
504
505-- | @'oneOf' ts@ succeeds if the current token is in the supplied
506-- collection of tokens @ts@. Returns the parsed token. Note that this
507-- parser cannot automatically generate the “expected” component of error
508-- message, so usually you should label it manually with 'label' or ('<?>').
509--
510-- > oneOf cs = satisfy (`elem` cs)
511--
512-- See also: 'satisfy'.
513--
514-- > digit = oneOf ['0'..'9'] <?> "digit"
515--
516-- __Performance note__: prefer 'satisfy' when you can because it's faster
517-- when you have only a couple of tokens to compare to:
518--
519-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
520-- > quoteSlow = oneOf "'\""
521--
522-- @since 7.0.0
523oneOf ::
524  (Foldable f, MonadParsec e s m) =>
525  -- | Collection of matching tokens
526  f (Token s) ->
527  m (Token s)
528oneOf cs = satisfy (`elem` cs)
529{-# INLINE oneOf #-}
530
531-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
532-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
533-- Note that this parser cannot automatically generate the “expected”
534-- component of error message, so usually you should label it manually with
535-- 'label' or ('<?>').
536--
537-- > noneOf cs = satisfy (`notElem` cs)
538--
539-- See also: 'satisfy'.
540--
541-- __Performance note__: prefer 'satisfy' and 'anySingleBut' when you can
542-- because it's faster.
543--
544-- @since 7.0.0
545noneOf ::
546  (Foldable f, MonadParsec e s m) =>
547  -- | Collection of taken we should not match
548  f (Token s) ->
549  m (Token s)
550noneOf cs = satisfy (`notElem` cs)
551{-# INLINE noneOf #-}
552
553-- | @'chunk' chk@ only matches the chunk @chk@.
554--
555-- > divOrMod = chunk "div" <|> chunk "mod"
556--
557-- See also: 'tokens', 'Text.Megaparsec.Char.string',
558-- 'Text.Megaparsec.Byte.string'.
559--
560-- @since 7.0.0
561chunk ::
562  MonadParsec e s m =>
563  -- | Chunk to match
564  Tokens s ->
565  m (Tokens s)
566chunk = tokens (==)
567{-# INLINE chunk #-}
568
569-- | A synonym for 'label' in the form of an operator.
570infix 0 <?>
571
572(<?>) :: MonadParsec e s m => m a -> String -> m a
573(<?>) = flip label
574{-# INLINE (<?>) #-}
575
576-- | Return both the result of a parse and a chunk of input that was
577-- consumed during parsing. This relies on the change of the 'stateOffset'
578-- value to evaluate how many tokens were consumed. If you mess with it
579-- manually in the argument parser, prepare for troubles.
580--
581-- @since 5.3.0
582match :: MonadParsec e s m => m a -> m (Tokens s, a)
583match p = do
584  o <- getOffset
585  s <- getInput
586  r <- p
587  o' <- getOffset
588  -- NOTE The 'fromJust' call here should never fail because if the stream
589  -- is empty before 'p' (the only case when 'takeN_' can return 'Nothing'
590  -- as per its invariants), (tp' - tp) won't be greater than 0, and in that
591  -- case 'Just' is guaranteed to be returned as per another invariant of
592  -- 'takeN_'.
593  return ((fst . fromJust) (takeN_ (o' - o) s), r)
594{-# INLINEABLE match #-}
595
596-- | Consume the rest of the input and return it as a chunk. This parser
597-- never fails, but may return the empty chunk.
598--
599-- > takeRest = takeWhileP Nothing (const True)
600--
601-- @since 6.0.0
602takeRest :: MonadParsec e s m => m (Tokens s)
603takeRest = takeWhileP Nothing (const True)
604{-# INLINE takeRest #-}
605
606-- | Return 'True' when end of input has been reached.
607--
608-- > atEnd = option False (True <$ hidden eof)
609--
610-- @since 6.0.0
611atEnd :: MonadParsec e s m => m Bool
612atEnd = option False (True <$ hidden eof)
613{-# INLINE atEnd #-}
614
615----------------------------------------------------------------------------
616-- Parser state combinators
617
618-- | Return the current input.
619getInput :: MonadParsec e s m => m s
620getInput = stateInput <$> getParserState
621{-# INLINE getInput #-}
622
623-- | @'setInput' input@ continues parsing with @input@.
624setInput :: MonadParsec e s m => s -> m ()
625setInput s = updateParserState (\(State _ o pst de) -> State s o pst de)
626{-# INLINE setInput #-}
627
628-- | Return the current source position. This function /is not cheap/, do
629-- not call it e.g. on matching of every token, that's a bad idea. Still you
630-- can use it to get 'SourcePos' to attach to things that you parse.
631--
632-- The function works under the assumption that we move in the input stream
633-- only forwards and never backwards, which is always true unless the user
634-- abuses the library.
635--
636-- @since 7.0.0
637getSourcePos :: (TraversableStream s, MonadParsec e s m) => m SourcePos
638getSourcePos = do
639  st <- getParserState
640  let pst = reachOffsetNoLine (stateOffset st) (statePosState st)
641  setParserState st {statePosState = pst}
642  return (pstateSourcePos pst)
643{-# INLINE getSourcePos #-}
644
645-- | Get the number of tokens processed so far.
646--
647-- See also: 'setOffset'.
648--
649-- @since 7.0.0
650getOffset :: MonadParsec e s m => m Int
651getOffset = stateOffset <$> getParserState
652{-# INLINE getOffset #-}
653
654-- | Set the number of tokens processed so far.
655--
656-- See also: 'getOffset'.
657--
658-- @since 7.0.0
659setOffset :: MonadParsec e s m => Int -> m ()
660setOffset o = updateParserState $ \(State s _ pst de) ->
661  State s o pst de
662{-# INLINE setOffset #-}
663
664-- | @'setParserState' st@ sets the parser state to @st@.
665--
666-- See also: 'getParserState', 'updateParserState'.
667setParserState :: MonadParsec e s m => State s e -> m ()
668setParserState st = updateParserState (const st)
669{-# INLINE setParserState #-}
670