1{-# LANGUAGE FlexibleContexts #-} 2{-# LANGUAGE FlexibleInstances #-} 3{-# LANGUAGE MultiParamTypeClasses #-} 4{-# LANGUAGE RankNTypes #-} 5{-# LANGUAGE Safe #-} 6{-# LANGUAGE ScopedTypeVariables #-} 7{-# LANGUAGE TypeFamilies #-} 8{-# LANGUAGE UndecidableInstances #-} 9 10-- | 11-- Module : Text.Megaparsec 12-- Copyright : © 2015–present Megaparsec contributors 13-- © 2007 Paolo Martini 14-- © 1999–2001 Daan Leijen 15-- License : FreeBSD 16-- 17-- Maintainer : Mark Karpov <markkarpov92@gmail.com> 18-- Stability : experimental 19-- Portability : portable 20-- 21-- This module includes everything you need to get started writing a parser. 22-- If you are new to Megaparsec and don't know where to begin, take a look 23-- at the tutorial <https://markkarpov.com/tutorial/megaparsec.html>. 24-- 25-- In addition to the "Text.Megaparsec" module, which exports and re-exports 26-- most everything that you may need, we advise to import 27-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens 28-- or "Text.Megaparsec.Byte" if you intend to parse binary data. 29-- 30-- It is common to start working with the library by defining a type synonym 31-- like this: 32-- 33-- > type Parser = Parsec Void Text 34-- > ^ ^ 35-- > | | 36-- > Custom error component Input stream type 37-- 38-- Then you can write type signatures like @Parser 'Int'@—for a parser that 39-- returns an 'Int' for example. 40-- 41-- Similarly (since it's known to cause confusion), you should use 42-- 'ParseErrorBundle' type parametrized like this: 43-- 44-- > ParseErrorBundle Text Void 45-- > ^ ^ 46-- > | | 47-- > Input stream type Custom error component (the same you used in Parser) 48-- 49-- Megaparsec uses some type-level machinery to provide flexibility without 50-- compromising on type safety. Thus type signatures are sometimes necessary 51-- to avoid ambiguous types. If you're seeing an error message that reads 52-- like “Type variable @e0@ is ambiguous …”, you need to give an explicit 53-- signature to your parser to resolve the ambiguity. It's a good idea to 54-- provide type signatures for all top-level definitions. 55module Text.Megaparsec 56 ( -- * Re-exports 57 -- $reexports 58 module Text.Megaparsec.Pos, 59 module Text.Megaparsec.Error, 60 module Text.Megaparsec.Stream, 61 module Control.Monad.Combinators, 62 63 -- * Data types 64 State (..), 65 PosState (..), 66 Parsec, 67 ParsecT, 68 69 -- * Running parser 70 parse, 71 parseMaybe, 72 parseTest, 73 runParser, 74 runParser', 75 runParserT, 76 runParserT', 77 78 -- * Primitive combinators 79 MonadParsec (..), 80 81 -- * Signaling parse errors 82 -- $parse-errors 83 failure, 84 fancyFailure, 85 unexpected, 86 customFailure, 87 region, 88 registerParseError, 89 registerFailure, 90 registerFancyFailure, 91 92 -- * Derivatives of primitive combinators 93 single, 94 satisfy, 95 anySingle, 96 anySingleBut, 97 oneOf, 98 noneOf, 99 chunk, 100 (<?>), 101 match, 102 takeRest, 103 atEnd, 104 105 -- * Parser state combinators 106 getInput, 107 setInput, 108 getSourcePos, 109 getOffset, 110 setOffset, 111 setParserState, 112 ) 113where 114 115import Control.Monad.Combinators 116import Control.Monad.Identity 117import Data.List.NonEmpty (NonEmpty (..)) 118import qualified Data.List.NonEmpty as NE 119import Data.Maybe (fromJust) 120import Data.Set (Set) 121import qualified Data.Set as E 122import Text.Megaparsec.Class 123import Text.Megaparsec.Error 124import Text.Megaparsec.Internal 125import Text.Megaparsec.Pos 126import Text.Megaparsec.State 127import Text.Megaparsec.Stream 128 129-- $reexports 130-- 131-- Note that we re-export monadic combinators from 132-- "Control.Monad.Combinators" because these are more efficient than 133-- 'Applicative'-based ones. Thus 'many' and 'some' may clash with the 134-- functions from "Control.Applicative". You need to hide the functions like 135-- this: 136-- 137-- > import Control.Applicative hiding (many, some) 138-- 139-- Also note that you can import "Control.Monad.Combinators.NonEmpty" if you 140-- wish that combinators like 'some' return 'NonEmpty' lists. The module 141-- lives in the @parser-combinators@ package (you need at least version 142-- /0.4.0/). 143-- 144-- This module is intended to be imported qualified: 145-- 146-- > import qualified Control.Monad.Combinators.NonEmpty as NE 147-- 148-- Other modules of interest are: 149-- 150-- * "Control.Monad.Combinators.Expr" for parsing of expressions. 151-- * "Control.Applicative.Permutations" for parsing of permutations 152-- phrases. 153 154---------------------------------------------------------------------------- 155-- Data types 156 157-- | 'Parsec' is a non-transformer variant of the more general 'ParsecT' 158-- monad transformer. 159type Parsec e s = ParsecT e s Identity 160 161---------------------------------------------------------------------------- 162-- Running a parser 163 164-- | @'parse' p file input@ runs parser @p@ over 'Identity' (see 165-- 'runParserT' if you're using the 'ParsecT' monad transformer; 'parse' 166-- itself is just a synonym for 'runParser'). It returns either a 167-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right'). 168-- 'errorBundlePretty' can be used to turn 'ParseErrorBundle' into the 169-- string representation of the error message. See "Text.Megaparsec.Error" 170-- if you need to do more advanced error analysis. 171-- 172-- > main = case parse numbers "" "11,2,43" of 173-- > Left bundle -> putStr (errorBundlePretty bundle) 174-- > Right xs -> print (sum xs) 175-- > 176-- > numbers = decimal `sepBy` char ',' 177parse :: 178 -- | Parser to run 179 Parsec e s a -> 180 -- | Name of source file 181 String -> 182 -- | Input for parser 183 s -> 184 Either (ParseErrorBundle s e) a 185parse = runParser 186 187-- | @'parseMaybe' p input@ runs the parser @p@ on @input@ and returns the 188-- result inside 'Just' on success and 'Nothing' on failure. This function 189-- also parses 'eof', so if the parser doesn't consume all of its input, it 190-- will fail. 191-- 192-- The function is supposed to be useful for lightweight parsing, where 193-- error messages (and thus file names) are not important and entire input 194-- should be parsed. For example, it can be used when parsing of a single 195-- number according to a specification of its format is desired. 196parseMaybe :: (Ord e, Stream s) => Parsec e s a -> s -> Maybe a 197parseMaybe p s = 198 case parse (p <* eof) "" s of 199 Left _ -> Nothing 200 Right x -> Just x 201 202-- | The expression @'parseTest' p input@ applies the parser @p@ against the 203-- input @input@ and prints the result to stdout. Useful for testing. 204parseTest :: 205 ( ShowErrorComponent e, 206 Show a, 207 VisualStream s, 208 TraversableStream s 209 ) => 210 -- | Parser to run 211 Parsec e s a -> 212 -- | Input for parser 213 s -> 214 IO () 215parseTest p input = 216 case parse p "" input of 217 Left e -> putStr (errorBundlePretty e) 218 Right x -> print x 219 220-- | @'runParser' p file input@ runs parser @p@ on the input stream of 221-- tokens @input@, obtained from source @file@. The @file@ is only used in 222-- error messages and may be the empty string. Returns either a 223-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right'). 224-- 225-- > parseFromFile p file = runParser p file <$> readFile file 226runParser :: 227 -- | Parser to run 228 Parsec e s a -> 229 -- | Name of source file 230 String -> 231 -- | Input for parser 232 s -> 233 Either (ParseErrorBundle s e) a 234runParser p name s = snd $ runParser' p (initialState name s) 235 236-- | The function is similar to 'runParser' with the difference that it 237-- accepts and returns parser state. This allows to specify arbitrary 238-- textual position at the beginning of parsing, for example. This is the 239-- most general way to run a parser over the 'Identity' monad. 240-- 241-- @since 4.2.0 242runParser' :: 243 -- | Parser to run 244 Parsec e s a -> 245 -- | Initial state 246 State s e -> 247 (State s e, Either (ParseErrorBundle s e) a) 248runParser' p = runIdentity . runParserT' p 249 250-- | @'runParserT' p file input@ runs parser @p@ on the input list of tokens 251-- @input@, obtained from source @file@. The @file@ is only used in error 252-- messages and may be the empty string. Returns a computation in the 253-- underlying monad @m@ that returns either a 'ParseErrorBundle' ('Left') or 254-- a value of type @a@ ('Right'). 255runParserT :: 256 Monad m => 257 -- | Parser to run 258 ParsecT e s m a -> 259 -- | Name of source file 260 String -> 261 -- | Input for parser 262 s -> 263 m (Either (ParseErrorBundle s e) a) 264runParserT p name s = snd <$> runParserT' p (initialState name s) 265 266-- | This function is similar to 'runParserT', but like 'runParser'' it 267-- accepts and returns parser state. This is thus the most general way to 268-- run a parser. 269-- 270-- @since 4.2.0 271runParserT' :: 272 Monad m => 273 -- | Parser to run 274 ParsecT e s m a -> 275 -- | Initial state 276 State s e -> 277 m (State s e, Either (ParseErrorBundle s e) a) 278runParserT' p s = do 279 (Reply s' _ result) <- runParsecT p s 280 let toBundle es = 281 ParseErrorBundle 282 { bundleErrors = 283 NE.sortWith errorOffset es, 284 bundlePosState = statePosState s 285 } 286 return $ case result of 287 OK x -> 288 case NE.nonEmpty (stateParseErrors s') of 289 Nothing -> (s', Right x) 290 Just de -> (s', Left (toBundle de)) 291 Error e -> 292 (s', Left (toBundle (e :| stateParseErrors s'))) 293 294-- | Given name of source file and input construct initial state for parser. 295initialState :: String -> s -> State s e 296initialState name s = 297 State 298 { stateInput = s, 299 stateOffset = 0, 300 statePosState = 301 PosState 302 { pstateInput = s, 303 pstateOffset = 0, 304 pstateSourcePos = initialPos name, 305 pstateTabWidth = defaultTabWidth, 306 pstateLinePrefix = "" 307 }, 308 stateParseErrors = [] 309 } 310 311---------------------------------------------------------------------------- 312-- Signaling parse errors 313 314-- $parse-errors 315-- 316-- The most general function to fail and end parsing is 'parseError'. These 317-- are built on top of it. The section also includes functions starting with 318-- the @register@ prefix which allow users to register “delayed” 319-- 'ParseError's. 320 321-- | Stop parsing and report a trivial 'ParseError'. 322-- 323-- @since 6.0.0 324failure :: 325 MonadParsec e s m => 326 -- | Unexpected item (if any) 327 Maybe (ErrorItem (Token s)) -> 328 -- | Expected items 329 Set (ErrorItem (Token s)) -> 330 m a 331failure us ps = do 332 o <- getOffset 333 parseError (TrivialError o us ps) 334{-# INLINE failure #-} 335 336-- | Stop parsing and report a fancy 'ParseError'. To report a single custom 337-- parse error, see 'Text.Megaparsec.customFailure'. 338-- 339-- @since 6.0.0 340fancyFailure :: 341 MonadParsec e s m => 342 -- | Fancy error components 343 Set (ErrorFancy e) -> 344 m a 345fancyFailure xs = do 346 o <- getOffset 347 parseError (FancyError o xs) 348{-# INLINE fancyFailure #-} 349 350-- | The parser @'unexpected' item@ fails with an error message telling 351-- about unexpected item @item@ without consuming any input. 352-- 353-- > unexpected item = failure (Just item) Set.empty 354unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a 355unexpected item = failure (Just item) E.empty 356{-# INLINE unexpected #-} 357 358-- | Report a custom parse error. For a more general version, see 359-- 'fancyFailure'. 360-- 361-- > customFailure = fancyFailure . Set.singleton . ErrorCustom 362-- 363-- @since 6.3.0 364customFailure :: MonadParsec e s m => e -> m a 365customFailure = fancyFailure . E.singleton . ErrorCustom 366{-# INLINE customFailure #-} 367 368-- | Specify how to process 'ParseError's that happen inside of this 369-- wrapper. This applies to both normal and delayed 'ParseError's. 370-- 371-- As a side-effect of the implementation the inner computation will start 372-- with empty collection of delayed errors and they will be updated and 373-- “restored” on the way out of 'region'. 374-- 375-- @since 5.3.0 376region :: 377 MonadParsec e s m => 378 -- | How to process 'ParseError's 379 (ParseError s e -> ParseError s e) -> 380 -- | The “region” that the processing applies to 381 m a -> 382 m a 383region f m = do 384 deSoFar <- stateParseErrors <$> getParserState 385 updateParserState $ \s -> 386 s {stateParseErrors = []} 387 r <- observing m 388 updateParserState $ \s -> 389 s {stateParseErrors = (f <$> stateParseErrors s) ++ deSoFar} 390 case r of 391 Left err -> parseError (f err) 392 Right x -> return x 393{-# INLINEABLE region #-} 394 395-- | Register a 'ParseError' for later reporting. This action does not end 396-- parsing and has no effect except for adding the given 'ParseError' to the 397-- collection of “delayed” 'ParseError's which will be taken into 398-- consideration at the end of parsing. Only if this collection is empty 399-- parser will succeed. This is the main way to report several parse errors 400-- at once. 401-- 402-- @since 8.0.0 403registerParseError :: MonadParsec e s m => ParseError s e -> m () 404registerParseError e = updateParserState $ \s -> 405 s {stateParseErrors = e : stateParseErrors s} 406{-# INLINE registerParseError #-} 407 408-- | Like 'failure', but for delayed 'ParseError's. 409-- 410-- @since 8.0.0 411registerFailure :: 412 MonadParsec e s m => 413 -- | Unexpected item (if any) 414 Maybe (ErrorItem (Token s)) -> 415 -- | Expected items 416 Set (ErrorItem (Token s)) -> 417 m () 418registerFailure us ps = do 419 o <- getOffset 420 registerParseError (TrivialError o us ps) 421{-# INLINE registerFailure #-} 422 423-- | Like 'fancyFailure', but for delayed 'ParseError's. 424-- 425-- @since 8.0.0 426registerFancyFailure :: 427 MonadParsec e s m => 428 -- | Fancy error components 429 Set (ErrorFancy e) -> 430 m () 431registerFancyFailure xs = do 432 o <- getOffset 433 registerParseError (FancyError o xs) 434{-# INLINE registerFancyFailure #-} 435 436---------------------------------------------------------------------------- 437-- Derivatives of primitive combinators 438 439-- | @'single' t@ only matches the single token @t@. 440-- 441-- > semicolon = single ';' 442-- 443-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char', 444-- 'Text.Megaparsec.Char.char'. 445-- 446-- @since 7.0.0 447single :: 448 MonadParsec e s m => 449 -- | Token to match 450 Token s -> 451 m (Token s) 452single t = token testToken expected 453 where 454 testToken x = if x == t then Just x else Nothing 455 expected = E.singleton (Tokens (t :| [])) 456{-# INLINE single #-} 457 458-- | The parser @'satisfy' f@ succeeds for any token for which the supplied 459-- function @f@ returns 'True'. 460-- 461-- > digitChar = satisfy isDigit <?> "digit" 462-- > oneOf cs = satisfy (`elem` cs) 463-- 464-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'. 465-- 466-- @since 7.0.0 467satisfy :: 468 MonadParsec e s m => 469 -- | Predicate to apply 470 (Token s -> Bool) -> 471 m (Token s) 472satisfy f = token testChar E.empty 473 where 474 testChar x = if f x then Just x else Nothing 475{-# INLINE satisfy #-} 476 477-- | Parse and return a single token. It's a good idea to attach a 'label' 478-- to this parser. 479-- 480-- > anySingle = satisfy (const True) 481-- 482-- See also: 'satisfy', 'anySingleBut'. 483-- 484-- @since 7.0.0 485anySingle :: MonadParsec e s m => m (Token s) 486anySingle = satisfy (const True) 487{-# INLINE anySingle #-} 488 489-- | Match any token but the given one. It's a good idea to attach a 'label' 490-- to this parser. 491-- 492-- > anySingleBut t = satisfy (/= t) 493-- 494-- See also: 'single', 'anySingle', 'satisfy'. 495-- 496-- @since 7.0.0 497anySingleBut :: 498 MonadParsec e s m => 499 -- | Token we should not match 500 Token s -> 501 m (Token s) 502anySingleBut t = satisfy (/= t) 503{-# INLINE anySingleBut #-} 504 505-- | @'oneOf' ts@ succeeds if the current token is in the supplied 506-- collection of tokens @ts@. Returns the parsed token. Note that this 507-- parser cannot automatically generate the “expected” component of error 508-- message, so usually you should label it manually with 'label' or ('<?>'). 509-- 510-- > oneOf cs = satisfy (`elem` cs) 511-- 512-- See also: 'satisfy'. 513-- 514-- > digit = oneOf ['0'..'9'] <?> "digit" 515-- 516-- __Performance note__: prefer 'satisfy' when you can because it's faster 517-- when you have only a couple of tokens to compare to: 518-- 519-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"') 520-- > quoteSlow = oneOf "'\"" 521-- 522-- @since 7.0.0 523oneOf :: 524 (Foldable f, MonadParsec e s m) => 525 -- | Collection of matching tokens 526 f (Token s) -> 527 m (Token s) 528oneOf cs = satisfy (`elem` cs) 529{-# INLINE oneOf #-} 530 531-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token 532-- /not/ in the supplied list of tokens @ts@. Returns the parsed character. 533-- Note that this parser cannot automatically generate the “expected” 534-- component of error message, so usually you should label it manually with 535-- 'label' or ('<?>'). 536-- 537-- > noneOf cs = satisfy (`notElem` cs) 538-- 539-- See also: 'satisfy'. 540-- 541-- __Performance note__: prefer 'satisfy' and 'anySingleBut' when you can 542-- because it's faster. 543-- 544-- @since 7.0.0 545noneOf :: 546 (Foldable f, MonadParsec e s m) => 547 -- | Collection of taken we should not match 548 f (Token s) -> 549 m (Token s) 550noneOf cs = satisfy (`notElem` cs) 551{-# INLINE noneOf #-} 552 553-- | @'chunk' chk@ only matches the chunk @chk@. 554-- 555-- > divOrMod = chunk "div" <|> chunk "mod" 556-- 557-- See also: 'tokens', 'Text.Megaparsec.Char.string', 558-- 'Text.Megaparsec.Byte.string'. 559-- 560-- @since 7.0.0 561chunk :: 562 MonadParsec e s m => 563 -- | Chunk to match 564 Tokens s -> 565 m (Tokens s) 566chunk = tokens (==) 567{-# INLINE chunk #-} 568 569-- | A synonym for 'label' in the form of an operator. 570infix 0 <?> 571 572(<?>) :: MonadParsec e s m => m a -> String -> m a 573(<?>) = flip label 574{-# INLINE (<?>) #-} 575 576-- | Return both the result of a parse and a chunk of input that was 577-- consumed during parsing. This relies on the change of the 'stateOffset' 578-- value to evaluate how many tokens were consumed. If you mess with it 579-- manually in the argument parser, prepare for troubles. 580-- 581-- @since 5.3.0 582match :: MonadParsec e s m => m a -> m (Tokens s, a) 583match p = do 584 o <- getOffset 585 s <- getInput 586 r <- p 587 o' <- getOffset 588 -- NOTE The 'fromJust' call here should never fail because if the stream 589 -- is empty before 'p' (the only case when 'takeN_' can return 'Nothing' 590 -- as per its invariants), (tp' - tp) won't be greater than 0, and in that 591 -- case 'Just' is guaranteed to be returned as per another invariant of 592 -- 'takeN_'. 593 return ((fst . fromJust) (takeN_ (o' - o) s), r) 594{-# INLINEABLE match #-} 595 596-- | Consume the rest of the input and return it as a chunk. This parser 597-- never fails, but may return the empty chunk. 598-- 599-- > takeRest = takeWhileP Nothing (const True) 600-- 601-- @since 6.0.0 602takeRest :: MonadParsec e s m => m (Tokens s) 603takeRest = takeWhileP Nothing (const True) 604{-# INLINE takeRest #-} 605 606-- | Return 'True' when end of input has been reached. 607-- 608-- > atEnd = option False (True <$ hidden eof) 609-- 610-- @since 6.0.0 611atEnd :: MonadParsec e s m => m Bool 612atEnd = option False (True <$ hidden eof) 613{-# INLINE atEnd #-} 614 615---------------------------------------------------------------------------- 616-- Parser state combinators 617 618-- | Return the current input. 619getInput :: MonadParsec e s m => m s 620getInput = stateInput <$> getParserState 621{-# INLINE getInput #-} 622 623-- | @'setInput' input@ continues parsing with @input@. 624setInput :: MonadParsec e s m => s -> m () 625setInput s = updateParserState (\(State _ o pst de) -> State s o pst de) 626{-# INLINE setInput #-} 627 628-- | Return the current source position. This function /is not cheap/, do 629-- not call it e.g. on matching of every token, that's a bad idea. Still you 630-- can use it to get 'SourcePos' to attach to things that you parse. 631-- 632-- The function works under the assumption that we move in the input stream 633-- only forwards and never backwards, which is always true unless the user 634-- abuses the library. 635-- 636-- @since 7.0.0 637getSourcePos :: (TraversableStream s, MonadParsec e s m) => m SourcePos 638getSourcePos = do 639 st <- getParserState 640 let pst = reachOffsetNoLine (stateOffset st) (statePosState st) 641 setParserState st {statePosState = pst} 642 return (pstateSourcePos pst) 643{-# INLINE getSourcePos #-} 644 645-- | Get the number of tokens processed so far. 646-- 647-- See also: 'setOffset'. 648-- 649-- @since 7.0.0 650getOffset :: MonadParsec e s m => m Int 651getOffset = stateOffset <$> getParserState 652{-# INLINE getOffset #-} 653 654-- | Set the number of tokens processed so far. 655-- 656-- See also: 'getOffset'. 657-- 658-- @since 7.0.0 659setOffset :: MonadParsec e s m => Int -> m () 660setOffset o = updateParserState $ \(State s _ pst de) -> 661 State s o pst de 662{-# INLINE setOffset #-} 663 664-- | @'setParserState' st@ sets the parser state to @st@. 665-- 666-- See also: 'getParserState', 'updateParserState'. 667setParserState :: MonadParsec e s m => State s e -> m () 668setParserState st = updateParserState (const st) 669{-# INLINE setParserState #-} 670