1{-# LANGUAGE CPP #-} 2{-# LANGUAGE FlexibleContexts #-} 3{-# LANGUAGE FlexibleInstances #-} 4{-# LANGUAGE GeneralizedNewtypeDeriving #-} 5{-# LANGUAGE MultiParamTypeClasses #-} 6{-# LANGUAGE ScopedTypeVariables #-} 7{-# LANGUAGE UndecidableInstances #-} 8#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702 9{-# LANGUAGE Trustworthy #-} 10#endif 11{-# OPTIONS_GHC -fspec-constr -fspec-constr-count=8 #-} 12----------------------------------------------------------------------------- 13-- | 14-- Module : Text.Parser.Token 15-- Copyright : (c) Edward Kmett 2011 16-- (c) Daan Leijen 1999-2001 17-- License : BSD3 18-- 19-- Maintainer : ekmett@gmail.com 20-- Stability : experimental 21-- Portability : non-portable 22-- 23-- Parsers that comprehend whitespace and identifier styles 24-- 25-- > idStyle = haskellIdents { styleReserved = ... } 26-- > identifier = ident idStyle 27-- > reserved = reserve idStyle 28-- 29----------------------------------------------------------------------------- 30module Text.Parser.Token 31 ( 32 -- * Token Parsing 33 whiteSpace -- :: TokenParsing m => m () 34 , charLiteral -- :: TokenParsing m => m Char 35 , stringLiteral -- :: (TokenParsing m, IsString s) => m s 36 , stringLiteral' -- :: (TokenParsing m, IsString s) => m s 37 , natural -- :: TokenParsing m => m Integer 38 , integer -- :: TokenParsing m => m Integer 39 , double -- :: TokenParsing m => m Double 40 , naturalOrDouble -- :: TokenParsing m => m (Either Integer Double) 41 , integerOrDouble -- :: TokenParsing m => m (Either Integer Double) 42 , scientific -- :: TokenParsing m => m Scientific 43 , naturalOrScientific -- :: TokenParsing m => m (Either Integer Scientific) 44 , integerOrScientific -- :: TokenParsing m => m (Either Integer Scientific) 45 , symbol -- :: TokenParsing m => String -> m String 46 , textSymbol -- :: TokenParsing m => Text -> m Text 47 , symbolic -- :: TokenParsing m => Char -> m Char 48 , parens -- :: TokenParsing m => m a -> m a 49 , braces -- :: TokenParsing m => m a -> m a 50 , angles -- :: TokenParsing m => m a -> m a 51 , brackets -- :: TokenParsing m => m a -> m a 52 , comma -- :: TokenParsing m => m Char 53 , colon -- :: TokenParsing m => m Char 54 , dot -- :: TokenParsing m => m Char 55 , semiSep -- :: TokenParsing m => m a -> m [a] 56 , semiSep1 -- :: TokenParsing m => m a -> m [a] 57 , commaSep -- :: TokenParsing m => m a -> m [a] 58 , commaSep1 -- :: TokenParsing m => m a -> m [a] 59 -- ** Token Parsing Class 60 , TokenParsing(..) 61 -- ** Token Parsing Transformers 62 , Unspaced(..) 63 , Unlined(..) 64 , Unhighlighted(..) 65 -- ** /Non-Token/ Parsers 66 , decimal -- :: TokenParsing m => m Integer 67 , hexadecimal -- :: TokenParsing m => m Integer 68 , octal -- :: TokenParsing m => m Integer 69 , characterChar -- :: TokenParsing m => m Char 70 , integer' -- :: TokenParsing m => m Integer 71 -- * Identifiers 72 , IdentifierStyle(..) 73 , liftIdentifierStyle -- :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m) 74 , ident -- :: (TokenParsing m, IsString s) => IdentifierStyle m -> m s 75 , reserve -- :: TokenParsing m => IdentifierStyle m -> String -> m () 76 , reserveText -- :: TokenParsing m => IdentifierStyle m -> Text -> m () 77 -- ** Lenses and Traversals 78 , styleName 79 , styleStart 80 , styleLetter 81 , styleChars 82 , styleReserved 83 , styleHighlight 84 , styleReservedHighlight 85 , styleHighlights 86 ) where 87 88import Control.Applicative 89import Control.Monad (MonadPlus(..), when) 90import Control.Monad.Trans.Class 91import Control.Monad.Trans.State.Lazy as Lazy 92import Control.Monad.Trans.State.Strict as Strict 93import Control.Monad.Trans.Writer.Lazy as Lazy 94import Control.Monad.Trans.Writer.Strict as Strict 95import Control.Monad.Trans.RWS.Lazy as Lazy 96import Control.Monad.Trans.RWS.Strict as Strict 97import Control.Monad.Trans.Reader 98import Control.Monad.Trans.Identity 99import Control.Monad.State.Class as Class 100import Control.Monad.Reader.Class as Class 101import Control.Monad.Writer.Class as Class 102import Data.Char 103import Data.Functor.Identity 104import qualified Data.HashSet as HashSet 105import Data.HashSet (HashSet) 106import Data.List (foldl', transpose) 107#if __GLASGOW_HASKELL__ < 710 108import Data.Monoid 109#endif 110import Data.Scientific ( Scientific ) 111import qualified Data.Scientific as Sci 112import Data.String 113import Data.Text hiding (empty,zip,foldl',take,map,length,splitAt,null,transpose) 114import Numeric (showIntAtBase) 115import qualified Text.ParserCombinators.ReadP as ReadP 116import Text.Parser.Char 117import Text.Parser.Combinators 118import Text.Parser.Token.Highlight 119 120#ifdef MIN_VERSION_parsec 121import qualified Text.Parsec as Parsec 122#endif 123 124#ifdef MIN_VERSION_attoparsec 125import qualified Data.Attoparsec.Types as Att 126#endif 127 128-- | Skip zero or more bytes worth of white space. More complex parsers are 129-- free to consider comments as white space. 130whiteSpace :: TokenParsing m => m () 131whiteSpace = someSpace <|> pure () 132{-# INLINE whiteSpace #-} 133 134-- | This token parser parses a single literal character. Returns the 135-- literal character value. This parsers deals correctly with escape 136-- sequences. The literal character is parsed according to the grammar 137-- rules defined in the Haskell report (which matches most programming 138-- languages quite closely). 139charLiteral :: forall m. TokenParsing m => m Char 140charLiteral = token (highlight CharLiteral lit) where 141 lit :: m Char 142 lit = between (char '\'') (char '\'' <?> "end of character") characterChar 143 <?> "character" 144{-# INLINE charLiteral #-} 145 146-- | This token parser parses a literal string. Returns the literal 147-- string value. This parsers deals correctly with escape sequences and 148-- gaps. The literal string is parsed according to the grammar rules 149-- defined in the Haskell report (which matches most programming 150-- languages quite closely). 151stringLiteral :: forall m s. (TokenParsing m, IsString s) => m s 152stringLiteral = fromString <$> token (highlight StringLiteral lit) where 153 lit :: m [Char] 154 lit = Prelude.foldr (maybe id (:)) "" 155 <$> between (char '"') (char '"' <?> "end of string") (many stringChar) 156 <?> "string" 157 158 stringChar :: m (Maybe Char) 159 stringChar = Just <$> stringLetter 160 <|> stringEscape 161 <?> "string character" 162 163 stringLetter :: m Char 164 stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')) 165 166 stringEscape :: m (Maybe Char) 167 stringEscape = highlight EscapeCode $ char '\\' *> esc where 168 esc :: m (Maybe Char) 169 esc = Nothing <$ escapeGap 170 <|> Nothing <$ escapeEmpty 171 <|> Just <$> escapeCode 172 173 escapeEmpty, escapeGap :: m Char 174 escapeEmpty = char '&' 175 escapeGap = skipSome space *> (char '\\' <?> "end of string gap") 176{-# INLINE stringLiteral #-} 177 178-- | This token parser behaves as 'stringLiteral', but for single-quoted 179-- strings. 180stringLiteral' :: forall m s. (TokenParsing m, IsString s) => m s 181stringLiteral' = fromString <$> token (highlight StringLiteral lit) where 182 lit :: m [Char] 183 lit = Prelude.foldr (maybe id (:)) "" 184 <$> between (char '\'') (char '\'' <?> "end of string") (many stringChar) 185 <?> "string" 186 187 stringChar :: m (Maybe Char) 188 stringChar = Just <$> stringLetter 189 <|> stringEscape 190 <?> "string character" 191 192 stringLetter :: m Char 193 stringLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026')) 194 195 stringEscape :: m (Maybe Char) 196 stringEscape = highlight EscapeCode $ char '\\' *> esc where 197 esc :: m (Maybe Char) 198 esc = Nothing <$ escapeGap 199 <|> Nothing <$ escapeEmpty 200 <|> Just <$> escapeCode 201 202 escapeEmpty, escapeGap :: m Char 203 escapeEmpty = char '&' 204 escapeGap = skipSome space *> (char '\\' <?> "end of string gap") 205{-# INLINE stringLiteral' #-} 206 207-- | This token parser parses a natural number (a non-negative whole 208-- number). Returns the value of the number. The number can be 209-- specified in 'decimal', 'hexadecimal' or 210-- 'octal'. The number is parsed according to the grammar 211-- rules in the Haskell report. 212natural :: TokenParsing m => m Integer 213natural = token natural' 214{-# INLINE natural #-} 215 216-- | This token parser parses an integer (a whole number). This parser 217-- is like 'natural' except that it can be prefixed with 218-- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The 219-- number can be specified in 'decimal', 'hexadecimal' 220-- or 'octal'. The number is parsed according 221-- to the grammar rules in the Haskell report. 222integer :: forall m. TokenParsing m => m Integer 223integer = token (token (highlight Operator sgn <*> natural')) <?> "integer" 224 where 225 sgn :: m (Integer -> Integer) 226 sgn = negate <$ char '-' 227 <|> id <$ char '+' 228 <|> pure id 229{-# INLINE integer #-} 230 231-- | This token parser parses a floating point value. Returns the value 232-- of the number. The number is parsed according to the grammar rules 233-- defined in the Haskell report. 234double :: TokenParsing m => m Double 235double = token (highlight Number (Sci.toRealFloat <$> floating) <?> "double") 236{-# INLINE double #-} 237 238-- | This token parser parses either 'natural' or a 'float'. 239-- Returns the value of the number. This parsers deals with 240-- any overlap in the grammar rules for naturals and floats. The number 241-- is parsed according to the grammar rules defined in the Haskell report. 242naturalOrDouble :: TokenParsing m => m (Either Integer Double) 243naturalOrDouble = fmap Sci.toRealFloat <$> naturalOrScientific 244{-# INLINE naturalOrDouble #-} 245 246-- | This token parser is like 'naturalOrDouble', but handles 247-- leading @-@ or @+@. 248integerOrDouble :: TokenParsing m => m (Either Integer Double) 249integerOrDouble = fmap Sci.toRealFloat <$> integerOrScientific 250{-# INLINE integerOrDouble #-} 251 252-- | This token parser parses a floating point value. Returns the value 253-- of the number. The number is parsed according to the grammar rules 254-- defined in the Haskell report. 255scientific :: TokenParsing m => m Scientific 256scientific = token (highlight Number floating <?> "scientific") 257{-# INLINE scientific #-} 258 259-- | This token parser parses either 'natural' or a 'scientific'. 260-- Returns the value of the number. This parsers deals with 261-- any overlap in the grammar rules for naturals and floats. The number 262-- is parsed according to the grammar rules defined in the Haskell report. 263naturalOrScientific :: TokenParsing m => m (Either Integer Scientific) 264naturalOrScientific = token (highlight Number natFloating <?> "number") 265{-# INLINE naturalOrScientific #-} 266 267-- | This token parser is like 'naturalOrScientific', but handles 268-- leading @-@ or @+@. 269integerOrScientific :: forall m. TokenParsing m => m (Either Integer Scientific) 270integerOrScientific = token (highlight Number ios <?> "number") 271 where ios :: m (Either Integer Scientific) 272 ios = mneg <$> optional (oneOf "+-") <*> natFloating 273 274 mneg (Just '-') nd = either (Left . negate) (Right . negate) nd 275 mneg _ nd = nd 276{-# INLINE integerOrScientific #-} 277 278 279-- | Token parser @symbol s@ parses 'string' @s@ and skips 280-- trailing white space. 281symbol :: TokenParsing m => String -> m String 282symbol name = token (highlight Symbol (string name)) 283{-# INLINE symbol #-} 284 285-- | Token parser @textSymbol t@ parses 'text' @s@ and skips 286-- trailing white space. 287textSymbol :: TokenParsing m => Text -> m Text 288textSymbol name = token (highlight Symbol (text name)) 289{-# INLINE textSymbol #-} 290 291-- | Token parser @symbolic s@ parses 'char' @s@ and skips 292-- trailing white space. 293symbolic :: TokenParsing m => Char -> m Char 294symbolic name = token (highlight Symbol (char name)) 295{-# INLINE symbolic #-} 296 297-- | Token parser @parens p@ parses @p@ enclosed in parenthesis, 298-- returning the value of @p@. 299parens :: TokenParsing m => m a -> m a 300parens = nesting . between (symbolic '(') (symbolic ')') 301{-# INLINE parens #-} 302 303-- | Token parser @braces p@ parses @p@ enclosed in braces (\'{\' and 304-- \'}\'), returning the value of @p@. 305braces :: TokenParsing m => m a -> m a 306braces = nesting . between (symbolic '{') (symbolic '}') 307{-# INLINE braces #-} 308 309-- | Token parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\' 310-- and \'>\'), returning the value of @p@. 311angles :: TokenParsing m => m a -> m a 312angles = nesting . between (symbolic '<') (symbolic '>') 313{-# INLINE angles #-} 314 315-- | Token parser @brackets p@ parses @p@ enclosed in brackets (\'[\' 316-- and \']\'), returning the value of @p@. 317brackets :: TokenParsing m => m a -> m a 318brackets = nesting . between (symbolic '[') (symbolic ']') 319{-# INLINE brackets #-} 320 321-- | Token parser @comma@ parses the character \',\' and skips any 322-- trailing white space. Returns the string \",\". 323comma :: TokenParsing m => m Char 324comma = symbolic ',' 325{-# INLINE comma #-} 326 327-- | Token parser @colon@ parses the character \':\' and skips any 328-- trailing white space. Returns the string \":\". 329colon :: TokenParsing m => m Char 330colon = symbolic ':' 331{-# INLINE colon #-} 332 333-- | Token parser @dot@ parses the character \'.\' and skips any 334-- trailing white space. Returns the string \".\". 335dot :: TokenParsing m => m Char 336dot = symbolic '.' 337{-# INLINE dot #-} 338 339-- | Token parser @semiSep p@ parses /zero/ or more occurrences of @p@ 340-- separated by 'semi'. Returns a list of values returned by @p@. 341semiSep :: TokenParsing m => m a -> m [a] 342semiSep p = sepBy p semi 343{-# INLINE semiSep #-} 344 345-- | Token parser @semiSep1 p@ parses /one/ or more occurrences of @p@ 346-- separated by 'semi'. Returns a list of values returned by @p@. 347semiSep1 :: TokenParsing m => m a -> m [a] 348semiSep1 p = sepBy1 p semi 349{-# INLINE semiSep1 #-} 350 351-- | Token parser @commaSep p@ parses /zero/ or more occurrences of 352-- @p@ separated by 'comma'. Returns a list of values returned 353-- by @p@. 354commaSep :: TokenParsing m => m a -> m [a] 355commaSep p = sepBy p comma 356{-# INLINE commaSep #-} 357 358-- | Token parser @commaSep1 p@ parses /one/ or more occurrences of 359-- @p@ separated by 'comma'. Returns a list of values returned 360-- by @p@. 361commaSep1 :: TokenParsing m => m a -> m [a] 362commaSep1 p = sepBy1 p comma 363{-# INLINE commaSep1 #-} 364 365-- | Additional functionality that is needed to tokenize input while ignoring whitespace. 366class CharParsing m => TokenParsing m where 367 -- | Usually, someSpace consists of /one/ or more occurrences of a 'space'. 368 -- Some parsers may choose to recognize line comments or block (multi line) 369 -- comments as white space as well. 370 someSpace :: m () 371 someSpace = skipSome (satisfy isSpace) 372 {-# INLINE someSpace #-} 373 374 -- | Called when we enter a nested pair of symbols. 375 -- Overloadable to enable disabling layout 376 nesting :: m a -> m a 377 nesting = id 378 {-# INLINE nesting #-} 379 380 -- | The token parser |semi| parses the character \';\' and skips 381 -- any trailing white space. Returns the character \';\'. Overloadable to 382 -- permit automatic semicolon insertion or Haskell-style layout. 383 semi :: m Char 384 semi = token (satisfy (';'==) <?> ";") 385 {-# INLINE semi #-} 386 387 -- | Tag a region of parsed text with a bit of semantic information. 388 -- Most parsers won't use this, but it is indispensible for highlighters. 389 highlight :: Highlight -> m a -> m a 390 highlight _ a = a 391 {-# INLINE highlight #-} 392 393 -- | @token p@ first applies parser @p@ and then the 'whiteSpace' 394 -- parser, returning the value of @p@. Every lexical 395 -- token (token) is defined using @token@, this way every parse 396 -- starts at a point without white space. Parsers that use @token@ are 397 -- called /token/ parsers in this document. 398 -- 399 -- The only point where the 'whiteSpace' parser should be 400 -- called explicitly is the start of the main parser in order to skip 401 -- any leading white space. 402 -- 403 -- Alternatively, one might define 'token' as first parsing 'whiteSpace' 404 -- and then parser @p@. By parsing whiteSpace first, the parser is able 405 -- to return before parsing additional whiteSpace, improving laziness. 406 -- 407 -- > mainParser = sum <$ whiteSpace <*> many (token digit) <* eof 408 token :: m a -> m a 409 token p = p <* (someSpace <|> pure ()) 410 411instance (TokenParsing m, MonadPlus m) => TokenParsing (Lazy.StateT s m) where 412 nesting (Lazy.StateT m) = Lazy.StateT $ nesting . m 413 {-# INLINE nesting #-} 414 someSpace = lift someSpace 415 {-# INLINE someSpace #-} 416 semi = lift semi 417 {-# INLINE semi #-} 418 highlight h (Lazy.StateT m) = Lazy.StateT $ highlight h . m 419 {-# INLINE highlight #-} 420 421instance (TokenParsing m, MonadPlus m) => TokenParsing (Strict.StateT s m) where 422 nesting (Strict.StateT m) = Strict.StateT $ nesting . m 423 {-# INLINE nesting #-} 424 someSpace = lift someSpace 425 {-# INLINE someSpace #-} 426 semi = lift semi 427 {-# INLINE semi #-} 428 highlight h (Strict.StateT m) = Strict.StateT $ highlight h . m 429 {-# INLINE highlight #-} 430 431instance (TokenParsing m, MonadPlus m) => TokenParsing (ReaderT e m) where 432 nesting (ReaderT m) = ReaderT $ nesting . m 433 {-# INLINE nesting #-} 434 someSpace = lift someSpace 435 {-# INLINE someSpace #-} 436 semi = lift semi 437 {-# INLINE semi #-} 438 highlight h (ReaderT m) = ReaderT $ highlight h . m 439 {-# INLINE highlight #-} 440 441instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.WriterT w m) where 442 nesting (Strict.WriterT m) = Strict.WriterT $ nesting m 443 {-# INLINE nesting #-} 444 someSpace = lift someSpace 445 {-# INLINE someSpace #-} 446 semi = lift semi 447 {-# INLINE semi #-} 448 highlight h (Strict.WriterT m) = Strict.WriterT $ highlight h m 449 {-# INLINE highlight #-} 450 451instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.WriterT w m) where 452 nesting (Lazy.WriterT m) = Lazy.WriterT $ nesting m 453 {-# INLINE nesting #-} 454 someSpace = lift someSpace 455 {-# INLINE someSpace #-} 456 semi = lift semi 457 {-# INLINE semi #-} 458 highlight h (Lazy.WriterT m) = Lazy.WriterT $ highlight h m 459 {-# INLINE highlight #-} 460 461instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.RWST r w s m) where 462 nesting (Lazy.RWST m) = Lazy.RWST $ \r s -> nesting (m r s) 463 {-# INLINE nesting #-} 464 someSpace = lift someSpace 465 {-# INLINE someSpace #-} 466 semi = lift semi 467 {-# INLINE semi #-} 468 highlight h (Lazy.RWST m) = Lazy.RWST $ \r s -> highlight h (m r s) 469 {-# INLINE highlight #-} 470 471instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.RWST r w s m) where 472 nesting (Strict.RWST m) = Strict.RWST $ \r s -> nesting (m r s) 473 {-# INLINE nesting #-} 474 someSpace = lift someSpace 475 {-# INLINE someSpace #-} 476 semi = lift semi 477 {-# INLINE semi #-} 478 highlight h (Strict.RWST m) = Strict.RWST $ \r s -> highlight h (m r s) 479 {-# INLINE highlight #-} 480 481instance (TokenParsing m, MonadPlus m) => TokenParsing (IdentityT m) where 482 nesting = IdentityT . nesting . runIdentityT 483 {-# INLINE nesting #-} 484 someSpace = lift someSpace 485 {-# INLINE someSpace #-} 486 semi = lift semi 487 {-# INLINE semi #-} 488 highlight h = IdentityT . highlight h . runIdentityT 489 {-# INLINE highlight #-} 490 491-- | Used to describe an input style for constructors, values, operators, etc. 492data IdentifierStyle m = IdentifierStyle 493 { _styleName :: String 494 , _styleStart :: m Char 495 , _styleLetter :: m Char 496 , _styleReserved :: HashSet String 497 , _styleHighlight :: Highlight 498 , _styleReservedHighlight :: Highlight 499 } 500 501-- | This lens can be used to update the name for this style of identifier. 502-- 503-- @'styleName' :: Lens' ('IdentifierStyle' m) 'String'@ 504styleName :: Functor f => (String -> f String) -> IdentifierStyle m -> f (IdentifierStyle m) 505styleName f is = (\n -> is { _styleName = n }) <$> f (_styleName is) 506{-# INLINE styleName #-} 507 508-- | This lens can be used to update the action used to recognize the first letter in an identifier. 509-- 510-- @'styleStart' :: Lens' ('IdentifierStyle' m) (m 'Char')@ 511styleStart :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m) 512styleStart f is = (\n -> is { _styleStart = n }) <$> f (_styleStart is) 513{-# INLINE styleStart #-} 514 515-- | This lens can be used to update the action used to recognize subsequent letters in an identifier. 516-- 517-- @'styleLetter' :: Lens' ('IdentifierStyle' m) (m 'Char')@ 518styleLetter :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m) 519styleLetter f is = (\n -> is { _styleLetter = n }) <$> f (_styleLetter is) 520{-# INLINE styleLetter #-} 521 522-- | This is a traversal of both actions in contained in an 'IdentifierStyle'. 523-- 524-- @'styleChars' :: Traversal ('IdentifierStyle' m) ('IdentifierStyle' n) (m 'Char') (n 'Char')@ 525styleChars :: Applicative f => (m Char -> f (n Char)) -> IdentifierStyle m -> f (IdentifierStyle n) 526styleChars f is = (\n m -> is { _styleStart = n, _styleLetter = m }) <$> f (_styleStart is) <*> f (_styleLetter is) 527{-# INLINE styleChars #-} 528 529-- | This is a lens that can be used to modify the reserved identifier set. 530-- 531-- @'styleReserved' :: Lens' ('IdentifierStyle' m) ('HashSet' 'String')@ 532styleReserved :: Functor f => (HashSet String -> f (HashSet String)) -> IdentifierStyle m -> f (IdentifierStyle m) 533styleReserved f is = (\n -> is { _styleReserved = n }) <$> f (_styleReserved is) 534{-# INLINE styleReserved #-} 535 536-- | This is a lens that can be used to modify the highlight used for this identifier set. 537-- 538-- @'styleHighlight' :: Lens' ('IdentifierStyle' m) 'Highlight'@ 539styleHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m) 540styleHighlight f is = (\n -> is { _styleHighlight = n }) <$> f (_styleHighlight is) 541{-# INLINE styleHighlight #-} 542 543-- | This is a lens that can be used to modify the highlight used for reserved identifiers in this identifier set. 544-- 545-- @'styleReservedHighlight' :: Lens' ('IdentifierStyle' m) 'Highlight'@ 546styleReservedHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m) 547styleReservedHighlight f is = (\n -> is { _styleReservedHighlight = n }) <$> f (_styleReservedHighlight is) 548{-# INLINE styleReservedHighlight #-} 549 550-- | This is a traversal that can be used to modify the highlights used for both non-reserved and reserved identifiers in this identifier set. 551-- 552-- @'styleHighlights' :: Traversal' ('IdentifierStyle' m) 'Highlight'@ 553styleHighlights :: Applicative f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m) 554styleHighlights f is = (\n m -> is { _styleHighlight = n, _styleReservedHighlight = m }) <$> f (_styleHighlight is) <*> f (_styleReservedHighlight is) 555{-# INLINE styleHighlights #-} 556 557-- | Lift an identifier style into a monad transformer 558-- 559-- Using @over@ from the @lens@ package: 560-- 561-- @'liftIdentifierStyle' = over 'styleChars' 'lift'@ 562liftIdentifierStyle :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m) 563liftIdentifierStyle = runIdentity . styleChars (Identity . lift) 564{-# INLINE liftIdentifierStyle #-} 565 566-- | parse a reserved operator or identifier using a given style 567reserve :: (TokenParsing m, Monad m) => IdentifierStyle m -> String -> m () 568reserve s name = token $ try $ do 569 _ <- highlight (_styleReservedHighlight s) $ string name 570 notFollowedBy (_styleLetter s) <?> "end of " ++ show name 571{-# INLINE reserve #-} 572 573-- | parse a reserved operator or identifier using a given style given 'Text'. 574reserveText :: (TokenParsing m, Monad m) => IdentifierStyle m -> Text -> m () 575reserveText s name = token $ try $ do 576 _ <- highlight (_styleReservedHighlight s) $ text name 577 notFollowedBy (_styleLetter s) <?> "end of " ++ show name 578{-# INLINE reserveText #-} 579 580-- | Parse a non-reserved identifier or symbol 581ident :: (TokenParsing m, Monad m, IsString s) => IdentifierStyle m -> m s 582ident s = fmap fromString $ token $ try $ do 583 name <- highlight (_styleHighlight s) 584 ((:) <$> _styleStart s <*> many (_styleLetter s) <?> _styleName s) 585 when (HashSet.member name (_styleReserved s)) $ unexpected $ "reserved " ++ _styleName s ++ " " ++ show name 586 return name 587{-# INLINE ident #-} 588 589-- * Utilities 590 591-- | This parser parses a character literal without the surrounding quotation marks. 592-- 593-- This parser does NOT swallow trailing whitespace 594 595characterChar :: TokenParsing m => m Char 596 597charEscape, charLetter :: TokenParsing m => m Char 598characterChar = charLetter <|> charEscape <?> "literal character" 599{-# INLINE characterChar #-} 600 601charEscape = highlight EscapeCode $ char '\\' *> escapeCode 602{-# INLINE charEscape #-} 603 604charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026')) 605{-# INLINE charLetter #-} 606 607-- | This parser parses a literal string. Returns the literal 608-- string value. This parsers deals correctly with escape sequences and 609-- gaps. The literal string is parsed according to the grammar rules 610-- defined in the Haskell report (which matches most programming 611-- languages quite closely). 612-- 613-- This parser does NOT swallow trailing whitespace 614 615escapeCode :: forall m. TokenParsing m => m Char 616escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) <?> "escape code" 617 where 618 charControl, charNum :: m Char 619 charControl = (\c -> toEnum (fromEnum c - fromEnum '@')) <$> (char '^' *> (upper <|> char '@')) 620 charNum = toEnum <$> num 621 where 622 num :: m Int 623 num = bounded 10 maxchar 624 <|> (char 'o' *> bounded 8 maxchar) 625 <|> (char 'x' *> bounded 16 maxchar) 626 maxchar = fromEnum (maxBound :: Char) 627 628 bounded :: Int -> Int -> m Int 629 bounded base bnd = foldl' (\x d -> base * x + digitToInt d) 0 630 <$> bounded' (take base thedigits) (map digitToInt $ showIntAtBase base intToDigit bnd "") 631 where 632 thedigits :: [m Char] 633 thedigits = map char ['0'..'9'] ++ map oneOf (transpose [['A'..'F'],['a'..'f']]) 634 635 toomuch :: m a 636 toomuch = unexpected "out-of-range numeric escape sequence" 637 638 bounded', bounded'' :: [m Char] -> [Int] -> m [Char] 639 bounded' dps@(zero:_) bds = skipSome zero *> ([] <$ notFollowedBy (choice dps) <|> bounded'' dps bds) 640 <|> bounded'' dps bds 641 bounded' [] _ = error "bounded called with base 0" 642 bounded'' dps [] = [] <$ notFollowedBy (choice dps) <|> toomuch 643 bounded'' dps (bd : bds) = let anyd :: m Char 644 anyd = choice dps 645 646 nomore :: m () 647 nomore = notFollowedBy anyd <|> toomuch 648 (low, ex : high) = splitAt bd dps 649 in ((:) <$> choice low <*> atMost (length bds) anyd) <* nomore 650 <|> ((:) <$> ex <*> ([] <$ nomore <|> bounded'' dps bds)) 651 <|> if not (null bds) 652 then (:) <$> choice high <*> atMost (length bds - 1) anyd <* nomore 653 else empty 654 atMost n p | n <= 0 = pure [] 655 | otherwise = ((:) <$> p <*> atMost (n - 1) p) <|> pure [] 656 657 charEsc :: m Char 658 charEsc = choice $ parseEsc <$> escMap 659 660 parseEsc (c,code) = code <$ char c 661 escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" 662 663 charAscii :: m Char 664 charAscii = choice $ parseAscii <$> asciiMap 665 666 parseAscii (asc,code) = try $ code <$ string asc 667 asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) 668 ascii2codes, ascii3codes :: [String] 669 ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO" 670 , "SI","EM","FS","GS","RS","US","SP"] 671 ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK" 672 ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK" 673 ,"SYN","ETB","CAN","SUB","ESC","DEL"] 674 ascii2, ascii3 :: String 675 ascii2 = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP" 676 ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL" 677 678-- | This parser parses a natural number (a non-negative whole 679-- number). Returns the value of the number. The number can be 680-- specified in 'decimal', 'hexadecimal' or 681-- 'octal'. The number is parsed according to the grammar 682-- rules in the Haskell report. 683-- 684-- This parser does NOT swallow trailing whitespace. 685natural' :: TokenParsing m => m Integer 686natural' = highlight Number nat <?> "natural" 687 688number :: TokenParsing m => Integer -> m Char -> m Integer 689number base baseDigit = 690 foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 <$> some baseDigit 691 692-- | This parser parses an integer (a whole number). This parser 693-- is like 'natural' except that it can be prefixed with 694-- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The 695-- number can be specified in 'decimal', 'hexadecimal' 696-- or 'octal'. The number is parsed according 697-- to the grammar rules in the Haskell report. 698-- 699-- This parser does NOT swallow trailing whitespace. 700-- 701-- Also, unlike the 'integer' parser, this parser does not admit spaces 702-- between the sign and the number. 703 704integer' :: TokenParsing m => m Integer 705integer' = int <?> "integer" 706{-# INLINE integer' #-} 707 708sign :: TokenParsing m => m (Integer -> Integer) 709sign = highlight Operator 710 $ negate <$ char '-' 711 <|> id <$ char '+' 712 <|> pure id 713 714int :: TokenParsing m => m Integer 715int = {-token-} sign <*> highlight Number nat 716nat, zeroNumber :: TokenParsing m => m Integer 717nat = zeroNumber <|> decimal 718zeroNumber = char '0' *> (hexadecimal <|> octal <|> decimal <|> pure 0) <?> "" 719 720floating :: TokenParsing m => m Scientific 721floating = decimal <**> fractExponent 722{-# INLINE floating #-} 723 724fractExponent :: forall m. TokenParsing m => m (Integer -> Scientific) 725fractExponent = (\fract expo n -> (fromInteger n + fract) * expo) <$> fraction <*> option 1 exponent' 726 <|> (\expo n -> fromInteger n * expo) <$> exponent' 727 where 728 fraction :: m Scientific 729 fraction = foldl' op 0 <$> (char '.' *> (some digit <?> "fraction")) 730 731 op f d = f + Sci.scientific (fromIntegral (digitToInt d)) (Sci.base10Exponent f - 1) 732 733 exponent' :: m Scientific 734 exponent' = ((\f e -> power (f e)) <$ oneOf "eE" <*> sign <*> (decimal <?> "exponent")) <?> "exponent" 735 736 power = Sci.scientific 1 . fromInteger 737 738 739natFloating, zeroNumFloat, decimalFloat :: TokenParsing m => m (Either Integer Scientific) 740natFloating 741 = char '0' *> zeroNumFloat 742 <|> decimalFloat 743zeroNumFloat 744 = Left <$> (hexadecimal <|> octal) 745 <|> decimalFloat 746 <|> pure 0 <**> try fractFloat 747 <|> pure (Left 0) 748decimalFloat = decimal <**> option Left (try fractFloat) 749 750fractFloat :: TokenParsing m => m (Integer -> Either Integer Scientific) 751fractFloat = (Right .) <$> fractExponent 752{-# INLINE fractFloat #-} 753 754-- | Parses a non-negative whole number in the decimal system. Returns the 755-- value of the number. 756-- 757-- This parser does NOT swallow trailing whitespace 758decimal :: TokenParsing m => m Integer 759decimal = number 10 digit 760{-# INLINE decimal #-} 761 762-- | Parses a non-negative whole number in the hexadecimal system. The number 763-- should be prefixed with \"x\" or \"X\". Returns the value of the 764-- number. 765-- 766-- This parser does NOT swallow trailing whitespace 767hexadecimal :: TokenParsing m => m Integer 768hexadecimal = oneOf "xX" *> number 16 hexDigit 769{-# INLINE hexadecimal #-} 770 771-- | Parses a non-negative whole number in the octal system. The number 772-- should be prefixed with \"o\" or \"O\". Returns the value of the 773-- number. 774-- 775-- This parser does NOT swallow trailing whitespace 776octal :: TokenParsing m => m Integer 777octal = oneOf "oO" *> number 8 octDigit 778{-# INLINE octal #-} 779 780-- | This is a parser transformer you can use to disable syntax highlighting 781-- over a range of text you are parsing. 782newtype Unhighlighted m a = Unhighlighted { runUnhighlighted :: m a } 783 deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing) 784 785instance Parsing m => Parsing (Unhighlighted m) where 786 try (Unhighlighted m) = Unhighlighted $ try m 787 {-# INLINE try #-} 788 Unhighlighted m <?> l = Unhighlighted $ m <?> l 789 {-# INLINE (<?>) #-} 790 unexpected = Unhighlighted . unexpected 791 {-# INLINE unexpected #-} 792 eof = Unhighlighted eof 793 {-# INLINE eof #-} 794 notFollowedBy (Unhighlighted m) = Unhighlighted $ notFollowedBy m 795 {-# INLINE notFollowedBy #-} 796 797 798instance MonadTrans Unhighlighted where 799 lift = Unhighlighted 800 {-# INLINE lift #-} 801 802instance MonadState s m => MonadState s (Unhighlighted m) where 803 get = lift Class.get 804 {-# INLINE get #-} 805 put = lift . Class.put 806 {-# INLINE put #-} 807 808instance MonadReader e m => MonadReader e (Unhighlighted m) where 809 ask = lift Class.ask 810 {-# INLINE ask #-} 811 local f = Unhighlighted . Class.local f . runUnhighlighted 812 {-# INLINE local #-} 813 814instance MonadWriter e m => MonadWriter e (Unhighlighted m) where 815 tell = lift . Class.tell 816 {-# INLINE tell #-} 817 listen = Unhighlighted . Class.listen . runUnhighlighted 818 {-# INLINE listen #-} 819 pass = Unhighlighted . Class.pass . runUnhighlighted 820 {-# INLINE pass #-} 821 822instance TokenParsing m => TokenParsing (Unhighlighted m) where 823 nesting (Unhighlighted m) = Unhighlighted (nesting m) 824 {-# INLINE nesting #-} 825 someSpace = Unhighlighted someSpace 826 {-# INLINE someSpace #-} 827 semi = Unhighlighted semi 828 {-# INLINE semi #-} 829 highlight _ m = m 830 {-# INLINE highlight #-} 831 832-- | This is a parser transformer you can use to disable the automatic trailing 833-- space consumption of a Token parser. 834newtype Unspaced m a = Unspaced { runUnspaced :: m a } 835 deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing) 836 837instance Parsing m => Parsing (Unspaced m) where 838 try (Unspaced m) = Unspaced $ try m 839 {-# INLINE try #-} 840 Unspaced m <?> l = Unspaced $ m <?> l 841 {-# INLINE (<?>) #-} 842 unexpected = Unspaced . unexpected 843 {-# INLINE unexpected #-} 844 eof = Unspaced eof 845 {-# INLINE eof #-} 846 notFollowedBy (Unspaced m) = Unspaced $ notFollowedBy m 847 {-# INLINE notFollowedBy #-} 848 849instance MonadTrans Unspaced where 850 lift = Unspaced 851 {-# INLINE lift #-} 852 853instance MonadState s m => MonadState s (Unspaced m) where 854 get = lift Class.get 855 {-# INLINE get #-} 856 put = lift . Class.put 857 {-# INLINE put #-} 858 859instance MonadReader e m => MonadReader e (Unspaced m) where 860 ask = lift Class.ask 861 {-# INLINE ask #-} 862 local f = Unspaced . Class.local f . runUnspaced 863 {-# INLINE local #-} 864 865instance MonadWriter e m => MonadWriter e (Unspaced m) where 866 tell = lift . Class.tell 867 {-# INLINE tell #-} 868 listen = Unspaced . Class.listen . runUnspaced 869 {-# INLINE listen #-} 870 pass = Unspaced . Class.pass . runUnspaced 871 {-# INLINE pass #-} 872 873instance TokenParsing m => TokenParsing (Unspaced m) where 874 nesting (Unspaced m) = Unspaced (nesting m) 875 {-# INLINE nesting #-} 876 someSpace = empty 877 {-# INLINE someSpace #-} 878 semi = Unspaced semi 879 {-# INLINE semi #-} 880 highlight h (Unspaced m) = Unspaced (highlight h m) 881 {-# INLINE highlight #-} 882 883-- | This is a parser transformer you can use to disable the automatic trailing 884-- newline (but not whitespace-in-general) consumption of a Token parser. 885newtype Unlined m a = Unlined { runUnlined :: m a } 886 deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing) 887 888instance Parsing m => Parsing (Unlined m) where 889 try (Unlined m) = Unlined $ try m 890 {-# INLINE try #-} 891 Unlined m <?> l = Unlined $ m <?> l 892 {-# INLINE (<?>) #-} 893 unexpected = Unlined . unexpected 894 {-# INLINE unexpected #-} 895 eof = Unlined eof 896 {-# INLINE eof #-} 897 notFollowedBy (Unlined m) = Unlined $ notFollowedBy m 898 {-# INLINE notFollowedBy #-} 899 900instance MonadTrans Unlined where 901 lift = Unlined 902 {-# INLINE lift #-} 903 904instance MonadState s m => MonadState s (Unlined m) where 905 get = lift Class.get 906 {-# INLINE get #-} 907 put = lift . Class.put 908 {-# INLINE put #-} 909 910instance MonadReader e m => MonadReader e (Unlined m) where 911 ask = lift Class.ask 912 {-# INLINE ask #-} 913 local f = Unlined . Class.local f . runUnlined 914 {-# INLINE local #-} 915 916instance MonadWriter e m => MonadWriter e (Unlined m) where 917 tell = lift . Class.tell 918 {-# INLINE tell #-} 919 listen = Unlined . Class.listen . runUnlined 920 {-# INLINE listen #-} 921 pass = Unlined . Class.pass . runUnlined 922 {-# INLINE pass #-} 923 924instance TokenParsing m => TokenParsing (Unlined m) where 925 nesting (Unlined m) = Unlined (nesting m) 926 {-# INLINE nesting #-} 927 someSpace = skipMany (satisfy $ \c -> c /= '\n' && isSpace c) 928 {-# INLINE someSpace #-} 929 semi = Unlined semi 930 {-# INLINE semi #-} 931 highlight h (Unlined m) = Unlined (highlight h m) 932 {-# INLINE highlight #-} 933 934#ifdef MIN_VERSION_parsec 935instance Parsec.Stream s m Char => TokenParsing (Parsec.ParsecT s u m) 936#endif 937 938#ifdef MIN_VERSION_attoparsec 939instance Att.Chunk t => TokenParsing (Att.Parser t) 940#endif 941 942instance TokenParsing ReadP.ReadP 943