1{-# OPTIONS_HADDOCK hide #-} 2----------------------------------------------------------------------------- 3-- | 4-- Module : Language.Haskell.Exts.Annotated.InternalLexer 5-- Copyright : (c) The GHC Team, 1997-2000 6-- (c) Niklas Broberg, 2004-2009 7-- License : BSD-style (see the file LICENSE.txt) 8-- 9-- Maintainer : Niklas Broberg, d00nibro@chalmers.se 10-- Stability : stable 11-- Portability : portable 12-- 13-- Lexer for Haskell, with some extensions. 14-- 15----------------------------------------------------------------------------- 16 17-- ToDo: Introduce different tokens for decimal, octal and hexadecimal (?) 18-- ToDo: FloatTok should have three parts (integer part, fraction, exponent) (?) 19-- ToDo: Use a lexical analyser generator (lx?) 20 21module Language.Haskell.Exts.InternalLexer (Token(..), showToken, lexer, topLexer) where 22 23import Language.Haskell.Exts.ParseMonad 24import Language.Haskell.Exts.SrcLoc hiding (loc) 25import Language.Haskell.Exts.Comments 26import Language.Haskell.Exts.Extension 27import Language.Haskell.Exts.ExtScheme 28 29import Prelude hiding (id, exponent) 30import Data.Char 31import Data.Ratio 32import Data.List (intercalate, isPrefixOf) 33import Control.Monad (when) 34 35-- import Debug.Trace (trace) 36 37data Token 38 = VarId String 39 | LabelVarId String 40 | QVarId (String,String) 41 | IDupVarId (String) -- duplicable implicit parameter 42 | ILinVarId (String) -- linear implicit parameter 43 | ConId String 44 | QConId (String,String) 45 | DVarId [String] -- to enable varid's with '-' in them 46 | VarSym String 47 | ConSym String 48 | QVarSym (String,String) 49 | QConSym (String,String) 50 | IntTok (Integer, String) 51 | FloatTok (Rational, String) 52 | Character (Char, String) 53 | StringTok (String, String) 54 | IntTokHash (Integer, String) -- 1# 55 | WordTokHash (Integer, String) -- 1## 56 | FloatTokHash (Rational, String) -- 1.0# 57 | DoubleTokHash (Rational, String) -- 1.0## 58 | CharacterHash (Char, String) -- c# 59 | StringHash (String, String) -- "Hello world!"# 60 61-- Symbols 62 63 | LeftParen 64 | RightParen 65 | LeftHashParen 66 | RightHashParen 67 | SemiColon 68 | LeftCurly 69 | RightCurly 70 | VRightCurly -- a virtual close brace 71 | LeftSquare 72 | RightSquare 73 | ParArrayLeftSquare -- [: 74 | ParArrayRightSquare -- :] 75 | Comma 76 | Underscore 77 | BackQuote 78 79-- Reserved operators 80 81 | Dot -- reserved for use with 'forall x . x' 82 | DotDot 83 | Colon 84 | QuoteColon 85 | DoubleColon 86 | Equals 87 | Backslash 88 | Bar 89 | LeftArrow 90 | RightArrow 91 | At 92 | TApp -- '@' but have to check for preceeding whitespace 93 | Tilde 94 | DoubleArrow 95 | Minus 96 | Exclamation 97 | Star 98 | LeftArrowTail -- -< 99 | RightArrowTail -- >- 100 | LeftDblArrowTail -- -<< 101 | RightDblArrowTail -- >>- 102 | OpenArrowBracket -- (| 103 | CloseArrowBracket -- |) 104 105-- Template Haskell 106 | THExpQuote -- [| or [e| 107 | THTExpQuote -- [|| or [e|| 108 | THPatQuote -- [p| 109 | THDecQuote -- [d| 110 | THTypQuote -- [t| 111 | THCloseQuote -- |] 112 | THTCloseQuote -- ||] 113 | THIdEscape (String) -- dollar x 114 | THParenEscape -- dollar ( 115 | THTIdEscape String -- dollar dollar x 116 | THTParenEscape -- double dollar ( 117 | THVarQuote -- 'x (but without the x) 118 | THTyQuote -- ''T (but without the T) 119 | THQuasiQuote (String,String) -- [$...|...] 120 121-- HaRP 122 | RPGuardOpen -- (| 123 | RPGuardClose -- |) 124 | RPCAt -- @: 125 126-- Hsx 127 | XCodeTagOpen -- <% 128 | XCodeTagClose -- %> 129 | XStdTagOpen -- < 130 | XStdTagClose -- > 131 | XCloseTagOpen -- </ 132 | XEmptyTagClose -- /> 133 | XChildTagOpen -- <%> (note that close doesn't exist, it's XCloseTagOpen followed by XCodeTagClose) 134 | XPCDATA String 135 | XRPatOpen -- <[ 136 | XRPatClose -- ]> 137 138-- Pragmas 139 140 | PragmaEnd -- #-} 141 | RULES 142 | INLINE Bool 143 | INLINE_CONLIKE 144 | SPECIALISE 145 | SPECIALISE_INLINE Bool 146 | SOURCE 147 | DEPRECATED 148 | WARNING 149 | SCC 150 | GENERATED 151 | CORE 152 | UNPACK 153 | NOUNPACK 154 | OPTIONS (Maybe String,String) 155-- | CFILES String 156-- | INCLUDE String 157 | LANGUAGE 158 | ANN 159 | MINIMAL 160 | NO_OVERLAP 161 | OVERLAP 162 | OVERLAPPING 163 | OVERLAPPABLE 164 | OVERLAPS 165 | INCOHERENT 166 | COMPLETE 167 168-- Reserved Ids 169 170 | KW_As 171 | KW_By -- transform list comprehensions 172 | KW_Case 173 | KW_Class 174 | KW_Data 175 | KW_Default 176 | KW_Deriving 177 | KW_Do 178 | KW_MDo 179 | KW_Else 180 | KW_Family -- indexed type families 181 | KW_Forall -- universal/existential types 182 | KW_Group -- transform list comprehensions 183 | KW_Hiding 184 | KW_If 185 | KW_Import 186 | KW_In 187 | KW_Infix 188 | KW_InfixL 189 | KW_InfixR 190 | KW_Instance 191 | KW_Let 192 | KW_Module 193 | KW_NewType 194 | KW_Of 195 | KW_Proc -- arrows 196 | KW_Rec -- arrows 197 | KW_Role 198 | KW_Then 199 | KW_Type 200 | KW_Using -- transform list comprehensions 201 | KW_Where 202 | KW_Qualified 203 | KW_Pattern 204 | KW_Stock 205 | KW_Anyclass 206 | KW_Via 207 208 -- FFI 209 | KW_Foreign 210 | KW_Export 211 | KW_Safe 212 | KW_Unsafe 213 | KW_Threadsafe 214 | KW_Interruptible 215 | KW_StdCall 216 | KW_CCall 217 | KW_CPlusPlus 218 | KW_DotNet 219 | KW_Jvm 220 | KW_Js 221 | KW_JavaScript 222 | KW_CApi 223 224 | EOF 225 deriving (Eq,Show) 226 227reserved_ops :: [(String,(Token, Maybe ExtScheme))] 228reserved_ops = [ 229 ( "..", (DotDot, Nothing) ), 230 ( ":", (Colon, Nothing) ), 231 ( "::", (DoubleColon, Nothing) ), 232 ( "=", (Equals, Nothing) ), 233 ( "\\", (Backslash, Nothing) ), 234 ( "|", (Bar, Nothing) ), 235 ( "<-", (LeftArrow, Nothing) ), 236 ( "->", (RightArrow, Nothing) ), 237 ( "@", (At, Nothing) ), 238 ( "@:", (RPCAt, Just (Any [RegularPatterns])) ), 239 ( "~", (Tilde, Nothing) ), 240 ( "=>", (DoubleArrow, Nothing) ), 241 ( "*", (Star, Just (Any [KindSignatures])) ), 242 -- Parallel arrays 243 ( "[:", (ParArrayLeftSquare, Just (Any [ParallelArrays])) ), 244 ( ":]", (ParArrayRightSquare, Just (Any [ParallelArrays])) ), 245 -- Arrows notation 246 ( "-<", (LeftArrowTail, Just (Any [Arrows])) ), 247 ( ">-", (RightArrowTail, Just (Any [Arrows])) ), 248 ( "-<<", (LeftDblArrowTail, Just (Any [Arrows])) ), 249 ( ">>-", (RightDblArrowTail, Just (Any [Arrows])) ), 250 -- Unicode notation 251 ( "\x2190", (LeftArrow, Just (Any [UnicodeSyntax])) ), 252 ( "\x2192", (RightArrow, Just (Any [UnicodeSyntax])) ), 253 ( "\x21d2", (DoubleArrow, Just (Any [UnicodeSyntax])) ), 254 ( "\x2237", (DoubleColon, Just (Any [UnicodeSyntax])) ), 255 ( "\x2919", (LeftArrowTail, Just (All [UnicodeSyntax, Arrows])) ), 256 ( "\x291a", (RightArrowTail, Just (All [UnicodeSyntax, Arrows])) ), 257 ( "\x291b", (LeftDblArrowTail, Just (All [UnicodeSyntax, Arrows])) ), 258 ( "\x291c", (RightDblArrowTail, Just (All [UnicodeSyntax, Arrows])) ), 259 ( "\x2605", (Star, Just (All [UnicodeSyntax, KindSignatures])) ), 260 ( "\x2200", (KW_Forall, Just (All [UnicodeSyntax, ExplicitForAll])) ) 261 ] 262 263special_varops :: [(String,(Token, Maybe ExtScheme))] 264special_varops = [ 265 -- the dot is only a special symbol together with forall, but can still be used as function composition 266 ( ".", (Dot, Just (Any [ExplicitForAll, ExistentialQuantification])) ), 267 ( "-", (Minus, Nothing) ), 268 ( "!", (Exclamation, Nothing) ) 269 ] 270 271reserved_ids :: [(String,(Token, Maybe ExtScheme))] 272reserved_ids = [ 273 ( "_", (Underscore, Nothing) ), 274 ( "by", (KW_By, Just (Any [TransformListComp])) ), 275 ( "case", (KW_Case, Nothing) ), 276 ( "class", (KW_Class, Nothing) ), 277 ( "data", (KW_Data, Nothing) ), 278 ( "default", (KW_Default, Nothing) ), 279 ( "deriving", (KW_Deriving, Nothing) ), 280 ( "do", (KW_Do, Nothing) ), 281 ( "else", (KW_Else, Nothing) ), 282 ( "family", (KW_Family, Just (Any [TypeFamilies])) ), -- indexed type families 283 ( "forall", (KW_Forall, Just (Any [ExplicitForAll, ExistentialQuantification])) ), -- universal/existential quantification 284 ( "group", (KW_Group, Just (Any [TransformListComp])) ), 285 ( "if", (KW_If, Nothing) ), 286 ( "import", (KW_Import, Nothing) ), 287 ( "in", (KW_In, Nothing) ), 288 ( "infix", (KW_Infix, Nothing) ), 289 ( "infixl", (KW_InfixL, Nothing) ), 290 ( "infixr", (KW_InfixR, Nothing) ), 291 ( "instance", (KW_Instance, Nothing) ), 292 ( "let", (KW_Let, Nothing) ), 293 ( "mdo", (KW_MDo, Just (Any [RecursiveDo])) ), 294 ( "module", (KW_Module, Nothing) ), 295 ( "newtype", (KW_NewType, Nothing) ), 296 ( "of", (KW_Of, Nothing) ), 297 ( "proc", (KW_Proc, Just (Any [Arrows])) ), 298 ( "rec", (KW_Rec, Just (Any [Arrows, RecursiveDo, DoRec])) ), 299 ( "then", (KW_Then, Nothing) ), 300 ( "type", (KW_Type, Nothing) ), 301 ( "using", (KW_Using, Just (Any [TransformListComp])) ), 302 ( "where", (KW_Where, Nothing) ), 303 ( "role", (KW_Role, Just (Any [RoleAnnotations]))), 304 ( "pattern", (KW_Pattern, Just (Any [PatternSynonyms]))), 305 ( "stock", (KW_Stock, Just (Any [DerivingStrategies]))), 306 ( "anyclass", (KW_Anyclass, Just (Any [DerivingStrategies]))), 307 ( "via", (KW_Via, Just (Any [DerivingVia]))), 308 309-- FFI 310 ( "foreign", (KW_Foreign, Just (Any [ForeignFunctionInterface])) ) 311 ] 312 313 314special_varids :: [(String,(Token, Maybe ExtScheme))] 315special_varids = [ 316 ( "as", (KW_As, Nothing) ), 317 ( "qualified", (KW_Qualified, Nothing) ), 318 ( "hiding", (KW_Hiding, Nothing) ), 319 320-- FFI 321 ( "export", (KW_Export, Just (Any [ForeignFunctionInterface])) ), 322 ( "safe", (KW_Safe, Just (Any [ForeignFunctionInterface, SafeImports, Safe, Trustworthy])) ), 323 ( "unsafe", (KW_Unsafe, Just (Any [ForeignFunctionInterface])) ), 324 ( "threadsafe", (KW_Threadsafe, Just (Any [ForeignFunctionInterface])) ), 325 ( "interruptible", (KW_Interruptible, Just (Any [InterruptibleFFI])) ), 326 ( "stdcall", (KW_StdCall, Just (Any [ForeignFunctionInterface])) ), 327 ( "ccall", (KW_CCall, Just (Any [ForeignFunctionInterface])) ), 328 ( "cplusplus", (KW_CPlusPlus, Just (Any [ForeignFunctionInterface])) ), 329 ( "dotnet", (KW_DotNet, Just (Any [ForeignFunctionInterface])) ), 330 ( "jvm", (KW_Jvm, Just (Any [ForeignFunctionInterface])) ), 331 ( "js", (KW_Js, Just (Any [ForeignFunctionInterface])) ), 332 ( "javascript", (KW_JavaScript, Just (Any [ForeignFunctionInterface])) ), 333 ( "capi", (KW_CApi, Just (Any [CApiFFI])) ) 334 ] 335 336pragmas :: [(String,Token)] 337pragmas = [ 338 ( "rules", RULES ), 339 ( "inline", INLINE True ), 340 ( "noinline", INLINE False ), 341 ( "notinline", INLINE False ), 342 ( "specialise", SPECIALISE ), 343 ( "specialize", SPECIALISE ), 344 ( "source", SOURCE ), 345 ( "deprecated", DEPRECATED ), 346 ( "warning", WARNING ), 347 ( "ann", ANN ), 348 ( "scc", SCC ), 349 ( "generated", GENERATED ), 350 ( "core", CORE ), 351 ( "unpack", UNPACK ), 352 ( "nounpack", NOUNPACK ), 353 ( "language", LANGUAGE ), 354 ( "minimal", MINIMAL ), 355 ( "no_overlap", NO_OVERLAP ), 356 ( "overlap", OVERLAP ), 357 ( "overlaps", OVERLAPS ), 358 ( "overlapping", OVERLAPPING ), 359 ( "overlappable", OVERLAPPABLE ), 360 ( "incoherent", INCOHERENT ), 361 ( "complete", COMPLETE ), 362 ( "options", OPTIONS undefined ) -- we'll tweak it before use - promise! 363-- ( "cfiles", CFILES undefined ), -- same here... 364-- ( "include", INCLUDE undefined ) -- ...and here! 365 ] 366 367isIdent, isHSymbol, isPragmaChar :: Char -> Bool 368isIdent c = isAlphaNum c || c == '\'' || c == '_' 369 370isHSymbol c = c `elem` ":!#%&*./?@\\-" || ((isSymbol c || isPunctuation c) && not (c `elem` "(),;[]`{}_\"'")) 371 372isPragmaChar c = isAlphaNum c || c == '_' 373 374isIdentStart :: Char -> Bool 375isIdentStart c = isAlpha c && not (isUpper c) || c == '_' 376 377 378-- Used in the lexing of type applications 379-- Why is it like this? I don't know exactly but this is how it is in 380-- GHC's parser. 381isOpSymbol :: Char -> Bool 382isOpSymbol c = c `elem` "!#$%&*+./<=>?@\\^|-~" 383 384-- | Checks whether the character would be legal in some position of a qvar. 385-- Means that '..' and "AAA" will pass the test. 386isPossiblyQvar :: Char -> Bool 387isPossiblyQvar c = isIdent (toLower c) || c == '.' 388 389matchChar :: Char -> String -> Lex a () 390matchChar c msg = do 391 s <- getInput 392 if null s || head s /= c then fail msg else discard 1 393 394-- The top-level lexer. 395-- We need to know whether we are at the beginning of the line to decide 396-- whether to insert layout tokens. 397 398lexer :: (Loc Token -> P a) -> P a 399lexer = runL topLexer 400 401topLexer :: Lex a (Loc Token) 402topLexer = do 403 b <- pullCtxtFlag 404 if b then -- trace (show cf ++ ": " ++ show VRightCurly) $ 405 -- the lex context state flags that we must do an empty {} - UGLY 406 setBOL >> getSrcLocL >>= \l -> return (Loc (mkSrcSpan l l) VRightCurly) 407 else do 408 bol <- checkBOL 409 (bol', ws) <- lexWhiteSpace bol 410 -- take care of whitespace in PCDATA 411 ec <- getExtContext 412 case ec of 413 -- if there was no linebreak, and we are lexing PCDATA, 414 -- then we want to care about the whitespace. 415 -- We don't bother to test for XmlSyntax, since we 416 -- couldn't end up in ChildCtxt otherwise. 417 Just ChildCtxt | not bol' && ws -> getSrcLocL >>= \l -> return $ Loc (mkSrcSpan l l) $ XPCDATA " " 418 _ -> do startToken 419 sl <- getSrcLocL 420 t <- if bol' then lexBOL -- >>= \t -> trace ("BOL: " ++ show t) (return t) 421 else lexToken -- >>= \t -> trace (show t) (return t) 422 el <- getSrcLocL 423 return $ Loc (mkSrcSpan sl el) t 424 425lexWhiteSpace :: Bool -> Lex a (Bool, Bool) 426lexWhiteSpace bol = do 427 s <- getInput 428 ignL <- ignoreLinePragmasL 429 case s of 430 -- If we find a recognised pragma, we don't want to treat it as a comment. 431 '{':'-':'#':rest | isRecognisedPragma rest -> return (bol, False) 432 | isLinePragma rest && not ignL -> do 433 (l, fn) <- lexLinePragma 434 setSrcLineL l 435 setLineFilenameL fn 436 lexWhiteSpace True 437 '{':'-':_ -> do 438 loc <- getSrcLocL 439 discard 2 440 (bol1, c) <- lexNestedComment bol "" 441 loc2 <- getSrcLocL 442 pushComment $ Comment True (mkSrcSpan loc loc2) (reverse c) 443 (bol2, _) <- lexWhiteSpace bol1 444 return (bol2, True) 445 '-':'-':s1 | all (== '-') (takeWhile isHSymbol s1) -> do 446 loc <- getSrcLocL 447 discard 2 448 dashes <- lexWhile (== '-') 449 rest <- lexWhile (/= '\n') 450 s' <- getInput 451 loc2 <- getSrcLocL 452 let com = Comment False (mkSrcSpan loc loc2) $ dashes ++ rest 453 case s' of 454 [] -> pushComment com >> return (False, True) 455 _ -> do 456 pushComment com 457 lexNewline 458 lexWhiteSpace_ True 459 return (True, True) 460 '\n':_ -> do 461 lexNewline 462 lexWhiteSpace_ True 463 return (True, True) 464 '\t':_ -> do 465 lexTab 466 (bol', _) <- lexWhiteSpace bol 467 return (bol', True) 468 c:_ | isSpace c -> do 469 discard 1 470 (bol', _) <- lexWhiteSpace bol 471 return (bol', True) 472 _ -> return (bol, False) 473 474-- | lexWhiteSpace without the return value. 475lexWhiteSpace_ :: Bool -> Lex a () 476lexWhiteSpace_ bol = do _ <- lexWhiteSpace bol 477 return () 478 479isRecognisedPragma, isLinePragma :: String -> Bool 480isRecognisedPragma str = let pragma = takeWhile isPragmaChar . dropWhile isSpace $ str 481 in case lookupKnownPragma pragma of 482 Nothing -> False 483 _ -> True 484 485isLinePragma str = let pragma = map toLower . takeWhile isAlphaNum . dropWhile isSpace $ str 486 in case pragma of 487 "line" -> True 488 _ -> False 489 490lexLinePragma :: Lex a (Int, String) 491lexLinePragma = do 492 discard 3 -- {-# 493 lexWhile_ isSpace 494 discard 4 -- LINE 495 lexWhile_ isSpace 496 i <- lexWhile isDigit 497 when (null i) $ fail "Improperly formatted LINE pragma" 498 lexWhile_ isSpace 499 matchChar '"' "Improperly formatted LINE pragma" 500 fn <- lexWhile (/= '"') 501 matchChar '"' "Impossible - lexLinePragma" 502 lexWhile_ isSpace 503 mapM_ (flip matchChar "Improperly formatted LINE pragma") "#-}" 504 lexNewline 505 return (read i, fn) 506 507lexNestedComment :: Bool -> String -> Lex a (Bool, String) 508lexNestedComment bol str = do 509 s <- getInput 510 case s of 511 '-':'}':_ -> discard 2 >> return (bol, str) 512 '{':'-':_ -> do 513 discard 2 514 (bol', c) <- lexNestedComment bol ("-{" ++ str) -- rest of the subcomment 515 lexNestedComment bol' ("}-" ++ c ) -- rest of this comment 516 '\t':_ -> lexTab >> lexNestedComment bol ('\t':str) 517 '\n':_ -> lexNewline >> lexNestedComment True ('\n':str) 518 c:_ -> discard 1 >> lexNestedComment bol (c:str) 519 [] -> fail "Unterminated nested comment" 520 521-- When we are lexing the first token of a line, check whether we need to 522-- insert virtual semicolons or close braces due to layout. 523 524lexBOL :: Lex a Token 525lexBOL = do 526 pos <- getOffside 527 -- trace ("Off: " ++ (show pos)) $ do 528 case pos of 529 LT -> do 530 -- trace "layout: inserting '}'\n" $ 531 -- Set col to 0, indicating that we're still at the 532 -- beginning of the line, in case we need a semi-colon too. 533 -- Also pop the context here, so that we don't insert 534 -- another close brace before the parser can pop it. 535 setBOL 536 popContextL "lexBOL" 537 return VRightCurly 538 EQ -> 539 -- trace "layout: inserting ';'\n" $ 540 return SemiColon 541 GT -> lexToken 542 543lexToken :: Lex a Token 544lexToken = do 545 ec <- getExtContext 546 -- we don't bother to check XmlSyntax since we couldn't 547 -- have ended up in a non-Nothing context if it wasn't 548 -- enabled. 549 case ec of 550 Just HarpCtxt -> lexHarpToken 551 Just TagCtxt -> lexTagCtxt 552 Just CloseTagCtxt -> lexCloseTagCtxt 553 Just ChildCtxt -> lexChildCtxt 554 Just CodeTagCtxt -> lexCodeTagCtxt 555 _ -> lexStdToken 556 557 558lexChildCtxt :: Lex a Token 559lexChildCtxt = do 560 -- if we ever end up here, then XmlSyntax must be on. 561 s <- getInput 562 case s of 563 '<':'%':'>':_ -> do discard 3 564 pushExtContextL ChildCtxt 565 return XChildTagOpen 566 '<':'%':_ -> do discard 2 567 pushExtContextL CodeTagCtxt 568 return XCodeTagOpen 569 '<':'/':_ -> do discard 2 570 popExtContextL "lexChildCtxt" 571 pushExtContextL CloseTagCtxt 572 return XCloseTagOpen 573 '<':'[':_ -> do discard 2 574 pushExtContextL HarpCtxt 575 return XRPatOpen 576 '<':_ -> do discard 1 577 pushExtContextL TagCtxt 578 return XStdTagOpen 579 _ -> lexPCDATA 580 581 582lexPCDATA :: Lex a Token 583lexPCDATA = do 584 -- if we ever end up here, then XmlSyntax must be on. 585 s <- getInput 586 case s of 587 [] -> return EOF 588 _ -> case s of 589 '\n':_ -> do 590 x <- lexNewline >> lexPCDATA 591 case x of 592 XPCDATA p -> return $ XPCDATA $ '\n':p 593 EOF -> return EOF 594 _ -> fail $ "lexPCDATA: unexpected token: " ++ show x 595 '<':_ -> return $ XPCDATA "" 596 _ -> do let pcd = takeWhile (\c -> c `notElem` "<\n") s 597 l = length pcd 598 discard l 599 x <- lexPCDATA 600 case x of 601 XPCDATA pcd' -> return $ XPCDATA $ pcd ++ pcd' 602 EOF -> return EOF 603 _ -> fail $ "lexPCDATA: unexpected token: " ++ show x 604 605 606lexCodeTagCtxt :: Lex a Token 607lexCodeTagCtxt = do 608 -- if we ever end up here, then XmlSyntax must be on. 609 s <- getInput 610 case s of 611 '%':'>':_ -> do discard 2 612 popExtContextL "lexCodeTagContext" 613 return XCodeTagClose 614 _ -> lexStdToken 615 616lexCloseTagCtxt :: Lex a Token 617lexCloseTagCtxt = do 618 -- if we ever end up here, then XmlSyntax must be on. 619 s <- getInput 620 case s of 621 '%':'>':_ -> do discard 2 622 popExtContextL "lexCloseTagCtxt" 623 return XCodeTagClose 624 '>':_ -> do discard 1 625 popExtContextL "lexCloseTagCtxt" 626 return XStdTagClose 627 _ -> lexStdToken 628 629lexTagCtxt :: Lex a Token 630lexTagCtxt = do 631 -- if we ever end up here, then XmlSyntax must be on. 632 s <- getInput 633 case s of 634 '/':'>':_ -> do discard 2 635 popExtContextL "lexTagCtxt: Empty tag" 636 return XEmptyTagClose 637 '>':_ -> do discard 1 638 popExtContextL "lexTagCtxt: Standard tag" 639 pushExtContextL ChildCtxt 640 return XStdTagClose 641 _ -> lexStdToken 642 643lexHarpToken :: Lex a Token 644lexHarpToken = do 645 -- if we ever end up here, then RegularPatterns must be on. 646 s <- getInput 647 case s of 648 ']':'>':_ -> do discard 2 649 popExtContextL "lexHarpToken" 650 return XRPatClose 651 _ -> lexStdToken 652 653lexStdToken :: Lex a Token 654lexStdToken = do 655 s <- getInput 656 exts <- getExtensionsL 657 let intHash = lexHash IntTok IntTokHash (Right WordTokHash) 658 case s of 659 [] -> return EOF 660 661 '0':c:d:_ | toLower c == 'o' && isOctDigit d -> do 662 discard 2 663 (n, str) <- lexOctal 664 con <- intHash 665 return (con (n, '0':c:str)) 666 | toLower c == 'b' && isBinDigit d && BinaryLiterals `elem` exts -> do 667 discard 2 668 (n, str) <- lexBinary 669 con <- intHash 670 return (con (n, '0':c:str)) 671 | toLower c == 'x' && isHexDigit d -> do 672 discard 2 673 (n, str) <- lexHexadecimal 674 con <- intHash 675 return (con (n, '0':c:str)) 676 677 -- implicit parameters 678 '?':c:_ | isIdentStart c && ImplicitParams `elem` exts -> do 679 discard 1 680 id <- lexWhile isIdent 681 return $ IDupVarId id 682 683 '%':c:_ | isIdentStart c && ImplicitParams `elem` exts -> do 684 discard 1 685 id <- lexWhile isIdent 686 return $ ILinVarId id 687 -- end implicit parameters 688 689 -- harp 690 '(':'|':c:_ | RegularPatterns `elem` exts && not (isHSymbol c) -> 691 discard 2 >> return RPGuardOpen 692 | Arrows `elem` exts && not (isHSymbol c) -> 693 discard 2 >> return OpenArrowBracket 694 '|':')':_ | RegularPatterns `elem` exts -> discard 2 >> return RPGuardClose 695 | Arrows `elem` exts -> discard 2 >> return CloseArrowBracket 696 {- This is handled by the reserved_ops above. 697 '@':':':_ | RegularPatterns `elem` exts -> 698 do discard 2 699 return RPCAt -} 700 701 702 -- template haskell 703 '[':'|':'|':_ | TemplateHaskell `elem` exts -> do 704 discard 3 705 return THTExpQuote 706 707 '[':'e':'|':'|':_ | TemplateHaskell `elem` exts -> do 708 discard 4 709 return THTExpQuote 710 711 '[':'|':_ | TemplateHaskell `elem` exts -> do 712 discard 2 713 return THExpQuote 714 715 '[':c:'|':_ | c == 'e' && TemplateHaskell `elem` exts -> do 716 discard 3 717 return THExpQuote 718 | c == 'p' && TemplateHaskell `elem` exts -> do 719 discard 3 720 return THPatQuote 721 | c == 'd' && TemplateHaskell `elem` exts -> do 722 discard 3 723 return THDecQuote 724 | c == 't' && TemplateHaskell `elem` exts -> do 725 discard 3 726 return THTypQuote 727 '[':'$':c:_ | isIdentStart c && QuasiQuotes `elem` exts -> 728 discard 2 >> lexQuasiQuote c 729 730 '[':c:s' | isIdentStart c && QuasiQuotes `elem` exts && case dropWhile isIdent s' of { '|':_ -> True;_->False} -> 731 discard 1 >> lexQuasiQuote c 732 | isUpper c && QuasiQuotes `elem` exts && case dropWhile isPossiblyQvar s' of { '|':_ -> True;_->False} -> 733 discard 1 >> lexQuasiQuote c 734 735 '|':'|':']':_ | TemplateHaskell `elem` exts -> do 736 discard 3 737 return THTCloseQuote 738 '|':']':_ | TemplateHaskell `elem` exts -> do 739 discard 2 740 return THCloseQuote 741 742 '$':c1:c2:_ | isIdentStart c1 && TemplateHaskell `elem` exts -> do 743 discard 1 744 id <- lexWhile isIdent 745 return $ THIdEscape id 746 | c1 == '(' && TemplateHaskell `elem` exts -> do 747 discard 2 748 return THParenEscape 749 | c1 == '$' && isIdentStart c2 && TemplateHaskell `elem` exts -> do 750 discard 2 751 id <- lexWhile isIdent 752 return $ THTIdEscape id 753 | c1 == '$' && c2 == '(' && TemplateHaskell `elem` exts -> do 754 discard 3 755 return THTParenEscape 756 -- end template haskell 757 758 -- hsx 759 '<':'%':c:_ | XmlSyntax `elem` exts -> 760 case c of 761 '>' -> do discard 3 762 pushExtContextL ChildCtxt 763 return XChildTagOpen 764 _ -> do discard 2 765 pushExtContextL CodeTagCtxt 766 return XCodeTagOpen 767 '<':c:_ | isAlpha c && XmlSyntax `elem` exts -> do 768 discard 1 769 pushExtContextL TagCtxt 770 return XStdTagOpen 771 -- end hsx 772 773 '(':'#':c:_ | unboxed exts && not (isHSymbol c) -> discard 2 >> return LeftHashParen 774 775 '#':')':_ | unboxed exts -> discard 2 >> return RightHashParen 776 777 -- pragmas 778 779 '{':'-':'#':_ -> saveExtensionsL >> discard 3 >> lexPragmaStart 780 781 '#':'-':'}':_ -> restoreExtensionsL >> discard 3 >> return PragmaEnd 782 783 -- Parallel arrays 784 785 '[':':':_ | ParallelArrays `elem` exts -> discard 2 >> return ParArrayLeftSquare 786 787 ':':']':_ | ParallelArrays `elem` exts -> discard 2 >> return ParArrayRightSquare 788 789 -- Lexed seperately to deal with visible type applciation 790 791 '@':c:_ | TypeApplications `elem` exts 792 -- Operator starting with an '@' 793 && not (isOpSymbol c) -> do 794 lc <- getLastChar 795 if isIdent lc 796 then discard 1 >> return At 797 else discard 1 >> return TApp 798 799 '#':c:_ | OverloadedLabels `elem` exts 800 && isIdentStart c -> do 801 discard 1 802 [ident] <- lexIdents 803 return $ LabelVarId ident 804 805 806 c:_ | isDigit c -> lexDecimalOrFloat 807 808 | isUpper c -> lexConIdOrQual "" 809 810 | isIdentStart c -> do 811 idents <- lexIdents 812 case idents of 813 [ident] -> case lookup ident (reserved_ids ++ special_varids) of 814 Just (keyword, scheme) -> 815 -- check if an extension keyword is enabled 816 if isEnabled scheme exts 817 then flagKW keyword >> return keyword 818 else return $ VarId ident 819 Nothing -> return $ VarId ident 820 _ -> return $ DVarId idents 821 822 | isHSymbol c -> do 823 sym <- lexWhile isHSymbol 824 return $ case lookup sym (reserved_ops ++ special_varops) of 825 Just (t , scheme) -> 826 -- check if an extension op is enabled 827 if isEnabled scheme exts 828 then t 829 else case c of 830 ':' -> ConSym sym 831 _ -> VarSym sym 832 Nothing -> case c of 833 ':' -> ConSym sym 834 _ -> VarSym sym 835 836 | otherwise -> do 837 discard 1 838 case c of 839 840 -- First the special symbols 841 '(' -> return LeftParen 842 ')' -> return RightParen 843 ',' -> return Comma 844 ';' -> return SemiColon 845 '[' -> return LeftSquare 846 ']' -> return RightSquare 847 '`' -> return BackQuote 848 '{' -> do 849 pushContextL NoLayout 850 return LeftCurly 851 '}' -> do 852 popContextL "lexStdToken" 853 return RightCurly 854 855 '\'' -> lexCharacter 856 '"' -> lexString 857 858 _ -> fail ("Illegal character \'" ++ show c ++ "\'\n") 859 860 where lexIdents :: Lex a [String] 861 lexIdents = do 862 ident <- lexWhile isIdent 863 s <- getInput 864 exts <- getExtensionsL 865 case s of 866 -- This is the only way we can get more than one ident in the list 867 -- and it requires XmlSyntax to be on. 868 '-':c:_ | XmlSyntax `elem` exts && isAlpha c -> do 869 discard 1 870 idents <- lexIdents 871 return $ ident : idents 872 '#':_ | MagicHash `elem` exts -> do 873 hashes <- lexWhile (== '#') 874 return [ident ++ hashes] 875 _ -> return [ident] 876 877 lexQuasiQuote :: Char -> Lex a Token 878 lexQuasiQuote c = do 879 -- We've seen and dropped [$ already 880 ident <- lexQuoter 881 matchChar '|' "Malformed quasi-quote quoter" 882 body <- lexQQBody 883 return $ THQuasiQuote (ident, body) 884 where lexQuoter 885 | isIdentStart c = lexWhile isIdent 886 | otherwise = do 887 qualThing <- lexConIdOrQual "" 888 case qualThing of 889 QVarId (s1,s2) -> return $ s1 ++ '.':s2 890 QVarSym (s1, s2) -> return $ s1 ++ '.':s2 891 _ -> fail "Malformed quasi-quote quoter" 892 893 lexQQBody :: Lex a String 894 lexQQBody = do 895 s <- getInput 896 case s of 897 '\\':']':_ -> do discard 2 898 str <- lexQQBody 899 return (']':str) 900 '\\':'|':_ -> do discard 2 901 str <- lexQQBody 902 return ('|':str) 903 '|':']':_ -> discard 2 >> return "" 904 '|':_ -> do discard 1 905 str <- lexQQBody 906 return ('|':str) 907 ']':_ -> do discard 1 908 str <- lexQQBody 909 return (']':str) 910 '\\':_ -> do discard 1 911 str <- lexQQBody 912 return ('\\':str) 913 '\n':_ -> do lexNewline 914 str <- lexQQBody 915 return ('\n':str) 916 [] -> fail "Unexpected end of input while lexing quasi-quoter" 917 _ -> do str <- lexWhile (not . (`elem` "\\|\n")) 918 rest <- lexQQBody 919 return (str++rest) 920 921unboxed :: [KnownExtension] -> Bool 922unboxed exts = UnboxedSums `elem` exts || UnboxedTuples `elem` exts 923 924-- Underscores are used in some pragmas. Options pragmas are a special case 925-- with our representation: the thing after the underscore is a parameter. 926-- Strip off the parameters to option pragmas by hand here, everything else 927-- sits in the pragmas map. 928lookupKnownPragma :: String -> Maybe Token 929lookupKnownPragma s = 930 case map toLower s of 931 x | "options_" `isPrefixOf` x -> Just $ OPTIONS (Just $ drop 8 s, undefined) 932 | "options" == x -> Just $ OPTIONS (Nothing, undefined) 933 | otherwise -> lookup x pragmas 934 935lexPragmaStart :: Lex a Token 936lexPragmaStart = do 937 lexWhile_ isSpace 938 pr <- lexWhile isPragmaChar 939 case lookupKnownPragma pr of 940 Just (INLINE True) -> do 941 s <- getInput 942 case map toLower s of 943 ' ':'c':'o':'n':'l':'i':'k':'e':_ -> do 944 discard 8 945 return INLINE_CONLIKE 946 _ -> return $ INLINE True 947 Just SPECIALISE -> do 948 s <- getInput 949 case dropWhile isSpace $ map toLower s of 950 'i':'n':'l':'i':'n':'e':_ -> do 951 lexWhile_ isSpace 952 discard 6 953 return $ SPECIALISE_INLINE True 954 'n':'o':'i':'n':'l':'i':'n':'e':_ -> do 955 lexWhile_ isSpace 956 discard 8 957 return $ SPECIALISE_INLINE False 958 'n':'o':'t':'i':'n':'l':'i':'n':'e':_ -> do 959 lexWhile_ isSpace 960 discard 9 961 return $ SPECIALISE_INLINE False 962 _ -> return SPECIALISE 963 964 Just (OPTIONS opt) -> -- see, I promised we'd mask out the 'undefined' 965 -- We do not want to store necessary whitespace in the datatype 966 -- but if the pragma starts with a newline then we must keep 967 -- it to differentiate the two cases. 968 let dropIfSpace (' ':xs) = xs 969 dropIfSpace xs = xs 970 in 971 case fst opt of 972 Just opt' -> do 973 rest <- lexRawPragma 974 return $ OPTIONS (Just opt', dropIfSpace rest) 975 Nothing -> do 976 s <- getInput 977 case s of 978 x:_ | isSpace x -> do 979 rest <- lexRawPragma 980 return $ OPTIONS (Nothing, dropIfSpace rest) 981 _ -> fail "Malformed Options pragma" 982 Just RULES -> do -- Rules enable ScopedTypeVariables locally. 983 addExtensionL ScopedTypeVariables 984 return RULES 985{- Just (CFILES _) -> do 986 rest <- lexRawPragma 987 return $ CFILES rest 988 Just (INCLUDE _) -> do 989 rest <- lexRawPragma 990 return $ INCLUDE rest -} 991 Just p -> return p 992 993 _ -> fail "Internal error: Unrecognised recognised pragma" 994 -- do rawStr <- lexRawPragma 995 -- return $ PragmaUnknown (pr, rawStr) -- no support for unrecognized pragmas, treat as comment 996 -- discard 3 -- #-} 997 -- topLexer -- we just discard it as a comment for now and restart -} 998 999lexRawPragma :: Lex a String 1000lexRawPragma = lexRawPragmaAux 1001 where lexRawPragmaAux = do 1002 rpr <- lexWhile (/='#') 1003 s <- getInput 1004 case s of 1005 '#':'-':'}':_ -> return rpr 1006 "" -> fail "End-of-file inside pragma" 1007 _ -> do 1008 discard 1 1009 rpr' <- lexRawPragma 1010 return $ rpr ++ '#':rpr' 1011 1012lexDecimalOrFloat :: Lex a Token 1013lexDecimalOrFloat = do 1014 ds <- lexWhile isDigit 1015 rest <- getInput 1016 exts <- getExtensionsL 1017 case rest of 1018 ('.':d:_) | isDigit d -> do 1019 discard 1 1020 frac <- lexWhile isDigit 1021 let num = parseInteger 10 (ds ++ frac) 1022 decimals = toInteger (length frac) 1023 (exponent, estr) <- do 1024 rest2 <- getInput 1025 case rest2 of 1026 'e':_ -> lexExponent 1027 'E':_ -> lexExponent 1028 _ -> return (0,"") 1029 con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash) 1030 return $ con ((num%1) * 10^^(exponent - decimals), ds ++ '.':frac ++ estr) 1031 e:_ | toLower e == 'e' -> do 1032 (exponent, estr) <- lexExponent 1033 con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash) 1034 return $ con ((parseInteger 10 ds%1) * 10^^exponent, ds ++ estr) 1035 '#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds)) 1036 '#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds)) 1037 _ -> return (IntTok (parseInteger 10 ds, ds)) 1038 1039 where 1040 lexExponent :: Lex a (Integer, String) 1041 lexExponent = do 1042 (e:r) <- getInput 1043 discard 1 -- 'e' or 'E' 1044 case r of 1045 '+':d:_ | isDigit d -> do 1046 discard 1 1047 (n, str) <- lexDecimal 1048 return (n, e:'+':str) 1049 '-':d:_ | isDigit d -> do 1050 discard 1 1051 (n, str) <- lexDecimal 1052 return (negate n, e:'-':str) 1053 d:_ | isDigit d -> lexDecimal >>= \(n,str) -> return (n, e:str) 1054 _ -> fail "Float with missing exponent" 1055 1056lexHash :: (b -> Token) -> (b -> Token) -> Either String (b -> Token) -> Lex a (b -> Token) 1057lexHash a b c = do 1058 exts <- getExtensionsL 1059 if MagicHash `elem` exts 1060 then do 1061 r <- getInput 1062 case r of 1063 '#':'#':_ -> case c of 1064 Right c' -> discard 2 >> return c' 1065 Left s -> fail s 1066 '#':_ -> discard 1 >> return b 1067 _ -> return a 1068 else return a 1069 1070lexConIdOrQual :: String -> Lex a Token 1071lexConIdOrQual qual = do 1072 con <- lexWhile isIdent 1073 let conid | null qual = ConId con 1074 | otherwise = QConId (qual,con) 1075 qual' | null qual = con 1076 | otherwise = qual ++ '.':con 1077 just_a_conid <- alternative (return conid) 1078 rest <- getInput 1079 exts <- getExtensionsL 1080 case rest of 1081 '.':c:_ 1082 | isIdentStart c -> do -- qualified varid? 1083 discard 1 1084 ident <- lexWhile isIdent 1085 s <- getInput 1086 exts' <- getExtensionsL 1087 ident' <- case s of 1088 '#':_ | MagicHash `elem` exts' -> discard 1 >> return (ident ++ "#") 1089 _ -> return ident 1090 case lookup ident' reserved_ids of 1091 -- cannot qualify a reserved word 1092 Just (_,scheme) | isEnabled scheme exts' -> just_a_conid 1093 _ -> return (QVarId (qual', ident')) 1094 1095 | isUpper c -> do -- qualified conid? 1096 discard 1 1097 lexConIdOrQual qual' 1098 1099 | isHSymbol c -> do -- qualified symbol? 1100 discard 1 1101 sym <- lexWhile isHSymbol 1102 exts' <- getExtensionsL 1103 case lookup sym reserved_ops of 1104 -- cannot qualify a reserved operator 1105 Just (_,scheme) | isEnabled scheme exts' -> just_a_conid 1106 _ -> return $ case c of 1107 ':' -> QConSym (qual', sym) 1108 _ -> QVarSym (qual', sym) 1109 1110 '#':cs 1111 | null cs || 1112 not (isHSymbol $ head cs) && 1113 not (isIdent $ head cs) && MagicHash `elem` exts -> do 1114 discard 1 1115 case conid of 1116 ConId con' -> return $ ConId $ con' ++ "#" 1117 QConId (q,con') -> return $ QConId (q,con' ++ "#") 1118 _ -> fail $ "lexConIdOrQual: unexpected token: " ++ show conid 1119 _ -> return conid -- not a qualified thing 1120 1121lexCharacter :: Lex a Token 1122lexCharacter = do -- We need to keep track of not only character constants but also TH 'x and ''T 1123 -- We've seen ' so far 1124 s <- getInput 1125 exts <- getExtensionsL 1126 case s of 1127 '\'':_ | TemplateHaskell `elem` exts -> discard 1 >> return THTyQuote 1128 '\\':_ -> do 1129 (c,raw) <- lexEscape 1130 matchQuote 1131 con <- lexHash Character CharacterHash 1132 (Left "Double hash not available for character literals") 1133 return (con (c, '\\':raw)) 1134 c:'\'':_ -> do 1135 discard 2 1136 con <- lexHash Character CharacterHash 1137 (Left "Double hash not available for character literals") 1138 return (con (c, [c])) 1139 _ | any (`elem` exts) [TemplateHaskell, DataKinds] -> return THVarQuote 1140 _ -> fail "Improper character constant or misplaced \'" 1141 1142 where matchQuote = matchChar '\'' "Improperly terminated character constant" 1143 1144 1145lexString :: Lex a Token 1146lexString = loop ("","") 1147 where 1148 loop (s,raw) = do 1149 r <- getInput 1150 exts <- getExtensionsL 1151 case r of 1152 '\\':'&':_ -> do 1153 discard 2 1154 loop (s, '&':'\\':raw) 1155 '\\':c:_ | isSpace c -> do 1156 discard 1 1157 wcs <- lexWhiteChars 1158 matchChar '\\' "Illegal character in string gap" 1159 loop (s, '\\':reverse wcs ++ '\\':raw) 1160 | otherwise -> do 1161 (ce, str) <- lexEscape 1162 loop (ce:s, reverse str ++ '\\':raw) 1163 '"':'#':_ | MagicHash `elem` exts -> do 1164 discard 2 1165 return (StringHash (reverse s, reverse raw)) 1166 '"':_ -> do 1167 discard 1 1168 return (StringTok (reverse s, reverse raw)) 1169 c:_ | c /= '\n' -> do 1170 discard 1 1171 loop (c:s, c:raw) 1172 _ -> fail "Improperly terminated string" 1173 1174 lexWhiteChars :: Lex a String 1175 lexWhiteChars = do 1176 s <- getInput 1177 case s of 1178 '\n':_ -> do 1179 lexNewline 1180 wcs <- lexWhiteChars 1181 return $ '\n':wcs 1182 '\t':_ -> do 1183 lexTab 1184 wcs <- lexWhiteChars 1185 return $ '\t':wcs 1186 c:_ | isSpace c -> do 1187 discard 1 1188 wcs <- lexWhiteChars 1189 return $ c:wcs 1190 _ -> return "" 1191 1192lexEscape :: Lex a (Char, String) 1193lexEscape = do 1194 discard 1 1195 r <- getInput 1196 case r of 1197 1198-- Production charesc from section B.2 (Note: \& is handled by caller) 1199 1200 'a':_ -> discard 1 >> return ('\a', "a") 1201 'b':_ -> discard 1 >> return ('\b', "b") 1202 'f':_ -> discard 1 >> return ('\f', "f") 1203 'n':_ -> discard 1 >> return ('\n', "n") 1204 'r':_ -> discard 1 >> return ('\r', "r") 1205 't':_ -> discard 1 >> return ('\t', "t") 1206 'v':_ -> discard 1 >> return ('\v', "v") 1207 '\\':_ -> discard 1 >> return ('\\', "\\") 1208 '"':_ -> discard 1 >> return ('\"', "\"") 1209 '\'':_ -> discard 1 >> return ('\'', "\'") 1210 1211-- Production ascii from section B.2 1212 1213 '^':c:_ -> discard 2 >> cntrl c 1214 'N':'U':'L':_ -> discard 3 >> return ('\NUL', "NUL") 1215 'S':'O':'H':_ -> discard 3 >> return ('\SOH', "SOH") 1216 'S':'T':'X':_ -> discard 3 >> return ('\STX', "STX") 1217 'E':'T':'X':_ -> discard 3 >> return ('\ETX', "ETX") 1218 'E':'O':'T':_ -> discard 3 >> return ('\EOT', "EOT") 1219 'E':'N':'Q':_ -> discard 3 >> return ('\ENQ', "ENQ") 1220 'A':'C':'K':_ -> discard 3 >> return ('\ACK', "ACK") 1221 'B':'E':'L':_ -> discard 3 >> return ('\BEL', "BEL") 1222 'B':'S':_ -> discard 2 >> return ('\BS', "BS") 1223 'H':'T':_ -> discard 2 >> return ('\HT', "HT") 1224 'L':'F':_ -> discard 2 >> return ('\LF', "LF") 1225 'V':'T':_ -> discard 2 >> return ('\VT', "VT") 1226 'F':'F':_ -> discard 2 >> return ('\FF', "FF") 1227 'C':'R':_ -> discard 2 >> return ('\CR', "CR") 1228 'S':'O':_ -> discard 2 >> return ('\SO', "SO") 1229 'S':'I':_ -> discard 2 >> return ('\SI', "SI") 1230 'D':'L':'E':_ -> discard 3 >> return ('\DLE', "DLE") 1231 'D':'C':'1':_ -> discard 3 >> return ('\DC1', "DC1") 1232 'D':'C':'2':_ -> discard 3 >> return ('\DC2', "DC2") 1233 'D':'C':'3':_ -> discard 3 >> return ('\DC3', "DC3") 1234 'D':'C':'4':_ -> discard 3 >> return ('\DC4', "DC4") 1235 'N':'A':'K':_ -> discard 3 >> return ('\NAK', "NAK") 1236 'S':'Y':'N':_ -> discard 3 >> return ('\SYN', "SYN") 1237 'E':'T':'B':_ -> discard 3 >> return ('\ETB', "ETB") 1238 'C':'A':'N':_ -> discard 3 >> return ('\CAN', "CAN") 1239 'E':'M':_ -> discard 2 >> return ('\EM', "EM") 1240 'S':'U':'B':_ -> discard 3 >> return ('\SUB', "SUB") 1241 'E':'S':'C':_ -> discard 3 >> return ('\ESC', "ESC") 1242 'F':'S':_ -> discard 2 >> return ('\FS', "FS") 1243 'G':'S':_ -> discard 2 >> return ('\GS', "GS") 1244 'R':'S':_ -> discard 2 >> return ('\RS', "RS") 1245 'U':'S':_ -> discard 2 >> return ('\US', "US") 1246 'S':'P':_ -> discard 2 >> return ('\SP', "SP") 1247 'D':'E':'L':_ -> discard 3 >> return ('\DEL', "DEL") 1248 1249-- Escaped numbers 1250 1251 'o':c:_ | isOctDigit c -> do 1252 discard 1 1253 (n, raw) <- lexOctal 1254 n' <- checkChar n 1255 return (n', 'o':raw) 1256 'x':c:_ | isHexDigit c -> do 1257 discard 1 1258 (n, raw) <- lexHexadecimal 1259 n' <- checkChar n 1260 return (n', 'x':raw) 1261 c:_ | isDigit c -> do 1262 (n, raw) <- lexDecimal 1263 n' <- checkChar n 1264 return (n', raw) 1265 1266 _ -> fail "Illegal escape sequence" 1267 1268 where 1269 checkChar n | n <= 0x10FFFF = return (chr (fromInteger n)) 1270 checkChar _ = fail "Character constant out of range" 1271 1272-- Production cntrl from section B.2 1273 1274 cntrl :: Char -> Lex a (Char, String) 1275 cntrl c | c >= '@' && c <= '_' = return (chr (ord c - ord '@'), '^':c:[]) 1276 cntrl _ = fail "Illegal control character" 1277 1278-- assumes at least one octal digit 1279lexOctal :: Lex a (Integer, String) 1280lexOctal = do 1281 ds <- lexWhile isOctDigit 1282 return (parseInteger 8 ds, ds) 1283 1284-- assumes at least one binary digit 1285lexBinary :: Lex a (Integer, String) 1286lexBinary = do 1287 ds <- lexWhile isBinDigit 1288 return (parseInteger 2 ds, ds) 1289 1290-- assumes at least one hexadecimal digit 1291lexHexadecimal :: Lex a (Integer, String) 1292lexHexadecimal = do 1293 ds <- lexWhile isHexDigit 1294 return (parseInteger 16 ds, ds) 1295 1296-- assumes at least one decimal digit 1297lexDecimal :: Lex a (Integer, String) 1298lexDecimal = do 1299 ds <- lexWhile isDigit 1300 return (parseInteger 10 ds, ds) 1301 1302-- Stolen from Hugs's Prelude 1303parseInteger :: Integer -> String -> Integer 1304parseInteger radix ds = 1305 foldl1 (\n d -> n * radix + d) (map (toInteger . digitToInt) ds) 1306 1307flagKW :: Token -> Lex a () 1308flagKW t = 1309 when (t `elem` [KW_Do, KW_MDo]) $ do 1310 exts <- getExtensionsL 1311 when (NondecreasingIndentation `elem` exts) flagDo 1312 1313-- | Selects ASCII binary digits, i.e. @\'0\'@..@\'1\'@. 1314isBinDigit :: Char -> Bool 1315isBinDigit c = c >= '0' && c <= '1' 1316------------------------------------------------------------------ 1317-- "Pretty" printing for tokens 1318 1319showToken :: Token -> String 1320showToken t = case t of 1321 VarId s -> s 1322 LabelVarId s -> '#':s 1323 QVarId (q,s) -> q ++ '.':s 1324 IDupVarId s -> '?':s 1325 ILinVarId s -> '%':s 1326 ConId s -> s 1327 QConId (q,s) -> q ++ '.':s 1328 DVarId ss -> intercalate "-" ss 1329 VarSym s -> s 1330 ConSym s -> s 1331 QVarSym (q,s) -> q ++ '.':s 1332 QConSym (q,s) -> q ++ '.':s 1333 IntTok (_, s) -> s 1334 FloatTok (_, s) -> s 1335 Character (_, s) -> '\'':s ++ "'" 1336 StringTok (_, s) -> '"':s ++ "\"" 1337 IntTokHash (_, s) -> s ++ "#" 1338 WordTokHash (_, s) -> s ++ "##" 1339 FloatTokHash (_, s) -> s ++ "#" 1340 DoubleTokHash (_, s) -> s ++ "##" 1341 CharacterHash (_, s) -> '\'':s ++ "'#" 1342 StringHash (_, s) -> '"':s ++ "\"#" 1343 LeftParen -> "(" 1344 RightParen -> ")" 1345 LeftHashParen -> "(#" 1346 RightHashParen -> "#)" 1347 SemiColon -> ";" 1348 LeftCurly -> "{" 1349 RightCurly -> "}" 1350 VRightCurly -> "virtual }" 1351 LeftSquare -> "[" 1352 RightSquare -> "]" 1353 ParArrayLeftSquare -> "[:" 1354 ParArrayRightSquare -> ":]" 1355 Comma -> "," 1356 Underscore -> "_" 1357 BackQuote -> "`" 1358 QuoteColon -> "':" 1359 Dot -> "." 1360 DotDot -> ".." 1361 Colon -> ":" 1362 DoubleColon -> "::" 1363 Equals -> "=" 1364 Backslash -> "\\" 1365 Bar -> "|" 1366 LeftArrow -> "<-" 1367 RightArrow -> "->" 1368 At -> "@" 1369 TApp -> "@" 1370 Tilde -> "~" 1371 DoubleArrow -> "=>" 1372 Minus -> "-" 1373 Exclamation -> "!" 1374 Star -> "*" 1375 LeftArrowTail -> "-<" 1376 RightArrowTail -> ">-" 1377 LeftDblArrowTail -> "-<<" 1378 RightDblArrowTail -> ">>-" 1379 OpenArrowBracket -> "(|" 1380 CloseArrowBracket -> "|)" 1381 THExpQuote -> "[|" 1382 THTExpQuote -> "[||" 1383 THPatQuote -> "[p|" 1384 THDecQuote -> "[d|" 1385 THTypQuote -> "[t|" 1386 THCloseQuote -> "|]" 1387 THTCloseQuote -> "||]" 1388 THIdEscape s -> '$':s 1389 THParenEscape -> "$(" 1390 THTIdEscape s -> "$$" ++ s 1391 THTParenEscape -> "$$(" 1392 THVarQuote -> "'" 1393 THTyQuote -> "''" 1394 THQuasiQuote (n,q) -> "[$" ++ n ++ "|" ++ q ++ "]" 1395 RPGuardOpen -> "(|" 1396 RPGuardClose -> "|)" 1397 RPCAt -> "@:" 1398 XCodeTagOpen -> "<%" 1399 XCodeTagClose -> "%>" 1400 XStdTagOpen -> "<" 1401 XStdTagClose -> ">" 1402 XCloseTagOpen -> "</" 1403 XEmptyTagClose -> "/>" 1404 XPCDATA s -> "PCDATA " ++ s 1405 XRPatOpen -> "<[" 1406 XRPatClose -> "]>" 1407 PragmaEnd -> "#-}" 1408 RULES -> "{-# RULES" 1409 INLINE b -> "{-# " ++ if b then "INLINE" else "NOINLINE" 1410 INLINE_CONLIKE -> "{-# " ++ "INLINE CONLIKE" 1411 SPECIALISE -> "{-# SPECIALISE" 1412 SPECIALISE_INLINE b -> "{-# SPECIALISE " ++ if b then "INLINE" else "NOINLINE" 1413 SOURCE -> "{-# SOURCE" 1414 DEPRECATED -> "{-# DEPRECATED" 1415 WARNING -> "{-# WARNING" 1416 SCC -> "{-# SCC" 1417 GENERATED -> "{-# GENERATED" 1418 CORE -> "{-# CORE" 1419 UNPACK -> "{-# UNPACK" 1420 NOUNPACK -> "{-# NOUNPACK" 1421 OPTIONS (mt,_) -> "{-# OPTIONS" ++ maybe "" (':':) mt ++ " ..." 1422-- CFILES s -> "{-# CFILES ..." 1423-- INCLUDE s -> "{-# INCLUDE ..." 1424 LANGUAGE -> "{-# LANGUAGE" 1425 ANN -> "{-# ANN" 1426 MINIMAL -> "{-# MINIMAL" 1427 NO_OVERLAP -> "{-# NO_OVERLAP" 1428 OVERLAP -> "{-# OVERLAP" 1429 OVERLAPPING -> "{-# OVERLAPPING" 1430 OVERLAPPABLE -> "{-# OVERLAPPABLE" 1431 OVERLAPS -> "{-# OVERLAPS" 1432 INCOHERENT -> "{-# INCOHERENT" 1433 COMPLETE -> "{-# COMPLETE" 1434 KW_As -> "as" 1435 KW_By -> "by" 1436 KW_Case -> "case" 1437 KW_Class -> "class" 1438 KW_Data -> "data" 1439 KW_Default -> "default" 1440 KW_Deriving -> "deriving" 1441 KW_Do -> "do" 1442 KW_MDo -> "mdo" 1443 KW_Else -> "else" 1444 KW_Family -> "family" 1445 KW_Forall -> "forall" 1446 KW_Group -> "group" 1447 KW_Hiding -> "hiding" 1448 KW_If -> "if" 1449 KW_Import -> "import" 1450 KW_In -> "in" 1451 KW_Infix -> "infix" 1452 KW_InfixL -> "infixl" 1453 KW_InfixR -> "infixr" 1454 KW_Instance -> "instance" 1455 KW_Let -> "let" 1456 KW_Module -> "module" 1457 KW_NewType -> "newtype" 1458 KW_Of -> "of" 1459 KW_Proc -> "proc" 1460 KW_Rec -> "rec" 1461 KW_Then -> "then" 1462 KW_Type -> "type" 1463 KW_Using -> "using" 1464 KW_Where -> "where" 1465 KW_Qualified -> "qualified" 1466 KW_Foreign -> "foreign" 1467 KW_Export -> "export" 1468 KW_Safe -> "safe" 1469 KW_Unsafe -> "unsafe" 1470 KW_Threadsafe -> "threadsafe" 1471 KW_Interruptible -> "interruptible" 1472 KW_StdCall -> "stdcall" 1473 KW_CCall -> "ccall" 1474 XChildTagOpen -> "<%>" 1475 KW_CPlusPlus -> "cplusplus" 1476 KW_DotNet -> "dotnet" 1477 KW_Jvm -> "jvm" 1478 KW_Js -> "js" 1479 KW_JavaScript -> "javascript" 1480 KW_CApi -> "capi" 1481 KW_Role -> "role" 1482 KW_Pattern -> "pattern" 1483 KW_Stock -> "stock" 1484 KW_Anyclass -> "anyclass" 1485 KW_Via -> "via" 1486 1487 EOF -> "EOF" 1488