1----------------------------------------------------------------------------- 2-- (c) The University of Glasgow, 2006 3-- 4-- GHC's lexer for Haskell 2010 [1]. 5-- 6-- This is a combination of an Alex-generated lexer [2] from a regex 7-- definition, with some hand-coded bits. [3] 8-- 9-- Completely accurate information about token-spans within the source 10-- file is maintained. Every token has a start and end RealSrcLoc 11-- attached to it. 12-- 13-- References: 14-- [1] https://www.haskell.org/onlinereport/haskell2010/haskellch2.html 15-- [2] http://www.haskell.org/alex/ 16-- [3] https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/parser 17-- 18----------------------------------------------------------------------------- 19 20-- ToDo / known bugs: 21-- - parsing integers is a bit slow 22-- - readRational is a bit slow 23-- 24-- Known bugs, that were also in the previous version: 25-- - M... should be 3 tokens, not 1. 26-- - pragma-end should be only valid in a pragma 27 28-- qualified operator NOTES. 29-- 30-- - If M.(+) is a single lexeme, then.. 31-- - Probably (+) should be a single lexeme too, for consistency. 32-- Otherwise ( + ) would be a prefix operator, but M.( + ) would not be. 33-- - But we have to rule out reserved operators, otherwise (..) becomes 34-- a different lexeme. 35-- - Should we therefore also rule out reserved operators in the qualified 36-- form? This is quite difficult to achieve. We don't do it for 37-- qualified varids. 38 39 40-- ----------------------------------------------------------------------------- 41-- Alex "Haskell code fragment top" 42 43{ 44{-# LANGUAGE CPP #-} 45{-# LANGUAGE BangPatterns #-} 46{-# LANGUAGE LambdaCase #-} 47 48{-# OPTIONS_GHC -funbox-strict-fields #-} 49 50module Lexer ( 51 Token(..), lexer, pragState, mkPState, mkPStatePure, PState(..), 52 P(..), ParseResult(..), mkParserFlags, mkParserFlags', ParserFlags(..), 53 appendWarning, 54 appendError, 55 allocateComments, 56 MonadP(..), 57 getRealSrcLoc, getPState, withThisPackage, 58 failLocMsgP, srcParseFail, 59 getErrorMessages, getMessages, 60 popContext, pushModuleContext, setLastToken, setSrcLoc, 61 activeContext, nextIsEOF, 62 getLexState, popLexState, pushLexState, 63 ExtBits(..), 64 xtest, 65 lexTokenStream, 66 AddAnn(..),mkParensApiAnn, 67 addAnnsAt, 68 commentToAnnotation 69 ) where 70 71import GhcPrelude 72 73-- base 74import Control.Monad 75import Control.Monad.Fail as MonadFail 76import Data.Bits 77import Data.Char 78import Data.List 79import Data.Maybe 80import Data.Word 81 82import EnumSet (EnumSet) 83import qualified EnumSet 84 85-- ghc-boot 86import qualified GHC.LanguageExtensions as LangExt 87 88-- bytestring 89import Data.ByteString (ByteString) 90 91-- containers 92import Data.Map (Map) 93import qualified Data.Map as Map 94 95-- compiler/utils 96import Bag 97import Outputable 98import StringBuffer 99import FastString 100import UniqFM 101import Util ( readRational, readHexRational ) 102 103-- compiler/main 104import ErrUtils 105import DynFlags 106 107-- compiler/basicTypes 108import SrcLoc 109import Module 110import BasicTypes ( InlineSpec(..), RuleMatchInfo(..), 111 IntegralLit(..), FractionalLit(..), 112 SourceText(..) ) 113 114-- compiler/parser 115import Ctype 116 117import ApiAnnotation 118} 119 120-- ----------------------------------------------------------------------------- 121-- Alex "Character set macros" 122 123-- NB: The logic behind these definitions is also reflected in basicTypes/Lexeme.hs 124-- Any changes here should likely be reflected there. 125$unispace = \x05 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 126$nl = [\n\r\f] 127$whitechar = [$nl\v\ $unispace] 128$white_no_nl = $whitechar # \n -- TODO #8424 129$tab = \t 130 131$ascdigit = 0-9 132$unidigit = \x03 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 133$decdigit = $ascdigit -- for now, should really be $digit (ToDo) 134$digit = [$ascdigit $unidigit] 135 136$special = [\(\)\,\;\[\]\`\{\}] 137$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:] 138$unisymbol = \x04 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 139$symbol = [$ascsymbol $unisymbol] # [$special \_\"\'] 140 141$unilarge = \x01 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 142$asclarge = [A-Z] 143$large = [$asclarge $unilarge] 144 145$unismall = \x02 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 146$ascsmall = [a-z] 147$small = [$ascsmall $unismall \_] 148 149$unigraphic = \x06 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 150$graphic = [$small $large $symbol $digit $special $unigraphic \"\'] 151 152$binit = 0-1 153$octit = 0-7 154$hexit = [$decdigit A-F a-f] 155 156$uniidchar = \x07 -- Trick Alex into handling Unicode. See [Unicode in Alex]. 157$idchar = [$small $large $digit $uniidchar \'] 158 159$pragmachar = [$small $large $digit] 160 161$docsym = [\| \^ \* \$] 162 163 164-- ----------------------------------------------------------------------------- 165-- Alex "Regular expression macros" 166 167@varid = $small $idchar* -- variable identifiers 168@conid = $large $idchar* -- constructor identifiers 169 170@varsym = ($symbol # \:) $symbol* -- variable (operator) symbol 171@consym = \: $symbol* -- constructor (operator) symbol 172 173-- See Note [Lexing NumericUnderscores extension] and #14473 174@numspc = _* -- numeric spacer (#14473) 175@decimal = $decdigit(@numspc $decdigit)* 176@binary = $binit(@numspc $binit)* 177@octal = $octit(@numspc $octit)* 178@hexadecimal = $hexit(@numspc $hexit)* 179@exponent = @numspc [eE] [\-\+]? @decimal 180@bin_exponent = @numspc [pP] [\-\+]? @decimal 181 182@qual = (@conid \.)+ 183@qvarid = @qual @varid 184@qconid = @qual @conid 185@qvarsym = @qual @varsym 186@qconsym = @qual @consym 187 188@floating_point = @numspc @decimal \. @decimal @exponent? | @numspc @decimal @exponent 189@hex_floating_point = @numspc @hexadecimal \. @hexadecimal @bin_exponent? | @numspc @hexadecimal @bin_exponent 190 191-- normal signed numerical literals can only be explicitly negative, 192-- not explicitly positive (contrast @exponent) 193@negative = \- 194@signed = @negative ? 195 196 197-- ----------------------------------------------------------------------------- 198-- Alex "Identifier" 199 200haskell :- 201 202 203-- ----------------------------------------------------------------------------- 204-- Alex "Rules" 205 206-- everywhere: skip whitespace 207$white_no_nl+ ; 208$tab { warnTab } 209 210-- Everywhere: deal with nested comments. We explicitly rule out 211-- pragmas, "{-#", so that we don't accidentally treat them as comments. 212-- (this can happen even though pragmas will normally take precedence due to 213-- longest-match, because pragmas aren't valid in every state, but comments 214-- are). We also rule out nested Haddock comments, if the -haddock flag is 215-- set. 216 217"{-" / { isNormalComment } { nested_comment lexToken } 218 219-- Single-line comments are a bit tricky. Haskell 98 says that two or 220-- more dashes followed by a symbol should be parsed as a varsym, so we 221-- have to exclude those. 222 223-- Since Haddock comments aren't valid in every state, we need to rule them 224-- out here. 225 226-- The following two rules match comments that begin with two dashes, but 227-- continue with a different character. The rules test that this character 228-- is not a symbol (in which case we'd have a varsym), and that it's not a 229-- space followed by a Haddock comment symbol (docsym) (in which case we'd 230-- have a Haddock comment). The rules then munch the rest of the line. 231 232"-- " ~$docsym .* { lineCommentToken } 233"--" [^$symbol \ ] .* { lineCommentToken } 234 235-- Next, match Haddock comments if no -haddock flag 236 237"-- " $docsym .* / { alexNotPred (ifExtension HaddockBit) } { lineCommentToken } 238 239-- Now, when we've matched comments that begin with 2 dashes and continue 240-- with a different character, we need to match comments that begin with three 241-- or more dashes (which clearly can't be Haddock comments). We only need to 242-- make sure that the first non-dash character isn't a symbol, and munch the 243-- rest of the line. 244 245"---"\-* ~$symbol .* { lineCommentToken } 246 247-- Since the previous rules all match dashes followed by at least one 248-- character, we also need to match a whole line filled with just dashes. 249 250"--"\-* / { atEOL } { lineCommentToken } 251 252-- We need this rule since none of the other single line comment rules 253-- actually match this case. 254 255"-- " / { atEOL } { lineCommentToken } 256 257-- 'bol' state: beginning of a line. Slurp up all the whitespace (including 258-- blank lines) until we find a non-whitespace character, then do layout 259-- processing. 260-- 261-- One slight wibble here: what if the line begins with {-#? In 262-- theory, we have to lex the pragma to see if it's one we recognise, 263-- and if it is, then we backtrack and do_bol, otherwise we treat it 264-- as a nested comment. We don't bother with this: if the line begins 265-- with {-#, then we'll assume it's a pragma we know about and go for do_bol. 266<bol> { 267 \n ; 268 ^\# line { begin line_prag1 } 269 ^\# / { followedByDigit } { begin line_prag1 } 270 ^\# pragma .* \n ; -- GCC 3.3 CPP generated, apparently 271 ^\# \! .* \n ; -- #!, for scripts 272 () { do_bol } 273} 274 275-- after a layout keyword (let, where, do, of), we begin a new layout 276-- context if the curly brace is missing. 277-- Careful! This stuff is quite delicate. 278<layout, layout_do, layout_if> { 279 \{ / { notFollowedBy '-' } { hopefully_open_brace } 280 -- we might encounter {-# here, but {- has been handled already 281 \n ; 282 ^\# (line)? { begin line_prag1 } 283} 284 285-- after an 'if', a vertical bar starts a layout context for MultiWayIf 286<layout_if> { 287 \| / { notFollowedBySymbol } { new_layout_context True dontGenerateSemic ITvbar } 288 () { pop } 289} 290 291-- do is treated in a subtly different way, see new_layout_context 292<layout> () { new_layout_context True generateSemic ITvocurly } 293<layout_do> () { new_layout_context False generateSemic ITvocurly } 294 295-- after a new layout context which was found to be to the left of the 296-- previous context, we have generated a '{' token, and we now need to 297-- generate a matching '}' token. 298<layout_left> () { do_layout_left } 299 300<0,option_prags> \n { begin bol } 301 302"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags } 303 { dispatch_pragmas linePrags } 304 305-- single-line line pragmas, of the form 306-- # <line> "<file>" <extra-stuff> \n 307<line_prag1> { 308 @decimal $white_no_nl+ \" [$graphic \ ]* \" { setLineAndFile line_prag1a } 309 () { failLinePrag1 } 310} 311<line_prag1a> .* { popLinePrag1 } 312 313-- Haskell-style line pragmas, of the form 314-- {-# LINE <line> "<file>" #-} 315<line_prag2> { 316 @decimal $white_no_nl+ \" [$graphic \ ]* \" { setLineAndFile line_prag2a } 317} 318<line_prag2a> "#-}"|"-}" { pop } 319 -- NOTE: accept -} at the end of a LINE pragma, for compatibility 320 -- with older versions of GHC which generated these. 321 322-- Haskell-style column pragmas, of the form 323-- {-# COLUMN <column> #-} 324<column_prag> @decimal $whitechar* "#-}" { setColumn } 325 326<0,option_prags> { 327 "{-#" $whitechar* $pragmachar+ 328 $whitechar+ $pragmachar+ / { known_pragma twoWordPrags } 329 { dispatch_pragmas twoWordPrags } 330 331 "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags } 332 { dispatch_pragmas oneWordPrags } 333 334 -- We ignore all these pragmas, but don't generate a warning for them 335 "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags } 336 { dispatch_pragmas ignoredPrags } 337 338 -- ToDo: should only be valid inside a pragma: 339 "#-}" { endPrag } 340} 341 342<option_prags> { 343 "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } 344 { dispatch_pragmas fileHeaderPrags } 345} 346 347<0> { 348 -- In the "0" mode we ignore these pragmas 349 "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } 350 { nested_comment lexToken } 351} 352 353<0,option_prags> { 354 "{-#" { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma") 355 (nested_comment lexToken) } 356} 357 358-- '0' state: ordinary lexemes 359 360-- Haddock comments 361 362<0,option_prags> { 363 "-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment } 364 "{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment } 365} 366 367-- "special" symbols 368 369<0> { 370 "[|" / { ifExtension ThQuotesBit } { token (ITopenExpQuote NoE NormalSyntax) } 371 "[||" / { ifExtension ThQuotesBit } { token (ITopenTExpQuote NoE) } 372 "[e|" / { ifExtension ThQuotesBit } { token (ITopenExpQuote HasE NormalSyntax) } 373 "[e||" / { ifExtension ThQuotesBit } { token (ITopenTExpQuote HasE) } 374 "[p|" / { ifExtension ThQuotesBit } { token ITopenPatQuote } 375 "[d|" / { ifExtension ThQuotesBit } { layout_token ITopenDecQuote } 376 "[t|" / { ifExtension ThQuotesBit } { token ITopenTypQuote } 377 "|]" / { ifExtension ThQuotesBit } { token (ITcloseQuote NormalSyntax) } 378 "||]" / { ifExtension ThQuotesBit } { token ITcloseTExpQuote } 379 \$ @varid / { ifExtension ThBit } { skip_one_varid ITidEscape } 380 "$$" @varid / { ifExtension ThBit } { skip_two_varid ITidTyEscape } 381 "$(" / { ifExtension ThBit } { token ITparenEscape } 382 "$$(" / { ifExtension ThBit } { token ITparenTyEscape } 383 384 "[" @varid "|" / { ifExtension QqBit } { lex_quasiquote_tok } 385 386 -- qualified quasi-quote (#5555) 387 "[" @qvarid "|" / { ifExtension QqBit } { lex_qquasiquote_tok } 388 389 $unigraphic -- ⟦ 390 / { ifCurrentChar '⟦' `alexAndPred` 391 ifExtension UnicodeSyntaxBit `alexAndPred` 392 ifExtension ThQuotesBit } 393 { token (ITopenExpQuote NoE UnicodeSyntax) } 394 $unigraphic -- ⟧ 395 / { ifCurrentChar '⟧' `alexAndPred` 396 ifExtension UnicodeSyntaxBit `alexAndPred` 397 ifExtension ThQuotesBit } 398 { token (ITcloseQuote UnicodeSyntax) } 399} 400 401 -- See Note [Lexing type applications] 402<0> { 403 [^ $idchar \) ] ^ 404 "@" 405 / { ifExtension TypeApplicationsBit `alexAndPred` notFollowedBySymbol } 406 { token ITtypeApp } 407} 408 409<0> { 410 "(|" 411 / { ifExtension ArrowsBit `alexAndPred` 412 notFollowedBySymbol } 413 { special (IToparenbar NormalSyntax) } 414 "|)" 415 / { ifExtension ArrowsBit } 416 { special (ITcparenbar NormalSyntax) } 417 418 $unigraphic -- ⦇ 419 / { ifCurrentChar '⦇' `alexAndPred` 420 ifExtension UnicodeSyntaxBit `alexAndPred` 421 ifExtension ArrowsBit } 422 { special (IToparenbar UnicodeSyntax) } 423 $unigraphic -- ⦈ 424 / { ifCurrentChar '⦈' `alexAndPred` 425 ifExtension UnicodeSyntaxBit `alexAndPred` 426 ifExtension ArrowsBit } 427 { special (ITcparenbar UnicodeSyntax) } 428} 429 430<0> { 431 \? @varid / { ifExtension IpBit } { skip_one_varid ITdupipvarid } 432} 433 434<0> { 435 "#" @varid / { ifExtension OverloadedLabelsBit } { skip_one_varid ITlabelvarid } 436} 437 438<0> { 439 "(#" / { ifExtension UnboxedTuplesBit `alexOrPred` 440 ifExtension UnboxedSumsBit } 441 { token IToubxparen } 442 "#)" / { ifExtension UnboxedTuplesBit `alexOrPred` 443 ifExtension UnboxedSumsBit } 444 { token ITcubxparen } 445} 446 447<0,option_prags> { 448 \( { special IToparen } 449 \) { special ITcparen } 450 \[ { special ITobrack } 451 \] { special ITcbrack } 452 \, { special ITcomma } 453 \; { special ITsemi } 454 \` { special ITbackquote } 455 456 \{ { open_brace } 457 \} { close_brace } 458} 459 460<0,option_prags> { 461 @qvarid { idtoken qvarid } 462 @qconid { idtoken qconid } 463 @varid { varid } 464 @conid { idtoken conid } 465} 466 467<0> { 468 @qvarid "#"+ / { ifExtension MagicHashBit } { idtoken qvarid } 469 @qconid "#"+ / { ifExtension MagicHashBit } { idtoken qconid } 470 @varid "#"+ / { ifExtension MagicHashBit } { varid } 471 @conid "#"+ / { ifExtension MagicHashBit } { idtoken conid } 472} 473 474-- ToDo: - move `var` and (sym) into lexical syntax? 475-- - remove backquote from $special? 476<0> { 477 @qvarsym { idtoken qvarsym } 478 @qconsym { idtoken qconsym } 479 @varsym { varsym } 480 @consym { consym } 481} 482 483-- For the normal boxed literals we need to be careful 484-- when trying to be close to Haskell98 485 486-- Note [Lexing NumericUnderscores extension] (#14473) 487-- 488-- NumericUnderscores extension allows underscores in numeric literals. 489-- Multiple underscores are represented with @numspc macro. 490-- To be simpler, we have only the definitions with underscores. 491-- And then we have a separate function (tok_integral and tok_frac) 492-- that validates the literals. 493-- If extensions are not enabled, check that there are no underscores. 494-- 495<0> { 496 -- Normal integral literals (:: Num a => a, from Integer) 497 @decimal { tok_num positive 0 0 decimal } 498 0[bB] @numspc @binary / { ifExtension BinaryLiteralsBit } { tok_num positive 2 2 binary } 499 0[oO] @numspc @octal { tok_num positive 2 2 octal } 500 0[xX] @numspc @hexadecimal { tok_num positive 2 2 hexadecimal } 501 @negative @decimal / { ifExtension NegativeLiteralsBit } { tok_num negative 1 1 decimal } 502 @negative 0[bB] @numspc @binary / { ifExtension NegativeLiteralsBit `alexAndPred` 503 ifExtension BinaryLiteralsBit } { tok_num negative 3 3 binary } 504 @negative 0[oO] @numspc @octal / { ifExtension NegativeLiteralsBit } { tok_num negative 3 3 octal } 505 @negative 0[xX] @numspc @hexadecimal / { ifExtension NegativeLiteralsBit } { tok_num negative 3 3 hexadecimal } 506 507 -- Normal rational literals (:: Fractional a => a, from Rational) 508 @floating_point { tok_frac 0 tok_float } 509 @negative @floating_point / { ifExtension NegativeLiteralsBit } { tok_frac 0 tok_float } 510 0[xX] @numspc @hex_floating_point / { ifExtension HexFloatLiteralsBit } { tok_frac 0 tok_hex_float } 511 @negative 0[xX] @numspc @hex_floating_point 512 / { ifExtension HexFloatLiteralsBit `alexAndPred` 513 ifExtension NegativeLiteralsBit } { tok_frac 0 tok_hex_float } 514} 515 516<0> { 517 -- Unboxed ints (:: Int#) and words (:: Word#) 518 -- It's simpler (and faster?) to give separate cases to the negatives, 519 -- especially considering octal/hexadecimal prefixes. 520 @decimal \# / { ifExtension MagicHashBit } { tok_primint positive 0 1 decimal } 521 0[bB] @numspc @binary \# / { ifExtension MagicHashBit `alexAndPred` 522 ifExtension BinaryLiteralsBit } { tok_primint positive 2 3 binary } 523 0[oO] @numspc @octal \# / { ifExtension MagicHashBit } { tok_primint positive 2 3 octal } 524 0[xX] @numspc @hexadecimal \# / { ifExtension MagicHashBit } { tok_primint positive 2 3 hexadecimal } 525 @negative @decimal \# / { ifExtension MagicHashBit } { tok_primint negative 1 2 decimal } 526 @negative 0[bB] @numspc @binary \# / { ifExtension MagicHashBit `alexAndPred` 527 ifExtension BinaryLiteralsBit } { tok_primint negative 3 4 binary } 528 @negative 0[oO] @numspc @octal \# / { ifExtension MagicHashBit } { tok_primint negative 3 4 octal } 529 @negative 0[xX] @numspc @hexadecimal \# 530 / { ifExtension MagicHashBit } { tok_primint negative 3 4 hexadecimal } 531 532 @decimal \# \# / { ifExtension MagicHashBit } { tok_primword 0 2 decimal } 533 0[bB] @numspc @binary \# \# / { ifExtension MagicHashBit `alexAndPred` 534 ifExtension BinaryLiteralsBit } { tok_primword 2 4 binary } 535 0[oO] @numspc @octal \# \# / { ifExtension MagicHashBit } { tok_primword 2 4 octal } 536 0[xX] @numspc @hexadecimal \# \# / { ifExtension MagicHashBit } { tok_primword 2 4 hexadecimal } 537 538 -- Unboxed floats and doubles (:: Float#, :: Double#) 539 -- prim_{float,double} work with signed literals 540 @signed @floating_point \# / { ifExtension MagicHashBit } { tok_frac 1 tok_primfloat } 541 @signed @floating_point \# \# / { ifExtension MagicHashBit } { tok_frac 2 tok_primdouble } 542} 543 544-- Strings and chars are lexed by hand-written code. The reason is 545-- that even if we recognise the string or char here in the regex 546-- lexer, we would still have to parse the string afterward in order 547-- to convert it to a String. 548<0> { 549 \' { lex_char_tok } 550 \" { lex_string_tok } 551} 552 553-- Note [Lexing type applications] 554-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 555-- The desired syntax for type applications is to prefix the type application 556-- with '@', like this: 557-- 558-- foo @Int @Bool baz bum 559-- 560-- This, of course, conflicts with as-patterns. The conflict arises because 561-- expressions and patterns use the same parser, and also because we want 562-- to allow type patterns within expression patterns. 563-- 564-- Disambiguation is accomplished by requiring *something* to appear between 565-- type application and the preceding token. This something must end with 566-- a character that cannot be the end of the variable bound in an as-pattern. 567-- Currently (June 2015), this means that the something cannot end with a 568-- $idchar or a close-paren. (The close-paren is necessary if the as-bound 569-- identifier is symbolic.) 570-- 571-- Note that looking for whitespace before the '@' is insufficient, because 572-- of this pathological case: 573-- 574-- foo {- hi -}@Int 575-- 576-- This design is predicated on the fact that as-patterns are generally 577-- whitespace-free, and also that this whole thing is opt-in, with the 578-- TypeApplications extension. 579 580-- ----------------------------------------------------------------------------- 581-- Alex "Haskell code fragment bottom" 582 583{ 584 585-- ----------------------------------------------------------------------------- 586-- The token type 587 588data Token 589 = ITas -- Haskell keywords 590 | ITcase 591 | ITclass 592 | ITdata 593 | ITdefault 594 | ITderiving 595 | ITdo 596 | ITelse 597 | IThiding 598 | ITforeign 599 | ITif 600 | ITimport 601 | ITin 602 | ITinfix 603 | ITinfixl 604 | ITinfixr 605 | ITinstance 606 | ITlet 607 | ITmodule 608 | ITnewtype 609 | ITof 610 | ITqualified 611 | ITthen 612 | ITtype 613 | ITwhere 614 615 | ITforall IsUnicodeSyntax -- GHC extension keywords 616 | ITexport 617 | ITlabel 618 | ITdynamic 619 | ITsafe 620 | ITinterruptible 621 | ITunsafe 622 | ITstdcallconv 623 | ITccallconv 624 | ITcapiconv 625 | ITprimcallconv 626 | ITjavascriptcallconv 627 | ITmdo 628 | ITfamily 629 | ITrole 630 | ITgroup 631 | ITby 632 | ITusing 633 | ITpattern 634 | ITstatic 635 | ITstock 636 | ITanyclass 637 | ITvia 638 639 -- Backpack tokens 640 | ITunit 641 | ITsignature 642 | ITdependency 643 | ITrequires 644 645 -- Pragmas, see note [Pragma source text] in BasicTypes 646 | ITinline_prag SourceText InlineSpec RuleMatchInfo 647 | ITspec_prag SourceText -- SPECIALISE 648 | ITspec_inline_prag SourceText Bool -- SPECIALISE INLINE (or NOINLINE) 649 | ITsource_prag SourceText 650 | ITrules_prag SourceText 651 | ITwarning_prag SourceText 652 | ITdeprecated_prag SourceText 653 | ITline_prag SourceText -- not usually produced, see 'UsePosPragsBit' 654 | ITcolumn_prag SourceText -- not usually produced, see 'UsePosPragsBit' 655 | ITscc_prag SourceText 656 | ITgenerated_prag SourceText 657 | ITcore_prag SourceText -- hdaume: core annotations 658 | ITunpack_prag SourceText 659 | ITnounpack_prag SourceText 660 | ITann_prag SourceText 661 | ITcomplete_prag SourceText 662 | ITclose_prag 663 | IToptions_prag String 664 | ITinclude_prag String 665 | ITlanguage_prag 666 | ITminimal_prag SourceText 667 | IToverlappable_prag SourceText -- instance overlap mode 668 | IToverlapping_prag SourceText -- instance overlap mode 669 | IToverlaps_prag SourceText -- instance overlap mode 670 | ITincoherent_prag SourceText -- instance overlap mode 671 | ITctype SourceText 672 | ITcomment_line_prag -- See Note [Nested comment line pragmas] 673 674 | ITdotdot -- reserved symbols 675 | ITcolon 676 | ITdcolon IsUnicodeSyntax 677 | ITequal 678 | ITlam 679 | ITlcase 680 | ITvbar 681 | ITlarrow IsUnicodeSyntax 682 | ITrarrow IsUnicodeSyntax 683 | ITat 684 | ITtilde 685 | ITdarrow IsUnicodeSyntax 686 | ITminus 687 | ITbang 688 | ITstar IsUnicodeSyntax 689 | ITdot 690 691 | ITbiglam -- GHC-extension symbols 692 693 | ITocurly -- special symbols 694 | ITccurly 695 | ITvocurly 696 | ITvccurly 697 | ITobrack 698 | ITopabrack -- [:, for parallel arrays with -XParallelArrays 699 | ITcpabrack -- :], for parallel arrays with -XParallelArrays 700 | ITcbrack 701 | IToparen 702 | ITcparen 703 | IToubxparen 704 | ITcubxparen 705 | ITsemi 706 | ITcomma 707 | ITunderscore 708 | ITbackquote 709 | ITsimpleQuote -- ' 710 711 | ITvarid FastString -- identifiers 712 | ITconid FastString 713 | ITvarsym FastString 714 | ITconsym FastString 715 | ITqvarid (FastString,FastString) 716 | ITqconid (FastString,FastString) 717 | ITqvarsym (FastString,FastString) 718 | ITqconsym (FastString,FastString) 719 720 | ITdupipvarid FastString -- GHC extension: implicit param: ?x 721 | ITlabelvarid FastString -- Overloaded label: #x 722 723 | ITchar SourceText Char -- Note [Literal source text] in BasicTypes 724 | ITstring SourceText FastString -- Note [Literal source text] in BasicTypes 725 | ITinteger IntegralLit -- Note [Literal source text] in BasicTypes 726 | ITrational FractionalLit 727 728 | ITprimchar SourceText Char -- Note [Literal source text] in BasicTypes 729 | ITprimstring SourceText ByteString -- Note [Literal source text] @BasicTypes 730 | ITprimint SourceText Integer -- Note [Literal source text] in BasicTypes 731 | ITprimword SourceText Integer -- Note [Literal source text] in BasicTypes 732 | ITprimfloat FractionalLit 733 | ITprimdouble FractionalLit 734 735 -- Template Haskell extension tokens 736 | ITopenExpQuote HasE IsUnicodeSyntax -- [| or [e| 737 | ITopenPatQuote -- [p| 738 | ITopenDecQuote -- [d| 739 | ITopenTypQuote -- [t| 740 | ITcloseQuote IsUnicodeSyntax -- |] 741 | ITopenTExpQuote HasE -- [|| or [e|| 742 | ITcloseTExpQuote -- ||] 743 | ITidEscape FastString -- $x 744 | ITparenEscape -- $( 745 | ITidTyEscape FastString -- $$x 746 | ITparenTyEscape -- $$( 747 | ITtyQuote -- '' 748 | ITquasiQuote (FastString,FastString,RealSrcSpan) 749 -- ITquasiQuote(quoter, quote, loc) 750 -- represents a quasi-quote of the form 751 -- [quoter| quote |] 752 | ITqQuasiQuote (FastString,FastString,FastString,RealSrcSpan) 753 -- ITqQuasiQuote(Qual, quoter, quote, loc) 754 -- represents a qualified quasi-quote of the form 755 -- [Qual.quoter| quote |] 756 757 -- Arrow notation extension 758 | ITproc 759 | ITrec 760 | IToparenbar IsUnicodeSyntax -- ^ @(|@ 761 | ITcparenbar IsUnicodeSyntax -- ^ @|)@ 762 | ITlarrowtail IsUnicodeSyntax -- ^ @-<@ 763 | ITrarrowtail IsUnicodeSyntax -- ^ @>-@ 764 | ITLarrowtail IsUnicodeSyntax -- ^ @-<<@ 765 | ITRarrowtail IsUnicodeSyntax -- ^ @>>-@ 766 767 -- | Type application '@' (lexed differently than as-pattern '@', 768 -- due to checking for preceding whitespace) 769 | ITtypeApp 770 771 772 | ITunknown String -- ^ Used when the lexer can't make sense of it 773 | ITeof -- ^ end of file token 774 775 -- Documentation annotations 776 | ITdocCommentNext String -- ^ something beginning @-- |@ 777 | ITdocCommentPrev String -- ^ something beginning @-- ^@ 778 | ITdocCommentNamed String -- ^ something beginning @-- $@ 779 | ITdocSection Int String -- ^ a section heading 780 | ITdocOptions String -- ^ doc options (prune, ignore-exports, etc) 781 | ITlineComment String -- ^ comment starting by "--" 782 | ITblockComment String -- ^ comment in {- -} 783 784 deriving Show 785 786instance Outputable Token where 787 ppr x = text (show x) 788 789 790-- the bitmap provided as the third component indicates whether the 791-- corresponding extension keyword is valid under the extension options 792-- provided to the compiler; if the extension corresponding to *any* of the 793-- bits set in the bitmap is enabled, the keyword is valid (this setup 794-- facilitates using a keyword in two different extensions that can be 795-- activated independently) 796-- 797reservedWordsFM :: UniqFM (Token, ExtsBitmap) 798reservedWordsFM = listToUFM $ 799 map (\(x, y, z) -> (mkFastString x, (y, z))) 800 [( "_", ITunderscore, 0 ), 801 ( "as", ITas, 0 ), 802 ( "case", ITcase, 0 ), 803 ( "class", ITclass, 0 ), 804 ( "data", ITdata, 0 ), 805 ( "default", ITdefault, 0 ), 806 ( "deriving", ITderiving, 0 ), 807 ( "do", ITdo, 0 ), 808 ( "else", ITelse, 0 ), 809 ( "hiding", IThiding, 0 ), 810 ( "if", ITif, 0 ), 811 ( "import", ITimport, 0 ), 812 ( "in", ITin, 0 ), 813 ( "infix", ITinfix, 0 ), 814 ( "infixl", ITinfixl, 0 ), 815 ( "infixr", ITinfixr, 0 ), 816 ( "instance", ITinstance, 0 ), 817 ( "let", ITlet, 0 ), 818 ( "module", ITmodule, 0 ), 819 ( "newtype", ITnewtype, 0 ), 820 ( "of", ITof, 0 ), 821 ( "qualified", ITqualified, 0 ), 822 ( "then", ITthen, 0 ), 823 ( "type", ITtype, 0 ), 824 ( "where", ITwhere, 0 ), 825 826 ( "forall", ITforall NormalSyntax, 0), 827 ( "mdo", ITmdo, xbit RecursiveDoBit), 828 -- See Note [Lexing type pseudo-keywords] 829 ( "family", ITfamily, 0 ), 830 ( "role", ITrole, 0 ), 831 ( "pattern", ITpattern, xbit PatternSynonymsBit), 832 ( "static", ITstatic, xbit StaticPointersBit ), 833 ( "stock", ITstock, 0 ), 834 ( "anyclass", ITanyclass, 0 ), 835 ( "via", ITvia, 0 ), 836 ( "group", ITgroup, xbit TransformComprehensionsBit), 837 ( "by", ITby, xbit TransformComprehensionsBit), 838 ( "using", ITusing, xbit TransformComprehensionsBit), 839 840 ( "foreign", ITforeign, xbit FfiBit), 841 ( "export", ITexport, xbit FfiBit), 842 ( "label", ITlabel, xbit FfiBit), 843 ( "dynamic", ITdynamic, xbit FfiBit), 844 ( "safe", ITsafe, xbit FfiBit .|. 845 xbit SafeHaskellBit), 846 ( "interruptible", ITinterruptible, xbit InterruptibleFfiBit), 847 ( "unsafe", ITunsafe, xbit FfiBit), 848 ( "stdcall", ITstdcallconv, xbit FfiBit), 849 ( "ccall", ITccallconv, xbit FfiBit), 850 ( "capi", ITcapiconv, xbit CApiFfiBit), 851 ( "prim", ITprimcallconv, xbit FfiBit), 852 ( "javascript", ITjavascriptcallconv, xbit FfiBit), 853 854 ( "unit", ITunit, 0 ), 855 ( "dependency", ITdependency, 0 ), 856 ( "signature", ITsignature, 0 ), 857 858 ( "rec", ITrec, xbit ArrowsBit .|. 859 xbit RecursiveDoBit), 860 ( "proc", ITproc, xbit ArrowsBit) 861 ] 862 863{----------------------------------- 864Note [Lexing type pseudo-keywords] 865~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 866 867One might think that we wish to treat 'family' and 'role' as regular old 868varids whenever -XTypeFamilies and -XRoleAnnotations are off, respectively. 869But, there is no need to do so. These pseudo-keywords are not stolen syntax: 870they are only used after the keyword 'type' at the top-level, where varids are 871not allowed. Furthermore, checks further downstream (TcTyClsDecls) ensure that 872type families and role annotations are never declared without their extensions 873on. In fact, by unconditionally lexing these pseudo-keywords as special, we 874can get better error messages. 875 876Also, note that these are included in the `varid` production in the parser -- 877a key detail to make all this work. 878-------------------------------------} 879 880reservedSymsFM :: UniqFM (Token, IsUnicodeSyntax, ExtsBitmap) 881reservedSymsFM = listToUFM $ 882 map (\ (x,w,y,z) -> (mkFastString x,(w,y,z))) 883 [ ("..", ITdotdot, NormalSyntax, 0 ) 884 -- (:) is a reserved op, meaning only list cons 885 ,(":", ITcolon, NormalSyntax, 0 ) 886 ,("::", ITdcolon NormalSyntax, NormalSyntax, 0 ) 887 ,("=", ITequal, NormalSyntax, 0 ) 888 ,("\\", ITlam, NormalSyntax, 0 ) 889 ,("|", ITvbar, NormalSyntax, 0 ) 890 ,("<-", ITlarrow NormalSyntax, NormalSyntax, 0 ) 891 ,("->", ITrarrow NormalSyntax, NormalSyntax, 0 ) 892 ,("@", ITat, NormalSyntax, 0 ) 893 ,("~", ITtilde, NormalSyntax, 0 ) 894 ,("=>", ITdarrow NormalSyntax, NormalSyntax, 0 ) 895 ,("-", ITminus, NormalSyntax, 0 ) 896 ,("!", ITbang, NormalSyntax, 0 ) 897 898 ,("*", ITstar NormalSyntax, NormalSyntax, xbit StarIsTypeBit) 899 900 -- For 'forall a . t' 901 ,(".", ITdot, NormalSyntax, 0 ) 902 903 ,("-<", ITlarrowtail NormalSyntax, NormalSyntax, xbit ArrowsBit) 904 ,(">-", ITrarrowtail NormalSyntax, NormalSyntax, xbit ArrowsBit) 905 ,("-<<", ITLarrowtail NormalSyntax, NormalSyntax, xbit ArrowsBit) 906 ,(">>-", ITRarrowtail NormalSyntax, NormalSyntax, xbit ArrowsBit) 907 908 ,("∷", ITdcolon UnicodeSyntax, UnicodeSyntax, 0 ) 909 ,("⇒", ITdarrow UnicodeSyntax, UnicodeSyntax, 0 ) 910 ,("∀", ITforall UnicodeSyntax, UnicodeSyntax, 0 ) 911 ,("→", ITrarrow UnicodeSyntax, UnicodeSyntax, 0 ) 912 ,("←", ITlarrow UnicodeSyntax, UnicodeSyntax, 0 ) 913 914 ,("⤙", ITlarrowtail UnicodeSyntax, UnicodeSyntax, xbit ArrowsBit) 915 ,("⤚", ITrarrowtail UnicodeSyntax, UnicodeSyntax, xbit ArrowsBit) 916 ,("⤛", ITLarrowtail UnicodeSyntax, UnicodeSyntax, xbit ArrowsBit) 917 ,("⤜", ITRarrowtail UnicodeSyntax, UnicodeSyntax, xbit ArrowsBit) 918 919 ,("★", ITstar UnicodeSyntax, UnicodeSyntax, xbit StarIsTypeBit) 920 921 -- ToDo: ideally, → and ∷ should be "specials", so that they cannot 922 -- form part of a large operator. This would let us have a better 923 -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). 924 ] 925 926-- ----------------------------------------------------------------------------- 927-- Lexer actions 928 929type Action = RealSrcSpan -> StringBuffer -> Int -> P (RealLocated Token) 930 931special :: Token -> Action 932special tok span _buf _len = return (L span tok) 933 934token, layout_token :: Token -> Action 935token t span _buf _len = return (L span t) 936layout_token t span _buf _len = pushLexState layout >> return (L span t) 937 938idtoken :: (StringBuffer -> Int -> Token) -> Action 939idtoken f span buf len = return (L span $! (f buf len)) 940 941skip_one_varid :: (FastString -> Token) -> Action 942skip_one_varid f span buf len 943 = return (L span $! f (lexemeToFastString (stepOn buf) (len-1))) 944 945skip_two_varid :: (FastString -> Token) -> Action 946skip_two_varid f span buf len 947 = return (L span $! f (lexemeToFastString (stepOn (stepOn buf)) (len-2))) 948 949strtoken :: (String -> Token) -> Action 950strtoken f span buf len = 951 return (L span $! (f $! lexemeToString buf len)) 952 953begin :: Int -> Action 954begin code _span _str _len = do pushLexState code; lexToken 955 956pop :: Action 957pop _span _buf _len = do _ <- popLexState 958 lexToken 959-- See Note [Nested comment line pragmas] 960failLinePrag1 :: Action 961failLinePrag1 span _buf _len = do 962 b <- getBit InNestedCommentBit 963 if b then return (L span ITcomment_line_prag) 964 else lexError "lexical error in pragma" 965 966-- See Note [Nested comment line pragmas] 967popLinePrag1 :: Action 968popLinePrag1 span _buf _len = do 969 b <- getBit InNestedCommentBit 970 if b then return (L span ITcomment_line_prag) else do 971 _ <- popLexState 972 lexToken 973 974hopefully_open_brace :: Action 975hopefully_open_brace span buf len 976 = do relaxed <- getBit RelaxedLayoutBit 977 ctx <- getContext 978 (AI l _) <- getInput 979 let offset = srcLocCol l 980 isOK = relaxed || 981 case ctx of 982 Layout prev_off _ : _ -> prev_off < offset 983 _ -> True 984 if isOK then pop_and open_brace span buf len 985 else addFatalError (RealSrcSpan span) (text "Missing block") 986 987pop_and :: Action -> Action 988pop_and act span buf len = do _ <- popLexState 989 act span buf len 990 991{-# INLINE nextCharIs #-} 992nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool 993nextCharIs buf p = not (atEnd buf) && p (currentChar buf) 994 995{-# INLINE nextCharIsNot #-} 996nextCharIsNot :: StringBuffer -> (Char -> Bool) -> Bool 997nextCharIsNot buf p = not (nextCharIs buf p) 998 999notFollowedBy :: Char -> AlexAccPred ExtsBitmap 1000notFollowedBy char _ _ _ (AI _ buf) 1001 = nextCharIsNot buf (== char) 1002 1003notFollowedBySymbol :: AlexAccPred ExtsBitmap 1004notFollowedBySymbol _ _ _ (AI _ buf) 1005 = nextCharIsNot buf (`elem` "!#$%&*+./<=>?@\\^|-~") 1006 1007followedByDigit :: AlexAccPred ExtsBitmap 1008followedByDigit _ _ _ (AI _ buf) 1009 = afterOptionalSpace buf (\b -> nextCharIs b (`elem` ['0'..'9'])) 1010 1011ifCurrentChar :: Char -> AlexAccPred ExtsBitmap 1012ifCurrentChar char _ (AI _ buf) _ _ 1013 = nextCharIs buf (== char) 1014 1015-- We must reject doc comments as being ordinary comments everywhere. 1016-- In some cases the doc comment will be selected as the lexeme due to 1017-- maximal munch, but not always, because the nested comment rule is 1018-- valid in all states, but the doc-comment rules are only valid in 1019-- the non-layout states. 1020isNormalComment :: AlexAccPred ExtsBitmap 1021isNormalComment bits _ _ (AI _ buf) 1022 | HaddockBit `xtest` bits = notFollowedByDocOrPragma 1023 | otherwise = nextCharIsNot buf (== '#') 1024 where 1025 notFollowedByDocOrPragma 1026 = afterOptionalSpace buf (\b -> nextCharIsNot b (`elem` "|^*$#")) 1027 1028afterOptionalSpace :: StringBuffer -> (StringBuffer -> Bool) -> Bool 1029afterOptionalSpace buf p 1030 = if nextCharIs buf (== ' ') 1031 then p (snd (nextChar buf)) 1032 else p buf 1033 1034atEOL :: AlexAccPred ExtsBitmap 1035atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n' 1036 1037ifExtension :: ExtBits -> AlexAccPred ExtsBitmap 1038ifExtension extBits bits _ _ _ = extBits `xtest` bits 1039 1040alexNotPred p userState in1 len in2 1041 = not (p userState in1 len in2) 1042 1043alexOrPred p1 p2 userState in1 len in2 1044 = p1 userState in1 len in2 || p2 userState in1 len in2 1045 1046multiline_doc_comment :: Action 1047multiline_doc_comment span buf _len = withLexedDocType (worker "") 1048 where 1049 worker commentAcc input docType checkNextLine = case alexGetChar' input of 1050 Just ('\n', input') 1051 | checkNextLine -> case checkIfCommentLine input' of 1052 Just input -> worker ('\n':commentAcc) input docType checkNextLine 1053 Nothing -> docCommentEnd input commentAcc docType buf span 1054 | otherwise -> docCommentEnd input commentAcc docType buf span 1055 Just (c, input) -> worker (c:commentAcc) input docType checkNextLine 1056 Nothing -> docCommentEnd input commentAcc docType buf span 1057 1058 -- Check if the next line of input belongs to this doc comment as well. 1059 -- A doc comment continues onto the next line when the following 1060 -- conditions are met: 1061 -- * The line starts with "--" 1062 -- * The line doesn't start with "---". 1063 -- * The line doesn't start with "-- $", because that would be the 1064 -- start of a /new/ named haddock chunk (#10398). 1065 checkIfCommentLine :: AlexInput -> Maybe AlexInput 1066 checkIfCommentLine input = check (dropNonNewlineSpace input) 1067 where 1068 check input = do 1069 ('-', input) <- alexGetChar' input 1070 ('-', input) <- alexGetChar' input 1071 (c, after_c) <- alexGetChar' input 1072 case c of 1073 '-' -> Nothing 1074 ' ' -> case alexGetChar' after_c of 1075 Just ('$', _) -> Nothing 1076 _ -> Just input 1077 _ -> Just input 1078 1079 dropNonNewlineSpace input = case alexGetChar' input of 1080 Just (c, input') 1081 | isSpace c && c /= '\n' -> dropNonNewlineSpace input' 1082 | otherwise -> input 1083 Nothing -> input 1084 1085lineCommentToken :: Action 1086lineCommentToken span buf len = do 1087 b <- getBit RawTokenStreamBit 1088 if b then strtoken ITlineComment span buf len else lexToken 1089 1090{- 1091 nested comments require traversing by hand, they can't be parsed 1092 using regular expressions. 1093-} 1094nested_comment :: P (RealLocated Token) -> Action 1095nested_comment cont span buf len = do 1096 input <- getInput 1097 go (reverse $ lexemeToString buf len) (1::Int) input 1098 where 1099 go commentAcc 0 input = do 1100 setInput input 1101 b <- getBit RawTokenStreamBit 1102 if b 1103 then docCommentEnd input commentAcc ITblockComment buf span 1104 else cont 1105 go commentAcc n input = case alexGetChar' input of 1106 Nothing -> errBrace input span 1107 Just ('-',input) -> case alexGetChar' input of 1108 Nothing -> errBrace input span 1109 Just ('\125',input) -> go ('\125':'-':commentAcc) (n-1) input -- '}' 1110 Just (_,_) -> go ('-':commentAcc) n input 1111 Just ('\123',input) -> case alexGetChar' input of -- '{' char 1112 Nothing -> errBrace input span 1113 Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input 1114 Just (_,_) -> go ('\123':commentAcc) n input 1115 -- See Note [Nested comment line pragmas] 1116 Just ('\n',input) -> case alexGetChar' input of 1117 Nothing -> errBrace input span 1118 Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input 1119 go (parsedAcc ++ '\n':commentAcc) n input 1120 Just (_,_) -> go ('\n':commentAcc) n input 1121 Just (c,input) -> go (c:commentAcc) n input 1122 1123nested_doc_comment :: Action 1124nested_doc_comment span buf _len = withLexedDocType (go "") 1125 where 1126 go commentAcc input docType _ = case alexGetChar' input of 1127 Nothing -> errBrace input span 1128 Just ('-',input) -> case alexGetChar' input of 1129 Nothing -> errBrace input span 1130 Just ('\125',input) -> 1131 docCommentEnd input commentAcc docType buf span 1132 Just (_,_) -> go ('-':commentAcc) input docType False 1133 Just ('\123', input) -> case alexGetChar' input of 1134 Nothing -> errBrace input span 1135 Just ('-',input) -> do 1136 setInput input 1137 let cont = do input <- getInput; go commentAcc input docType False 1138 nested_comment cont span buf _len 1139 Just (_,_) -> go ('\123':commentAcc) input docType False 1140 -- See Note [Nested comment line pragmas] 1141 Just ('\n',input) -> case alexGetChar' input of 1142 Nothing -> errBrace input span 1143 Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input 1144 go (parsedAcc ++ '\n':commentAcc) input docType False 1145 Just (_,_) -> go ('\n':commentAcc) input docType False 1146 Just (c,input) -> go (c:commentAcc) input docType False 1147 1148-- See Note [Nested comment line pragmas] 1149parseNestedPragma :: AlexInput -> P (String,AlexInput) 1150parseNestedPragma input@(AI _ buf) = do 1151 origInput <- getInput 1152 setInput input 1153 setExts (.|. xbit InNestedCommentBit) 1154 pushLexState bol 1155 lt <- lexToken 1156 _ <- popLexState 1157 setExts (.&. complement (xbit InNestedCommentBit)) 1158 postInput@(AI _ postBuf) <- getInput 1159 setInput origInput 1160 case unRealSrcSpan lt of 1161 ITcomment_line_prag -> do 1162 let bytes = byteDiff buf postBuf 1163 diff = lexemeToString buf bytes 1164 return (reverse diff, postInput) 1165 lt' -> panic ("parseNestedPragma: unexpected token" ++ (show lt')) 1166 1167{- 1168Note [Nested comment line pragmas] 1169~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1170We used to ignore cpp-preprocessor-generated #line pragmas if they were inside 1171nested comments. 1172 1173Now, when parsing a nested comment, if we encounter a line starting with '#' we 1174call parseNestedPragma, which executes the following: 11751. Save the current lexer input (loc, buf) for later 11762. Set the current lexer input to the beginning of the line starting with '#' 11773. Turn the 'InNestedComment' extension on 11784. Push the 'bol' lexer state 11795. Lex a token. Due to (2), (3), and (4), this should always lex a single line 1180 or less and return the ITcomment_line_prag token. This may set source line 1181 and file location if a #line pragma is successfully parsed 11826. Restore lexer input and state to what they were before we did all this 11837. Return control to the function parsing a nested comment, informing it of 1184 what the lexer parsed 1185 1186Regarding (5) above: 1187Every exit from the 'bol' lexer state (do_bol, popLinePrag1, failLinePrag1) 1188checks if the 'InNestedComment' extension is set. If it is, that function will 1189return control to parseNestedPragma by returning the ITcomment_line_prag token. 1190 1191See #314 for more background on the bug this fixes. 1192-} 1193 1194withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (RealLocated Token)) 1195 -> P (RealLocated Token) 1196withLexedDocType lexDocComment = do 1197 input@(AI _ buf) <- getInput 1198 case prevChar buf ' ' of 1199 -- The `Bool` argument to lexDocComment signals whether or not the next 1200 -- line of input might also belong to this doc comment. 1201 '|' -> lexDocComment input ITdocCommentNext True 1202 '^' -> lexDocComment input ITdocCommentPrev True 1203 '$' -> lexDocComment input ITdocCommentNamed True 1204 '*' -> lexDocSection 1 input 1205 _ -> panic "withLexedDocType: Bad doc type" 1206 where 1207 lexDocSection n input = case alexGetChar' input of 1208 Just ('*', input) -> lexDocSection (n+1) input 1209 Just (_, _) -> lexDocComment input (ITdocSection n) False 1210 Nothing -> do setInput input; lexToken -- eof reached, lex it normally 1211 1212-- RULES pragmas turn on the forall and '.' keywords, and we turn them 1213-- off again at the end of the pragma. 1214rulePrag :: Action 1215rulePrag span buf len = do 1216 setExts (.|. xbit InRulePragBit) 1217 let !src = lexemeToString buf len 1218 return (L span (ITrules_prag (SourceText src))) 1219 1220-- When 'UsePosPragsBit' is not set, it is expected that we emit a token instead 1221-- of updating the position in 'PState' 1222linePrag :: Action 1223linePrag span buf len = do 1224 usePosPrags <- getBit UsePosPragsBit 1225 if usePosPrags 1226 then begin line_prag2 span buf len 1227 else let !src = lexemeToString buf len 1228 in return (L span (ITline_prag (SourceText src))) 1229 1230-- When 'UsePosPragsBit' is not set, it is expected that we emit a token instead 1231-- of updating the position in 'PState' 1232columnPrag :: Action 1233columnPrag span buf len = do 1234 usePosPrags <- getBit UsePosPragsBit 1235 let !src = lexemeToString buf len 1236 if usePosPrags 1237 then begin column_prag span buf len 1238 else let !src = lexemeToString buf len 1239 in return (L span (ITcolumn_prag (SourceText src))) 1240 1241endPrag :: Action 1242endPrag span _buf _len = do 1243 setExts (.&. complement (xbit InRulePragBit)) 1244 return (L span ITclose_prag) 1245 1246-- docCommentEnd 1247------------------------------------------------------------------------------- 1248-- This function is quite tricky. We can't just return a new token, we also 1249-- need to update the state of the parser. Why? Because the token is longer 1250-- than what was lexed by Alex, and the lexToken function doesn't know this, so 1251-- it writes the wrong token length to the parser state. This function is 1252-- called afterwards, so it can just update the state. 1253 1254docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer -> 1255 RealSrcSpan -> P (RealLocated Token) 1256docCommentEnd input commentAcc docType buf span = do 1257 setInput input 1258 let (AI loc nextBuf) = input 1259 comment = reverse commentAcc 1260 span' = mkRealSrcSpan (realSrcSpanStart span) loc 1261 last_len = byteDiff buf nextBuf 1262 1263 span `seq` setLastToken span' last_len 1264 return (L span' (docType comment)) 1265 1266errBrace :: AlexInput -> RealSrcSpan -> P a 1267errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) end "unterminated `{-'" 1268 1269open_brace, close_brace :: Action 1270open_brace span _str _len = do 1271 ctx <- getContext 1272 setContext (NoLayout:ctx) 1273 return (L span ITocurly) 1274close_brace span _str _len = do 1275 popContext 1276 return (L span ITccurly) 1277 1278qvarid, qconid :: StringBuffer -> Int -> Token 1279qvarid buf len = ITqvarid $! splitQualName buf len False 1280qconid buf len = ITqconid $! splitQualName buf len False 1281 1282splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString) 1283-- takes a StringBuffer and a length, and returns the module name 1284-- and identifier parts of a qualified name. Splits at the *last* dot, 1285-- because of hierarchical module names. 1286splitQualName orig_buf len parens = split orig_buf orig_buf 1287 where 1288 split buf dot_buf 1289 | orig_buf `byteDiff` buf >= len = done dot_buf 1290 | c == '.' = found_dot buf' 1291 | otherwise = split buf' dot_buf 1292 where 1293 (c,buf') = nextChar buf 1294 1295 -- careful, we might get names like M.... 1296 -- so, if the character after the dot is not upper-case, this is 1297 -- the end of the qualifier part. 1298 found_dot buf -- buf points after the '.' 1299 | isUpper c = split buf' buf 1300 | otherwise = done buf 1301 where 1302 (c,buf') = nextChar buf 1303 1304 done dot_buf = 1305 (lexemeToFastString orig_buf (qual_size - 1), 1306 if parens -- Prelude.(+) 1307 then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2) 1308 else lexemeToFastString dot_buf (len - qual_size)) 1309 where 1310 qual_size = orig_buf `byteDiff` dot_buf 1311 1312varid :: Action 1313varid span buf len = 1314 case lookupUFM reservedWordsFM fs of 1315 Just (ITcase, _) -> do 1316 lastTk <- getLastTk 1317 keyword <- case lastTk of 1318 Just ITlam -> do 1319 lambdaCase <- getBit LambdaCaseBit 1320 unless lambdaCase $ do 1321 pState <- getPState 1322 addError (RealSrcSpan (last_loc pState)) $ text 1323 "Illegal lambda-case (use LambdaCase)" 1324 return ITlcase 1325 _ -> return ITcase 1326 maybe_layout keyword 1327 return $ L span keyword 1328 Just (keyword, 0) -> do 1329 maybe_layout keyword 1330 return $ L span keyword 1331 Just (keyword, i) -> do 1332 exts <- getExts 1333 if exts .&. i /= 0 1334 then do 1335 maybe_layout keyword 1336 return $ L span keyword 1337 else 1338 return $ L span $ ITvarid fs 1339 Nothing -> 1340 return $ L span $ ITvarid fs 1341 where 1342 !fs = lexemeToFastString buf len 1343 1344conid :: StringBuffer -> Int -> Token 1345conid buf len = ITconid $! lexemeToFastString buf len 1346 1347qvarsym, qconsym :: StringBuffer -> Int -> Token 1348qvarsym buf len = ITqvarsym $! splitQualName buf len False 1349qconsym buf len = ITqconsym $! splitQualName buf len False 1350 1351varsym, consym :: Action 1352varsym = sym ITvarsym 1353consym = sym ITconsym 1354 1355sym :: (FastString -> Token) -> Action 1356sym con span buf len = 1357 case lookupUFM reservedSymsFM fs of 1358 Just (keyword, NormalSyntax, 0) -> 1359 return $ L span keyword 1360 Just (keyword, NormalSyntax, i) -> do 1361 exts <- getExts 1362 if exts .&. i /= 0 1363 then return $ L span keyword 1364 else return $ L span (con fs) 1365 Just (keyword, UnicodeSyntax, 0) -> do 1366 exts <- getExts 1367 if xtest UnicodeSyntaxBit exts 1368 then return $ L span keyword 1369 else return $ L span (con fs) 1370 Just (keyword, UnicodeSyntax, i) -> do 1371 exts <- getExts 1372 if exts .&. i /= 0 && xtest UnicodeSyntaxBit exts 1373 then return $ L span keyword 1374 else return $ L span (con fs) 1375 Nothing -> 1376 return $ L span $! con fs 1377 where 1378 !fs = lexemeToFastString buf len 1379 1380-- Variations on the integral numeric literal. 1381tok_integral :: (SourceText -> Integer -> Token) 1382 -> (Integer -> Integer) 1383 -> Int -> Int 1384 -> (Integer, (Char -> Int)) 1385 -> Action 1386tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = do 1387 numericUnderscores <- getBit NumericUnderscoresBit -- #14473 1388 let src = lexemeToString buf len 1389 when ((not numericUnderscores) && ('_' `elem` src)) $ do 1390 pState <- getPState 1391 addError (RealSrcSpan (last_loc pState)) $ text 1392 "Use NumericUnderscores to allow underscores in integer literals" 1393 return $ L span $ itint (SourceText src) 1394 $! transint $ parseUnsignedInteger 1395 (offsetBytes transbuf buf) (subtract translen len) radix char_to_int 1396 1397tok_num :: (Integer -> Integer) 1398 -> Int -> Int 1399 -> (Integer, (Char->Int)) -> Action 1400tok_num = tok_integral $ \case 1401 st@(SourceText ('-':_)) -> itint st (const True) 1402 st@(SourceText _) -> itint st (const False) 1403 st@NoSourceText -> itint st (< 0) 1404 where 1405 itint :: SourceText -> (Integer -> Bool) -> Integer -> Token 1406 itint !st is_negative !val = ITinteger ((IL st $! is_negative val) val) 1407 1408tok_primint :: (Integer -> Integer) 1409 -> Int -> Int 1410 -> (Integer, (Char->Int)) -> Action 1411tok_primint = tok_integral ITprimint 1412 1413 1414tok_primword :: Int -> Int 1415 -> (Integer, (Char->Int)) -> Action 1416tok_primword = tok_integral ITprimword positive 1417positive, negative :: (Integer -> Integer) 1418positive = id 1419negative = negate 1420decimal, octal, hexadecimal :: (Integer, Char -> Int) 1421decimal = (10,octDecDigit) 1422binary = (2,octDecDigit) 1423octal = (8,octDecDigit) 1424hexadecimal = (16,hexDigit) 1425 1426-- readRational can understand negative rationals, exponents, everything. 1427tok_frac :: Int -> (String -> Token) -> Action 1428tok_frac drop f span buf len = do 1429 numericUnderscores <- getBit NumericUnderscoresBit -- #14473 1430 let src = lexemeToString buf (len-drop) 1431 when ((not numericUnderscores) && ('_' `elem` src)) $ do 1432 pState <- getPState 1433 addError (RealSrcSpan (last_loc pState)) $ text 1434 "Use NumericUnderscores to allow underscores in floating literals" 1435 return (L span $! (f $! src)) 1436 1437tok_float, tok_primfloat, tok_primdouble :: String -> Token 1438tok_float str = ITrational $! readFractionalLit str 1439tok_hex_float str = ITrational $! readHexFractionalLit str 1440tok_primfloat str = ITprimfloat $! readFractionalLit str 1441tok_primdouble str = ITprimdouble $! readFractionalLit str 1442 1443readFractionalLit :: String -> FractionalLit 1444readFractionalLit str = ((FL $! (SourceText str)) $! is_neg) $! readRational str 1445 where is_neg = case str of ('-':_) -> True 1446 _ -> False 1447readHexFractionalLit :: String -> FractionalLit 1448readHexFractionalLit str = 1449 FL { fl_text = SourceText str 1450 , fl_neg = case str of 1451 '-' : _ -> True 1452 _ -> False 1453 , fl_value = readHexRational str 1454 } 1455 1456-- ----------------------------------------------------------------------------- 1457-- Layout processing 1458 1459-- we're at the first token on a line, insert layout tokens if necessary 1460do_bol :: Action 1461do_bol span _str _len = do 1462 -- See Note [Nested comment line pragmas] 1463 b <- getBit InNestedCommentBit 1464 if b then return (L span ITcomment_line_prag) else do 1465 (pos, gen_semic) <- getOffside 1466 case pos of 1467 LT -> do 1468 --trace "layout: inserting '}'" $ do 1469 popContext 1470 -- do NOT pop the lex state, we might have a ';' to insert 1471 return (L span ITvccurly) 1472 EQ | gen_semic -> do 1473 --trace "layout: inserting ';'" $ do 1474 _ <- popLexState 1475 return (L span ITsemi) 1476 _ -> do 1477 _ <- popLexState 1478 lexToken 1479 1480-- certain keywords put us in the "layout" state, where we might 1481-- add an opening curly brace. 1482maybe_layout :: Token -> P () 1483maybe_layout t = do -- If the alternative layout rule is enabled then 1484 -- we never create an implicit layout context here. 1485 -- Layout is handled XXX instead. 1486 -- The code for closing implicit contexts, or 1487 -- inserting implicit semi-colons, is therefore 1488 -- irrelevant as it only applies in an implicit 1489 -- context. 1490 alr <- getBit AlternativeLayoutRuleBit 1491 unless alr $ f t 1492 where f ITdo = pushLexState layout_do 1493 f ITmdo = pushLexState layout_do 1494 f ITof = pushLexState layout 1495 f ITlcase = pushLexState layout 1496 f ITlet = pushLexState layout 1497 f ITwhere = pushLexState layout 1498 f ITrec = pushLexState layout 1499 f ITif = pushLexState layout_if 1500 f _ = return () 1501 1502-- Pushing a new implicit layout context. If the indentation of the 1503-- next token is not greater than the previous layout context, then 1504-- Haskell 98 says that the new layout context should be empty; that is 1505-- the lexer must generate {}. 1506-- 1507-- We are slightly more lenient than this: when the new context is started 1508-- by a 'do', then we allow the new context to be at the same indentation as 1509-- the previous context. This is what the 'strict' argument is for. 1510new_layout_context :: Bool -> Bool -> Token -> Action 1511new_layout_context strict gen_semic tok span _buf len = do 1512 _ <- popLexState 1513 (AI l _) <- getInput 1514 let offset = srcLocCol l - len 1515 ctx <- getContext 1516 nondecreasing <- getBit NondecreasingIndentationBit 1517 let strict' = strict || not nondecreasing 1518 case ctx of 1519 Layout prev_off _ : _ | 1520 (strict' && prev_off >= offset || 1521 not strict' && prev_off > offset) -> do 1522 -- token is indented to the left of the previous context. 1523 -- we must generate a {} sequence now. 1524 pushLexState layout_left 1525 return (L span tok) 1526 _ -> do setContext (Layout offset gen_semic : ctx) 1527 return (L span tok) 1528 1529do_layout_left :: Action 1530do_layout_left span _buf _len = do 1531 _ <- popLexState 1532 pushLexState bol -- we must be at the start of a line 1533 return (L span ITvccurly) 1534 1535-- ----------------------------------------------------------------------------- 1536-- LINE pragmas 1537 1538setLineAndFile :: Int -> Action 1539setLineAndFile code span buf len = do 1540 let src = lexemeToString buf (len - 1) -- drop trailing quotation mark 1541 linenumLen = length $ head $ words src 1542 linenum = parseUnsignedInteger buf linenumLen 10 octDecDigit 1543 file = mkFastString $ go $ drop 1 $ dropWhile (/= '"') src 1544 -- skip everything through first quotation mark to get to the filename 1545 where go ('\\':c:cs) = c : go cs 1546 go (c:cs) = c : go cs 1547 go [] = [] 1548 -- decode escapes in the filename. e.g. on Windows 1549 -- when our filenames have backslashes in, gcc seems to 1550 -- escape the backslashes. One symptom of not doing this 1551 -- is that filenames in error messages look a bit strange: 1552 -- C:\\foo\bar.hs 1553 -- only the first backslash is doubled, because we apply 1554 -- System.FilePath.normalise before printing out 1555 -- filenames and it does not remove duplicate 1556 -- backslashes after the drive letter (should it?). 1557 setAlrLastLoc $ alrInitialLoc file 1558 setSrcLoc (mkRealSrcLoc file (fromIntegral linenum - 1) (srcSpanEndCol span)) 1559 -- subtract one: the line number refers to the *following* line 1560 addSrcFile file 1561 _ <- popLexState 1562 pushLexState code 1563 lexToken 1564 1565setColumn :: Action 1566setColumn span buf len = do 1567 let column = 1568 case reads (lexemeToString buf len) of 1569 [(column, _)] -> column 1570 _ -> error "setColumn: expected integer" -- shouldn't happen 1571 setSrcLoc (mkRealSrcLoc (srcSpanFile span) (srcSpanEndLine span) 1572 (fromIntegral (column :: Integer))) 1573 _ <- popLexState 1574 lexToken 1575 1576alrInitialLoc :: FastString -> RealSrcSpan 1577alrInitialLoc file = mkRealSrcSpan loc loc 1578 where -- This is a hack to ensure that the first line in a file 1579 -- looks like it is after the initial location: 1580 loc = mkRealSrcLoc file (-1) (-1) 1581 1582-- ----------------------------------------------------------------------------- 1583-- Options, includes and language pragmas. 1584 1585lex_string_prag :: (String -> Token) -> Action 1586lex_string_prag mkTok span _buf _len 1587 = do input <- getInput 1588 start <- getRealSrcLoc 1589 tok <- go [] input 1590 end <- getRealSrcLoc 1591 return (L (mkRealSrcSpan start end) tok) 1592 where go acc input 1593 = if isString input "#-}" 1594 then do setInput input 1595 return (mkTok (reverse acc)) 1596 else case alexGetChar input of 1597 Just (c,i) -> go (c:acc) i 1598 Nothing -> err input 1599 isString _ [] = True 1600 isString i (x:xs) 1601 = case alexGetChar i of 1602 Just (c,i') | c == x -> isString i' xs 1603 _other -> False 1604 err (AI end _) = failLocMsgP (realSrcSpanStart span) end "unterminated options pragma" 1605 1606 1607-- ----------------------------------------------------------------------------- 1608-- Strings & Chars 1609 1610-- This stuff is horrible. I hates it. 1611 1612lex_string_tok :: Action 1613lex_string_tok span buf _len = do 1614 tok <- lex_string "" 1615 (AI end bufEnd) <- getInput 1616 let 1617 tok' = case tok of 1618 ITprimstring _ bs -> ITprimstring (SourceText src) bs 1619 ITstring _ s -> ITstring (SourceText src) s 1620 _ -> panic "lex_string_tok" 1621 src = lexemeToString buf (cur bufEnd - cur buf) 1622 return (L (mkRealSrcSpan (realSrcSpanStart span) end) tok') 1623 1624lex_string :: String -> P Token 1625lex_string s = do 1626 i <- getInput 1627 case alexGetChar' i of 1628 Nothing -> lit_error i 1629 1630 Just ('"',i) -> do 1631 setInput i 1632 let s' = reverse s 1633 magicHash <- getBit MagicHashBit 1634 if magicHash 1635 then do 1636 i <- getInput 1637 case alexGetChar' i of 1638 Just ('#',i) -> do 1639 setInput i 1640 when (any (> '\xFF') s') $ do 1641 pState <- getPState 1642 addError (RealSrcSpan (last_loc pState)) $ text 1643 "primitive string literal must contain only characters <= \'\\xFF\'" 1644 return (ITprimstring (SourceText s') (unsafeMkByteString s')) 1645 _other -> 1646 return (ITstring (SourceText s') (mkFastString s')) 1647 else 1648 return (ITstring (SourceText s') (mkFastString s')) 1649 1650 Just ('\\',i) 1651 | Just ('&',i) <- next -> do 1652 setInput i; lex_string s 1653 | Just (c,i) <- next, c <= '\x7f' && is_space c -> do 1654 -- is_space only works for <= '\x7f' (#3751, #5425) 1655 setInput i; lex_stringgap s 1656 where next = alexGetChar' i 1657 1658 Just (c, i1) -> do 1659 case c of 1660 '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s) 1661 c | isAny c -> do setInput i1; lex_string (c:s) 1662 _other -> lit_error i 1663 1664lex_stringgap :: String -> P Token 1665lex_stringgap s = do 1666 i <- getInput 1667 c <- getCharOrFail i 1668 case c of 1669 '\\' -> lex_string s 1670 c | c <= '\x7f' && is_space c -> lex_stringgap s 1671 -- is_space only works for <= '\x7f' (#3751, #5425) 1672 _other -> lit_error i 1673 1674 1675lex_char_tok :: Action 1676-- Here we are basically parsing character literals, such as 'x' or '\n' 1677-- but we additionally spot 'x and ''T, returning ITsimpleQuote and 1678-- ITtyQuote respectively, but WITHOUT CONSUMING the x or T part 1679-- (the parser does that). 1680-- So we have to do two characters of lookahead: when we see 'x we need to 1681-- see if there's a trailing quote 1682lex_char_tok span buf _len = do -- We've seen ' 1683 i1 <- getInput -- Look ahead to first character 1684 let loc = realSrcSpanStart span 1685 case alexGetChar' i1 of 1686 Nothing -> lit_error i1 1687 1688 Just ('\'', i2@(AI end2 _)) -> do -- We've seen '' 1689 setInput i2 1690 return (L (mkRealSrcSpan loc end2) ITtyQuote) 1691 1692 Just ('\\', i2@(AI _end2 _)) -> do -- We've seen 'backslash 1693 setInput i2 1694 lit_ch <- lex_escape 1695 i3 <- getInput 1696 mc <- getCharOrFail i3 -- Trailing quote 1697 if mc == '\'' then finish_char_tok buf loc lit_ch 1698 else lit_error i3 1699 1700 Just (c, i2@(AI _end2 _)) 1701 | not (isAny c) -> lit_error i1 1702 | otherwise -> 1703 1704 -- We've seen 'x, where x is a valid character 1705 -- (i.e. not newline etc) but not a quote or backslash 1706 case alexGetChar' i2 of -- Look ahead one more character 1707 Just ('\'', i3) -> do -- We've seen 'x' 1708 setInput i3 1709 finish_char_tok buf loc c 1710 _other -> do -- We've seen 'x not followed by quote 1711 -- (including the possibility of EOF) 1712 -- Just parse the quote only 1713 let (AI end _) = i1 1714 return (L (mkRealSrcSpan loc end) ITsimpleQuote) 1715 1716finish_char_tok :: StringBuffer -> RealSrcLoc -> Char -> P (RealLocated Token) 1717finish_char_tok buf loc ch -- We've already seen the closing quote 1718 -- Just need to check for trailing # 1719 = do magicHash <- getBit MagicHashBit 1720 i@(AI end bufEnd) <- getInput 1721 let src = lexemeToString buf (cur bufEnd - cur buf) 1722 if magicHash then do 1723 case alexGetChar' i of 1724 Just ('#',i@(AI end _)) -> do 1725 setInput i 1726 return (L (mkRealSrcSpan loc end) 1727 (ITprimchar (SourceText src) ch)) 1728 _other -> 1729 return (L (mkRealSrcSpan loc end) 1730 (ITchar (SourceText src) ch)) 1731 else do 1732 return (L (mkRealSrcSpan loc end) (ITchar (SourceText src) ch)) 1733 1734isAny :: Char -> Bool 1735isAny c | c > '\x7f' = isPrint c 1736 | otherwise = is_any c 1737 1738lex_escape :: P Char 1739lex_escape = do 1740 i0 <- getInput 1741 c <- getCharOrFail i0 1742 case c of 1743 'a' -> return '\a' 1744 'b' -> return '\b' 1745 'f' -> return '\f' 1746 'n' -> return '\n' 1747 'r' -> return '\r' 1748 't' -> return '\t' 1749 'v' -> return '\v' 1750 '\\' -> return '\\' 1751 '"' -> return '\"' 1752 '\'' -> return '\'' 1753 '^' -> do i1 <- getInput 1754 c <- getCharOrFail i1 1755 if c >= '@' && c <= '_' 1756 then return (chr (ord c - ord '@')) 1757 else lit_error i1 1758 1759 'x' -> readNum is_hexdigit 16 hexDigit 1760 'o' -> readNum is_octdigit 8 octDecDigit 1761 x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x) 1762 1763 c1 -> do 1764 i <- getInput 1765 case alexGetChar' i of 1766 Nothing -> lit_error i0 1767 Just (c2,i2) -> 1768 case alexGetChar' i2 of 1769 Nothing -> do lit_error i0 1770 Just (c3,i3) -> 1771 let str = [c1,c2,c3] in 1772 case [ (c,rest) | (p,c) <- silly_escape_chars, 1773 Just rest <- [stripPrefix p str] ] of 1774 (escape_char,[]):_ -> do 1775 setInput i3 1776 return escape_char 1777 (escape_char,_:_):_ -> do 1778 setInput i2 1779 return escape_char 1780 [] -> lit_error i0 1781 1782readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char 1783readNum is_digit base conv = do 1784 i <- getInput 1785 c <- getCharOrFail i 1786 if is_digit c 1787 then readNum2 is_digit base conv (conv c) 1788 else lit_error i 1789 1790readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char 1791readNum2 is_digit base conv i = do 1792 input <- getInput 1793 read i input 1794 where read i input = do 1795 case alexGetChar' input of 1796 Just (c,input') | is_digit c -> do 1797 let i' = i*base + conv c 1798 if i' > 0x10ffff 1799 then setInput input >> lexError "numeric escape sequence out of range" 1800 else read i' input' 1801 _other -> do 1802 setInput input; return (chr i) 1803 1804 1805silly_escape_chars :: [(String, Char)] 1806silly_escape_chars = [ 1807 ("NUL", '\NUL'), 1808 ("SOH", '\SOH'), 1809 ("STX", '\STX'), 1810 ("ETX", '\ETX'), 1811 ("EOT", '\EOT'), 1812 ("ENQ", '\ENQ'), 1813 ("ACK", '\ACK'), 1814 ("BEL", '\BEL'), 1815 ("BS", '\BS'), 1816 ("HT", '\HT'), 1817 ("LF", '\LF'), 1818 ("VT", '\VT'), 1819 ("FF", '\FF'), 1820 ("CR", '\CR'), 1821 ("SO", '\SO'), 1822 ("SI", '\SI'), 1823 ("DLE", '\DLE'), 1824 ("DC1", '\DC1'), 1825 ("DC2", '\DC2'), 1826 ("DC3", '\DC3'), 1827 ("DC4", '\DC4'), 1828 ("NAK", '\NAK'), 1829 ("SYN", '\SYN'), 1830 ("ETB", '\ETB'), 1831 ("CAN", '\CAN'), 1832 ("EM", '\EM'), 1833 ("SUB", '\SUB'), 1834 ("ESC", '\ESC'), 1835 ("FS", '\FS'), 1836 ("GS", '\GS'), 1837 ("RS", '\RS'), 1838 ("US", '\US'), 1839 ("SP", '\SP'), 1840 ("DEL", '\DEL') 1841 ] 1842 1843-- before calling lit_error, ensure that the current input is pointing to 1844-- the position of the error in the buffer. This is so that we can report 1845-- a correct location to the user, but also so we can detect UTF-8 decoding 1846-- errors if they occur. 1847lit_error :: AlexInput -> P a 1848lit_error i = do setInput i; lexError "lexical error in string/character literal" 1849 1850getCharOrFail :: AlexInput -> P Char 1851getCharOrFail i = do 1852 case alexGetChar' i of 1853 Nothing -> lexError "unexpected end-of-file in string/character literal" 1854 Just (c,i) -> do setInput i; return c 1855 1856-- ----------------------------------------------------------------------------- 1857-- QuasiQuote 1858 1859lex_qquasiquote_tok :: Action 1860lex_qquasiquote_tok span buf len = do 1861 let (qual, quoter) = splitQualName (stepOn buf) (len - 2) False 1862 quoteStart <- getRealSrcLoc 1863 quote <- lex_quasiquote quoteStart "" 1864 end <- getRealSrcLoc 1865 return (L (mkRealSrcSpan (realSrcSpanStart span) end) 1866 (ITqQuasiQuote (qual, 1867 quoter, 1868 mkFastString (reverse quote), 1869 mkRealSrcSpan quoteStart end))) 1870 1871lex_quasiquote_tok :: Action 1872lex_quasiquote_tok span buf len = do 1873 let quoter = tail (lexemeToString buf (len - 1)) 1874 -- 'tail' drops the initial '[', 1875 -- while the -1 drops the trailing '|' 1876 quoteStart <- getRealSrcLoc 1877 quote <- lex_quasiquote quoteStart "" 1878 end <- getRealSrcLoc 1879 return (L (mkRealSrcSpan (realSrcSpanStart span) end) 1880 (ITquasiQuote (mkFastString quoter, 1881 mkFastString (reverse quote), 1882 mkRealSrcSpan quoteStart end))) 1883 1884lex_quasiquote :: RealSrcLoc -> String -> P String 1885lex_quasiquote start s = do 1886 i <- getInput 1887 case alexGetChar' i of 1888 Nothing -> quasiquote_error start 1889 1890 -- NB: The string "|]" terminates the quasiquote, 1891 -- with absolutely no escaping. See the extensive 1892 -- discussion on #5348 for why there is no 1893 -- escape handling. 1894 Just ('|',i) 1895 | Just (']',i) <- alexGetChar' i 1896 -> do { setInput i; return s } 1897 1898 Just (c, i) -> do 1899 setInput i; lex_quasiquote start (c : s) 1900 1901quasiquote_error :: RealSrcLoc -> P a 1902quasiquote_error start = do 1903 (AI end buf) <- getInput 1904 reportLexError start end buf "unterminated quasiquotation" 1905 1906-- ----------------------------------------------------------------------------- 1907-- Warnings 1908 1909warnTab :: Action 1910warnTab srcspan _buf _len = do 1911 addTabWarning srcspan 1912 lexToken 1913 1914warnThen :: WarningFlag -> SDoc -> Action -> Action 1915warnThen option warning action srcspan buf len = do 1916 addWarning option (RealSrcSpan srcspan) warning 1917 action srcspan buf len 1918 1919-- ----------------------------------------------------------------------------- 1920-- The Parse Monad 1921 1922-- | Do we want to generate ';' layout tokens? In some cases we just want to 1923-- generate '}', e.g. in MultiWayIf we don't need ';'s because '|' separates 1924-- alternatives (unlike a `case` expression where we need ';' to as a separator 1925-- between alternatives). 1926type GenSemic = Bool 1927 1928generateSemic, dontGenerateSemic :: GenSemic 1929generateSemic = True 1930dontGenerateSemic = False 1931 1932data LayoutContext 1933 = NoLayout 1934 | Layout !Int !GenSemic 1935 deriving Show 1936 1937-- | The result of running a parser. 1938data ParseResult a 1939 = POk -- ^ The parser has consumed a (possibly empty) prefix 1940 -- of the input and produced a result. Use 'getMessages' 1941 -- to check for accumulated warnings and non-fatal errors. 1942 PState -- ^ The resulting parsing state. Can be used to resume parsing. 1943 a -- ^ The resulting value. 1944 | PFailed -- ^ The parser has consumed a (possibly empty) prefix 1945 -- of the input and failed. 1946 PState -- ^ The parsing state right before failure, including the fatal 1947 -- parse error. 'getMessages' and 'getErrorMessages' must return 1948 -- a non-empty bag of errors. 1949 1950-- | Test whether a 'WarningFlag' is set 1951warnopt :: WarningFlag -> ParserFlags -> Bool 1952warnopt f options = f `EnumSet.member` pWarningFlags options 1953 1954-- | The subset of the 'DynFlags' used by the parser. 1955-- See 'mkParserFlags' or 'mkParserFlags'' for ways to construct this. 1956data ParserFlags = ParserFlags { 1957 pWarningFlags :: EnumSet WarningFlag 1958 , pThisPackage :: UnitId -- ^ key of package currently being compiled 1959 , pExtsBitmap :: !ExtsBitmap -- ^ bitmap of permitted extensions 1960 } 1961 1962data PState = PState { 1963 buffer :: StringBuffer, 1964 options :: ParserFlags, 1965 -- This needs to take DynFlags as an argument until 1966 -- we have a fix for #10143 1967 messages :: DynFlags -> Messages, 1968 tab_first :: Maybe RealSrcSpan, -- pos of first tab warning in the file 1969 tab_count :: !Int, -- number of tab warnings in the file 1970 last_tk :: Maybe Token, 1971 last_loc :: RealSrcSpan, -- pos of previous token 1972 last_len :: !Int, -- len of previous token 1973 loc :: RealSrcLoc, -- current loc (end of prev token + 1) 1974 context :: [LayoutContext], 1975 lex_state :: [Int], 1976 srcfiles :: [FastString], 1977 -- Used in the alternative layout rule: 1978 -- These tokens are the next ones to be sent out. They are 1979 -- just blindly emitted, without the rule looking at them again: 1980 alr_pending_implicit_tokens :: [RealLocated Token], 1981 -- This is the next token to be considered or, if it is Nothing, 1982 -- we need to get the next token from the input stream: 1983 alr_next_token :: Maybe (RealLocated Token), 1984 -- This is what we consider to be the location of the last token 1985 -- emitted: 1986 alr_last_loc :: RealSrcSpan, 1987 -- The stack of layout contexts: 1988 alr_context :: [ALRContext], 1989 -- Are we expecting a '{'? If it's Just, then the ALRLayout tells 1990 -- us what sort of layout the '{' will open: 1991 alr_expecting_ocurly :: Maybe ALRLayout, 1992 -- Have we just had the '}' for a let block? If so, than an 'in' 1993 -- token doesn't need to close anything: 1994 alr_justClosedExplicitLetBlock :: Bool, 1995 1996 -- The next three are used to implement Annotations giving the 1997 -- locations of 'noise' tokens in the source, so that users of 1998 -- the GHC API can do source to source conversions. 1999 -- See note [Api annotations] in ApiAnnotation.hs 2000 annotations :: [(ApiAnnKey,[SrcSpan])], 2001 comment_q :: [Located AnnotationComment], 2002 annotations_comments :: [(SrcSpan,[Located AnnotationComment])] 2003 } 2004 -- last_loc and last_len are used when generating error messages, 2005 -- and in pushCurrentContext only. Sigh, if only Happy passed the 2006 -- current token to happyError, we could at least get rid of last_len. 2007 -- Getting rid of last_loc would require finding another way to 2008 -- implement pushCurrentContext (which is only called from one place). 2009 2010data ALRContext = ALRNoLayout Bool{- does it contain commas? -} 2011 Bool{- is it a 'let' block? -} 2012 | ALRLayout ALRLayout Int 2013data ALRLayout = ALRLayoutLet 2014 | ALRLayoutWhere 2015 | ALRLayoutOf 2016 | ALRLayoutDo 2017 2018-- | The parsing monad, isomorphic to @StateT PState Maybe@. 2019newtype P a = P { unP :: PState -> ParseResult a } 2020 2021instance Functor P where 2022 fmap = liftM 2023 2024instance Applicative P where 2025 pure = returnP 2026 (<*>) = ap 2027 2028instance Monad P where 2029 (>>=) = thenP 2030#if !MIN_VERSION_base(4,13,0) 2031 fail = MonadFail.fail 2032#endif 2033 2034instance MonadFail.MonadFail P where 2035 fail = failMsgP 2036 2037returnP :: a -> P a 2038returnP a = a `seq` (P $ \s -> POk s a) 2039 2040thenP :: P a -> (a -> P b) -> P b 2041(P m) `thenP` k = P $ \ s -> 2042 case m s of 2043 POk s1 a -> (unP (k a)) s1 2044 PFailed s1 -> PFailed s1 2045 2046failMsgP :: String -> P a 2047failMsgP msg = do 2048 pState <- getPState 2049 addFatalError (RealSrcSpan (last_loc pState)) (text msg) 2050 2051failLocMsgP :: RealSrcLoc -> RealSrcLoc -> String -> P a 2052failLocMsgP loc1 loc2 str = 2053 addFatalError (RealSrcSpan (mkRealSrcSpan loc1 loc2)) (text str) 2054 2055getPState :: P PState 2056getPState = P $ \s -> POk s s 2057 2058withThisPackage :: (UnitId -> a) -> P a 2059withThisPackage f = P $ \s@(PState{options = o}) -> POk s (f (pThisPackage o)) 2060 2061getExts :: P ExtsBitmap 2062getExts = P $ \s -> POk s (pExtsBitmap . options $ s) 2063 2064setExts :: (ExtsBitmap -> ExtsBitmap) -> P () 2065setExts f = P $ \s -> POk s { 2066 options = 2067 let p = options s 2068 in p { pExtsBitmap = f (pExtsBitmap p) } 2069 } () 2070 2071setSrcLoc :: RealSrcLoc -> P () 2072setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} () 2073 2074getRealSrcLoc :: P RealSrcLoc 2075getRealSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc 2076 2077addSrcFile :: FastString -> P () 2078addSrcFile f = P $ \s -> POk s{ srcfiles = f : srcfiles s } () 2079 2080setLastToken :: RealSrcSpan -> Int -> P () 2081setLastToken loc len = P $ \s -> POk s { 2082 last_loc=loc, 2083 last_len=len 2084 } () 2085 2086setLastTk :: Token -> P () 2087setLastTk tk = P $ \s -> POk s { last_tk = Just tk } () 2088 2089getLastTk :: P (Maybe Token) 2090getLastTk = P $ \s@(PState { last_tk = last_tk }) -> POk s last_tk 2091 2092data AlexInput = AI RealSrcLoc StringBuffer 2093 2094{- 2095Note [Unicode in Alex] 2096~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2097Although newer versions of Alex support unicode, this grammar is processed with 2098the old style '--latin1' behaviour. This means that when implementing the 2099functions 2100 2101 alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) 2102 alexInputPrevChar :: AlexInput -> Char 2103 2104which Alex uses to take apart our 'AlexInput', we must 2105 2106 * return a latin1 character in the 'Word8' that 'alexGetByte' expects 2107 * return a latin1 character in 'alexInputPrevChar'. 2108 2109We handle this in 'adjustChar' by squishing entire classes of unicode 2110characters into single bytes. 2111-} 2112 2113{-# INLINE adjustChar #-} 2114adjustChar :: Char -> Word8 2115adjustChar c = fromIntegral $ ord adj_c 2116 where non_graphic = '\x00' 2117 upper = '\x01' 2118 lower = '\x02' 2119 digit = '\x03' 2120 symbol = '\x04' 2121 space = '\x05' 2122 other_graphic = '\x06' 2123 uniidchar = '\x07' 2124 2125 adj_c 2126 | c <= '\x07' = non_graphic 2127 | c <= '\x7f' = c 2128 -- Alex doesn't handle Unicode, so when Unicode 2129 -- character is encountered we output these values 2130 -- with the actual character value hidden in the state. 2131 | otherwise = 2132 -- NB: The logic behind these definitions is also reflected 2133 -- in basicTypes/Lexeme.hs 2134 -- Any changes here should likely be reflected there. 2135 2136 case generalCategory c of 2137 UppercaseLetter -> upper 2138 LowercaseLetter -> lower 2139 TitlecaseLetter -> upper 2140 ModifierLetter -> uniidchar -- see #10196 2141 OtherLetter -> lower -- see #1103 2142 NonSpacingMark -> uniidchar -- see #7650 2143 SpacingCombiningMark -> other_graphic 2144 EnclosingMark -> other_graphic 2145 DecimalNumber -> digit 2146 LetterNumber -> other_graphic 2147 OtherNumber -> digit -- see #4373 2148 ConnectorPunctuation -> symbol 2149 DashPunctuation -> symbol 2150 OpenPunctuation -> other_graphic 2151 ClosePunctuation -> other_graphic 2152 InitialQuote -> other_graphic 2153 FinalQuote -> other_graphic 2154 OtherPunctuation -> symbol 2155 MathSymbol -> symbol 2156 CurrencySymbol -> symbol 2157 ModifierSymbol -> symbol 2158 OtherSymbol -> symbol 2159 Space -> space 2160 _other -> non_graphic 2161 2162-- Getting the previous 'Char' isn't enough here - we need to convert it into 2163-- the same format that 'alexGetByte' would have produced. 2164-- 2165-- See Note [Unicode in Alex] and #13986. 2166alexInputPrevChar :: AlexInput -> Char 2167alexInputPrevChar (AI _ buf) = chr (fromIntegral (adjustChar pc)) 2168 where pc = prevChar buf '\n' 2169 2170-- backwards compatibility for Alex 2.x 2171alexGetChar :: AlexInput -> Maybe (Char,AlexInput) 2172alexGetChar inp = case alexGetByte inp of 2173 Nothing -> Nothing 2174 Just (b,i) -> c `seq` Just (c,i) 2175 where c = chr $ fromIntegral b 2176 2177-- See Note [Unicode in Alex] 2178alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) 2179alexGetByte (AI loc s) 2180 | atEnd s = Nothing 2181 | otherwise = byte `seq` loc' `seq` s' `seq` 2182 --trace (show (ord c)) $ 2183 Just (byte, (AI loc' s')) 2184 where (c,s') = nextChar s 2185 loc' = advanceSrcLoc loc c 2186 byte = adjustChar c 2187 2188-- This version does not squash unicode characters, it is used when 2189-- lexing strings. 2190alexGetChar' :: AlexInput -> Maybe (Char,AlexInput) 2191alexGetChar' (AI loc s) 2192 | atEnd s = Nothing 2193 | otherwise = c `seq` loc' `seq` s' `seq` 2194 --trace (show (ord c)) $ 2195 Just (c, (AI loc' s')) 2196 where (c,s') = nextChar s 2197 loc' = advanceSrcLoc loc c 2198 2199getInput :: P AlexInput 2200getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b) 2201 2202setInput :: AlexInput -> P () 2203setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } () 2204 2205nextIsEOF :: P Bool 2206nextIsEOF = do 2207 AI _ s <- getInput 2208 return $ atEnd s 2209 2210pushLexState :: Int -> P () 2211pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} () 2212 2213popLexState :: P Int 2214popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls 2215 2216getLexState :: P Int 2217getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls 2218 2219popNextToken :: P (Maybe (RealLocated Token)) 2220popNextToken 2221 = P $ \s@PState{ alr_next_token = m } -> 2222 POk (s {alr_next_token = Nothing}) m 2223 2224activeContext :: P Bool 2225activeContext = do 2226 ctxt <- getALRContext 2227 expc <- getAlrExpectingOCurly 2228 impt <- implicitTokenPending 2229 case (ctxt,expc) of 2230 ([],Nothing) -> return impt 2231 _other -> return True 2232 2233setAlrLastLoc :: RealSrcSpan -> P () 2234setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) () 2235 2236getAlrLastLoc :: P RealSrcSpan 2237getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l 2238 2239getALRContext :: P [ALRContext] 2240getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs 2241 2242setALRContext :: [ALRContext] -> P () 2243setALRContext cs = P $ \s -> POk (s {alr_context = cs}) () 2244 2245getJustClosedExplicitLetBlock :: P Bool 2246getJustClosedExplicitLetBlock 2247 = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b 2248 2249setJustClosedExplicitLetBlock :: Bool -> P () 2250setJustClosedExplicitLetBlock b 2251 = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) () 2252 2253setNextToken :: RealLocated Token -> P () 2254setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) () 2255 2256implicitTokenPending :: P Bool 2257implicitTokenPending 2258 = P $ \s@PState{ alr_pending_implicit_tokens = ts } -> 2259 case ts of 2260 [] -> POk s False 2261 _ -> POk s True 2262 2263popPendingImplicitToken :: P (Maybe (RealLocated Token)) 2264popPendingImplicitToken 2265 = P $ \s@PState{ alr_pending_implicit_tokens = ts } -> 2266 case ts of 2267 [] -> POk s Nothing 2268 (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t) 2269 2270setPendingImplicitTokens :: [RealLocated Token] -> P () 2271setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) () 2272 2273getAlrExpectingOCurly :: P (Maybe ALRLayout) 2274getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b 2275 2276setAlrExpectingOCurly :: Maybe ALRLayout -> P () 2277setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) () 2278 2279-- | For reasons of efficiency, boolean parsing flags (eg, language extensions 2280-- or whether we are currently in a @RULE@ pragma) are represented by a bitmap 2281-- stored in a @Word64@. 2282type ExtsBitmap = Word64 2283 2284xbit :: ExtBits -> ExtsBitmap 2285xbit = bit . fromEnum 2286 2287xtest :: ExtBits -> ExtsBitmap -> Bool 2288xtest ext xmap = testBit xmap (fromEnum ext) 2289 2290-- | Various boolean flags, mostly language extensions, that impact lexing and 2291-- parsing. Note that a handful of these can change during lexing/parsing. 2292data ExtBits 2293 -- Flags that are constant once parsing starts 2294 = FfiBit 2295 | InterruptibleFfiBit 2296 | CApiFfiBit 2297 | ArrowsBit 2298 | ThBit 2299 | ThQuotesBit 2300 | IpBit 2301 | OverloadedLabelsBit -- #x overloaded labels 2302 | ExplicitForallBit -- the 'forall' keyword 2303 | BangPatBit -- Tells the parser to understand bang-patterns 2304 -- (doesn't affect the lexer) 2305 | PatternSynonymsBit -- pattern synonyms 2306 | HaddockBit-- Lex and parse Haddock comments 2307 | MagicHashBit -- "#" in both functions and operators 2308 | RecursiveDoBit -- mdo 2309 | UnicodeSyntaxBit -- the forall symbol, arrow symbols, etc 2310 | UnboxedTuplesBit -- (# and #) 2311 | UnboxedSumsBit -- (# and #) 2312 | DatatypeContextsBit 2313 | MonadComprehensionsBit 2314 | TransformComprehensionsBit 2315 | QqBit -- enable quasiquoting 2316 | RawTokenStreamBit -- producing a token stream with all comments included 2317 | AlternativeLayoutRuleBit 2318 | ALRTransitionalBit 2319 | RelaxedLayoutBit 2320 | NondecreasingIndentationBit 2321 | SafeHaskellBit 2322 | TraditionalRecordSyntaxBit 2323 | ExplicitNamespacesBit 2324 | LambdaCaseBit 2325 | BinaryLiteralsBit 2326 | NegativeLiteralsBit 2327 | HexFloatLiteralsBit 2328 | TypeApplicationsBit 2329 | StaticPointersBit 2330 | NumericUnderscoresBit 2331 | StarIsTypeBit 2332 | BlockArgumentsBit 2333 | NPlusKPatternsBit 2334 | DoAndIfThenElseBit 2335 | MultiWayIfBit 2336 | GadtSyntaxBit 2337 | ImportQualifiedPostBit 2338 2339 -- Flags that are updated once parsing starts 2340 | InRulePragBit 2341 | InNestedCommentBit -- See Note [Nested comment line pragmas] 2342 | UsePosPragsBit 2343 -- ^ If this is enabled, '{-# LINE ... -#}' and '{-# COLUMN ... #-}' 2344 -- update the internal position. Otherwise, those pragmas are lexed as 2345 -- tokens of their own. 2346 deriving Enum 2347 2348 2349 2350 2351 2352-- PState for parsing options pragmas 2353-- 2354pragState :: DynFlags -> StringBuffer -> RealSrcLoc -> PState 2355pragState dynflags buf loc = (mkPState dynflags buf loc) { 2356 lex_state = [bol, option_prags, 0] 2357 } 2358 2359{-# INLINE mkParserFlags' #-} 2360mkParserFlags' 2361 :: EnumSet WarningFlag -- ^ warnings flags enabled 2362 -> EnumSet LangExt.Extension -- ^ permitted language extensions enabled 2363 -> UnitId -- ^ key of package currently being compiled 2364 -> Bool -- ^ are safe imports on? 2365 -> Bool -- ^ keeping Haddock comment tokens 2366 -> Bool -- ^ keep regular comment tokens 2367 2368 -> Bool 2369 -- ^ If this is enabled, '{-# LINE ... -#}' and '{-# COLUMN ... #-}' update 2370 -- the internal position kept by the parser. Otherwise, those pragmas are 2371 -- lexed as 'ITline_prag' and 'ITcolumn_prag' tokens. 2372 2373 -> ParserFlags 2374-- ^ Given exactly the information needed, set up the 'ParserFlags' 2375mkParserFlags' warningFlags extensionFlags thisPackage 2376 safeImports isHaddock rawTokStream usePosPrags = 2377 ParserFlags { 2378 pWarningFlags = warningFlags 2379 , pThisPackage = thisPackage 2380 , pExtsBitmap = safeHaskellBit .|. langExtBits .|. optBits 2381 } 2382 where 2383 safeHaskellBit = SafeHaskellBit `setBitIf` safeImports 2384 langExtBits = 2385 FfiBit `xoptBit` LangExt.ForeignFunctionInterface 2386 .|. InterruptibleFfiBit `xoptBit` LangExt.InterruptibleFFI 2387 .|. CApiFfiBit `xoptBit` LangExt.CApiFFI 2388 .|. ArrowsBit `xoptBit` LangExt.Arrows 2389 .|. ThBit `xoptBit` LangExt.TemplateHaskell 2390 .|. ThQuotesBit `xoptBit` LangExt.TemplateHaskellQuotes 2391 .|. QqBit `xoptBit` LangExt.QuasiQuotes 2392 .|. IpBit `xoptBit` LangExt.ImplicitParams 2393 .|. OverloadedLabelsBit `xoptBit` LangExt.OverloadedLabels 2394 .|. ExplicitForallBit `xoptBit` LangExt.ExplicitForAll 2395 .|. BangPatBit `xoptBit` LangExt.BangPatterns 2396 .|. MagicHashBit `xoptBit` LangExt.MagicHash 2397 .|. RecursiveDoBit `xoptBit` LangExt.RecursiveDo 2398 .|. UnicodeSyntaxBit `xoptBit` LangExt.UnicodeSyntax 2399 .|. UnboxedTuplesBit `xoptBit` LangExt.UnboxedTuples 2400 .|. UnboxedSumsBit `xoptBit` LangExt.UnboxedSums 2401 .|. DatatypeContextsBit `xoptBit` LangExt.DatatypeContexts 2402 .|. TransformComprehensionsBit `xoptBit` LangExt.TransformListComp 2403 .|. MonadComprehensionsBit `xoptBit` LangExt.MonadComprehensions 2404 .|. AlternativeLayoutRuleBit `xoptBit` LangExt.AlternativeLayoutRule 2405 .|. ALRTransitionalBit `xoptBit` LangExt.AlternativeLayoutRuleTransitional 2406 .|. RelaxedLayoutBit `xoptBit` LangExt.RelaxedLayout 2407 .|. NondecreasingIndentationBit `xoptBit` LangExt.NondecreasingIndentation 2408 .|. TraditionalRecordSyntaxBit `xoptBit` LangExt.TraditionalRecordSyntax 2409 .|. ExplicitNamespacesBit `xoptBit` LangExt.ExplicitNamespaces 2410 .|. LambdaCaseBit `xoptBit` LangExt.LambdaCase 2411 .|. BinaryLiteralsBit `xoptBit` LangExt.BinaryLiterals 2412 .|. NegativeLiteralsBit `xoptBit` LangExt.NegativeLiterals 2413 .|. HexFloatLiteralsBit `xoptBit` LangExt.HexFloatLiterals 2414 .|. PatternSynonymsBit `xoptBit` LangExt.PatternSynonyms 2415 .|. TypeApplicationsBit `xoptBit` LangExt.TypeApplications 2416 .|. StaticPointersBit `xoptBit` LangExt.StaticPointers 2417 .|. NumericUnderscoresBit `xoptBit` LangExt.NumericUnderscores 2418 .|. StarIsTypeBit `xoptBit` LangExt.StarIsType 2419 .|. BlockArgumentsBit `xoptBit` LangExt.BlockArguments 2420 .|. NPlusKPatternsBit `xoptBit` LangExt.NPlusKPatterns 2421 .|. DoAndIfThenElseBit `xoptBit` LangExt.DoAndIfThenElse 2422 .|. MultiWayIfBit `xoptBit` LangExt.MultiWayIf 2423 .|. GadtSyntaxBit `xoptBit` LangExt.GADTSyntax 2424 .|. ImportQualifiedPostBit `xoptBit` LangExt.ImportQualifiedPost 2425 optBits = 2426 HaddockBit `setBitIf` isHaddock 2427 .|. RawTokenStreamBit `setBitIf` rawTokStream 2428 .|. UsePosPragsBit `setBitIf` usePosPrags 2429 2430 xoptBit bit ext = bit `setBitIf` EnumSet.member ext extensionFlags 2431 2432 setBitIf :: ExtBits -> Bool -> ExtsBitmap 2433 b `setBitIf` cond | cond = xbit b 2434 | otherwise = 0 2435 2436-- | Extracts the flag information needed for parsing 2437mkParserFlags :: DynFlags -> ParserFlags 2438mkParserFlags = 2439 mkParserFlags' 2440 <$> DynFlags.warningFlags 2441 <*> DynFlags.extensionFlags 2442 <*> DynFlags.thisPackage 2443 <*> safeImportsOn 2444 <*> gopt Opt_Haddock 2445 <*> gopt Opt_KeepRawTokenStream 2446 <*> const True 2447 2448-- | Creates a parse state from a 'DynFlags' value 2449mkPState :: DynFlags -> StringBuffer -> RealSrcLoc -> PState 2450mkPState flags = mkPStatePure (mkParserFlags flags) 2451 2452-- | Creates a parse state from a 'ParserFlags' value 2453mkPStatePure :: ParserFlags -> StringBuffer -> RealSrcLoc -> PState 2454mkPStatePure options buf loc = 2455 PState { 2456 buffer = buf, 2457 options = options, 2458 messages = const emptyMessages, 2459 tab_first = Nothing, 2460 tab_count = 0, 2461 last_tk = Nothing, 2462 last_loc = mkRealSrcSpan loc loc, 2463 last_len = 0, 2464 loc = loc, 2465 context = [], 2466 lex_state = [bol, 0], 2467 srcfiles = [], 2468 alr_pending_implicit_tokens = [], 2469 alr_next_token = Nothing, 2470 alr_last_loc = alrInitialLoc (fsLit "<no file>"), 2471 alr_context = [], 2472 alr_expecting_ocurly = Nothing, 2473 alr_justClosedExplicitLetBlock = False, 2474 annotations = [], 2475 comment_q = [], 2476 annotations_comments = [] 2477 } 2478 2479-- | An mtl-style class for monads that support parsing-related operations. 2480-- For example, sometimes we make a second pass over the parsing results to validate, 2481-- disambiguate, or rearrange them, and we do so in the PV monad which cannot consume 2482-- input but can report parsing errors, check for extension bits, and accumulate 2483-- parsing annotations. Both P and PV are instances of MonadP. 2484-- 2485-- MonadP grants us convenient overloading. The other option is to have separate operations 2486-- for each monad: addErrorP vs addErrorPV, getBitP vs getBitPV, and so on. 2487-- 2488class Monad m => MonadP m where 2489 -- | Add a non-fatal error. Use this when the parser can produce a result 2490 -- despite the error. 2491 -- 2492 -- For example, when GHC encounters a @forall@ in a type, 2493 -- but @-XExplicitForAll@ is disabled, the parser constructs @ForAllTy@ 2494 -- as if @-XExplicitForAll@ was enabled, adding a non-fatal error to 2495 -- the accumulator. 2496 -- 2497 -- Control flow wise, non-fatal errors act like warnings: they are added 2498 -- to the accumulator and parsing continues. This allows GHC to report 2499 -- more than one parse error per file. 2500 -- 2501 addError :: SrcSpan -> SDoc -> m () 2502 -- | Add a warning to the accumulator. 2503 -- Use 'getMessages' to get the accumulated warnings. 2504 addWarning :: WarningFlag -> SrcSpan -> SDoc -> m () 2505 -- | Add a fatal error. This will be the last error reported by the parser, and 2506 -- the parser will not produce any result, ending in a 'PFailed' state. 2507 addFatalError :: SrcSpan -> SDoc -> m a 2508 -- | Check if a given flag is currently set in the bitmap. 2509 getBit :: ExtBits -> m Bool 2510 -- | Given a location and a list of AddAnn, apply them all to the location. 2511 addAnnotation :: SrcSpan -- SrcSpan of enclosing AST construct 2512 -> AnnKeywordId -- The first two parameters are the key 2513 -> SrcSpan -- The location of the keyword itself 2514 -> m () 2515 2516appendError 2517 :: SrcSpan 2518 -> SDoc 2519 -> (DynFlags -> Messages) 2520 -> (DynFlags -> Messages) 2521appendError srcspan msg m = 2522 \d -> 2523 let (ws, es) = m d 2524 errormsg = mkErrMsg d srcspan alwaysQualify msg 2525 es' = es `snocBag` errormsg 2526 in (ws, es') 2527 2528appendWarning 2529 :: ParserFlags 2530 -> WarningFlag 2531 -> SrcSpan 2532 -> SDoc 2533 -> (DynFlags -> Messages) 2534 -> (DynFlags -> Messages) 2535appendWarning o option srcspan warning m = 2536 \d -> 2537 let (ws, es) = m d 2538 warning' = makeIntoWarning (Reason option) $ 2539 mkWarnMsg d srcspan alwaysQualify warning 2540 ws' = if warnopt option o then ws `snocBag` warning' else ws 2541 in (ws', es) 2542 2543instance MonadP P where 2544 addError srcspan msg 2545 = P $ \s@PState{messages=m} -> 2546 POk s{messages=appendError srcspan msg m} () 2547 addWarning option srcspan warning 2548 = P $ \s@PState{messages=m, options=o} -> 2549 POk s{messages=appendWarning o option srcspan warning m} () 2550 addFatalError span msg = 2551 addError span msg >> P PFailed 2552 getBit ext = P $ \s -> let b = ext `xtest` pExtsBitmap (options s) 2553 in b `seq` POk s b 2554 addAnnotation l a v = do 2555 addAnnotationOnly l a v 2556 allocateCommentsP l 2557 2558addAnnsAt :: MonadP m => SrcSpan -> [AddAnn] -> m () 2559addAnnsAt l = mapM_ (\(AddAnn a v) -> addAnnotation l a v) 2560 2561addTabWarning :: RealSrcSpan -> P () 2562addTabWarning srcspan 2563 = P $ \s@PState{tab_first=tf, tab_count=tc, options=o} -> 2564 let tf' = if isJust tf then tf else Just srcspan 2565 tc' = tc + 1 2566 s' = if warnopt Opt_WarnTabs o 2567 then s{tab_first = tf', tab_count = tc'} 2568 else s 2569 in POk s' () 2570 2571mkTabWarning :: PState -> DynFlags -> Maybe ErrMsg 2572mkTabWarning PState{tab_first=tf, tab_count=tc} d = 2573 let middle = if tc == 1 2574 then text "" 2575 else text ", and in" <+> speakNOf (tc - 1) (text "further location") 2576 message = text "Tab character found here" 2577 <> middle 2578 <> text "." 2579 $+$ text "Please use spaces instead." 2580 in fmap (\s -> makeIntoWarning (Reason Opt_WarnTabs) $ 2581 mkWarnMsg d (RealSrcSpan s) alwaysQualify message) tf 2582 2583-- | Get a bag of the errors that have been accumulated so far. 2584-- Does not take -Werror into account. 2585getErrorMessages :: PState -> DynFlags -> ErrorMessages 2586getErrorMessages PState{messages=m} d = 2587 let (_, es) = m d in es 2588 2589-- | Get the warnings and errors accumulated so far. 2590-- Does not take -Werror into account. 2591getMessages :: PState -> DynFlags -> Messages 2592getMessages p@PState{messages=m} d = 2593 let (ws, es) = m d 2594 tabwarning = mkTabWarning p d 2595 ws' = maybe ws (`consBag` ws) tabwarning 2596 in (ws', es) 2597 2598getContext :: P [LayoutContext] 2599getContext = P $ \s@PState{context=ctx} -> POk s ctx 2600 2601setContext :: [LayoutContext] -> P () 2602setContext ctx = P $ \s -> POk s{context=ctx} () 2603 2604popContext :: P () 2605popContext = P $ \ s@(PState{ buffer = buf, options = o, context = ctx, 2606 last_len = len, last_loc = last_loc }) -> 2607 case ctx of 2608 (_:tl) -> 2609 POk s{ context = tl } () 2610 [] -> 2611 unP (addFatalError (RealSrcSpan last_loc) (srcParseErr o buf len)) s 2612 2613-- Push a new layout context at the indentation of the last token read. 2614pushCurrentContext :: GenSemic -> P () 2615pushCurrentContext gen_semic = P $ \ s@PState{ last_loc=loc, context=ctx } -> 2616 POk s{context = Layout (srcSpanStartCol loc) gen_semic : ctx} () 2617 2618-- This is only used at the outer level of a module when the 'module' keyword is 2619-- missing. 2620pushModuleContext :: P () 2621pushModuleContext = pushCurrentContext generateSemic 2622 2623getOffside :: P (Ordering, Bool) 2624getOffside = P $ \s@PState{last_loc=loc, context=stk} -> 2625 let offs = srcSpanStartCol loc in 2626 let ord = case stk of 2627 Layout n gen_semic : _ -> 2628 --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $ 2629 (compare offs n, gen_semic) 2630 _ -> 2631 (GT, dontGenerateSemic) 2632 in POk s ord 2633 2634-- --------------------------------------------------------------------------- 2635-- Construct a parse error 2636 2637srcParseErr 2638 :: ParserFlags 2639 -> StringBuffer -- current buffer (placed just after the last token) 2640 -> Int -- length of the previous token 2641 -> MsgDoc 2642srcParseErr options buf len 2643 = if null token 2644 then text "parse error (possibly incorrect indentation or mismatched brackets)" 2645 else text "parse error on input" <+> quotes (text token) 2646 $$ ppWhen (not th_enabled && token == "$") -- #7396 2647 (text "Perhaps you intended to use TemplateHaskell") 2648 $$ ppWhen (token == "<-") 2649 (if mdoInLast100 2650 then text "Perhaps you intended to use RecursiveDo" 2651 else text "Perhaps this statement should be within a 'do' block?") 2652 $$ ppWhen (token == "=" && doInLast100) -- #15849 2653 (text "Perhaps you need a 'let' in a 'do' block?" 2654 $$ text "e.g. 'let x = 5' instead of 'x = 5'") 2655 $$ ppWhen (not ps_enabled && pattern == "pattern ") -- #12429 2656 (text "Perhaps you intended to use PatternSynonyms") 2657 where token = lexemeToString (offsetBytes (-len) buf) len 2658 pattern = decodePrevNChars 8 buf 2659 last100 = decodePrevNChars 100 buf 2660 doInLast100 = "do" `isInfixOf` last100 2661 mdoInLast100 = "mdo" `isInfixOf` last100 2662 th_enabled = ThBit `xtest` pExtsBitmap options 2663 ps_enabled = PatternSynonymsBit `xtest` pExtsBitmap options 2664 2665-- Report a parse failure, giving the span of the previous token as 2666-- the location of the error. This is the entry point for errors 2667-- detected during parsing. 2668srcParseFail :: P a 2669srcParseFail = P $ \s@PState{ buffer = buf, options = o, last_len = len, 2670 last_loc = last_loc } -> 2671 unP (addFatalError (RealSrcSpan last_loc) (srcParseErr o buf len)) s 2672 2673-- A lexical error is reported at a particular position in the source file, 2674-- not over a token range. 2675lexError :: String -> P a 2676lexError str = do 2677 loc <- getRealSrcLoc 2678 (AI end buf) <- getInput 2679 reportLexError loc end buf str 2680 2681-- ----------------------------------------------------------------------------- 2682-- This is the top-level function: called from the parser each time a 2683-- new token is to be read from the input. 2684 2685lexer :: Bool -> (Located Token -> P a) -> P a 2686lexer queueComments cont = do 2687 alr <- getBit AlternativeLayoutRuleBit 2688 let lexTokenFun = if alr then lexTokenAlr else lexToken 2689 (L span tok) <- lexTokenFun 2690 --trace ("token: " ++ show tok) $ do 2691 2692 case tok of 2693 ITeof -> addAnnotationOnly noSrcSpan AnnEofPos (RealSrcSpan span) 2694 _ -> return () 2695 2696 if (queueComments && isDocComment tok) 2697 then queueComment (L (RealSrcSpan span) tok) 2698 else return () 2699 2700 if (queueComments && isComment tok) 2701 then queueComment (L (RealSrcSpan span) tok) >> lexer queueComments cont 2702 else cont (L (RealSrcSpan span) tok) 2703 2704lexTokenAlr :: P (RealLocated Token) 2705lexTokenAlr = do mPending <- popPendingImplicitToken 2706 t <- case mPending of 2707 Nothing -> 2708 do mNext <- popNextToken 2709 t <- case mNext of 2710 Nothing -> lexToken 2711 Just next -> return next 2712 alternativeLayoutRuleToken t 2713 Just t -> 2714 return t 2715 setAlrLastLoc (getRealSrcSpan t) 2716 case unRealSrcSpan t of 2717 ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere) 2718 ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet) 2719 ITof -> setAlrExpectingOCurly (Just ALRLayoutOf) 2720 ITlcase -> setAlrExpectingOCurly (Just ALRLayoutOf) 2721 ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo) 2722 ITmdo -> setAlrExpectingOCurly (Just ALRLayoutDo) 2723 ITrec -> setAlrExpectingOCurly (Just ALRLayoutDo) 2724 _ -> return () 2725 return t 2726 2727alternativeLayoutRuleToken :: RealLocated Token -> P (RealLocated Token) 2728alternativeLayoutRuleToken t 2729 = do context <- getALRContext 2730 lastLoc <- getAlrLastLoc 2731 mExpectingOCurly <- getAlrExpectingOCurly 2732 transitional <- getBit ALRTransitionalBit 2733 justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock 2734 setJustClosedExplicitLetBlock False 2735 let thisLoc = getRealSrcSpan t 2736 thisCol = srcSpanStartCol thisLoc 2737 newLine = srcSpanStartLine thisLoc > srcSpanEndLine lastLoc 2738 case (unRealSrcSpan t, context, mExpectingOCurly) of 2739 -- This case handles a GHC extension to the original H98 2740 -- layout rule... 2741 (ITocurly, _, Just alrLayout) -> 2742 do setAlrExpectingOCurly Nothing 2743 let isLet = case alrLayout of 2744 ALRLayoutLet -> True 2745 _ -> False 2746 setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context) 2747 return t 2748 -- ...and makes this case unnecessary 2749 {- 2750 -- I think our implicit open-curly handling is slightly 2751 -- different to John's, in how it interacts with newlines 2752 -- and "in" 2753 (ITocurly, _, Just _) -> 2754 do setAlrExpectingOCurly Nothing 2755 setNextToken t 2756 lexTokenAlr 2757 -} 2758 (_, ALRLayout _ col : _ls, Just expectingOCurly) 2759 | (thisCol > col) || 2760 (thisCol == col && 2761 isNonDecreasingIndentation expectingOCurly) -> 2762 do setAlrExpectingOCurly Nothing 2763 setALRContext (ALRLayout expectingOCurly thisCol : context) 2764 setNextToken t 2765 return (L thisLoc ITvocurly) 2766 | otherwise -> 2767 do setAlrExpectingOCurly Nothing 2768 setPendingImplicitTokens [L lastLoc ITvccurly] 2769 setNextToken t 2770 return (L lastLoc ITvocurly) 2771 (_, _, Just expectingOCurly) -> 2772 do setAlrExpectingOCurly Nothing 2773 setALRContext (ALRLayout expectingOCurly thisCol : context) 2774 setNextToken t 2775 return (L thisLoc ITvocurly) 2776 -- We do the [] cases earlier than in the spec, as we 2777 -- have an actual EOF token 2778 (ITeof, ALRLayout _ _ : ls, _) -> 2779 do setALRContext ls 2780 setNextToken t 2781 return (L thisLoc ITvccurly) 2782 (ITeof, _, _) -> 2783 return t 2784 -- the other ITeof case omitted; general case below covers it 2785 (ITin, _, _) 2786 | justClosedExplicitLetBlock -> 2787 return t 2788 (ITin, ALRLayout ALRLayoutLet _ : ls, _) 2789 | newLine -> 2790 do setPendingImplicitTokens [t] 2791 setALRContext ls 2792 return (L thisLoc ITvccurly) 2793 -- This next case is to handle a transitional issue: 2794 (ITwhere, ALRLayout _ col : ls, _) 2795 | newLine && thisCol == col && transitional -> 2796 do addWarning Opt_WarnAlternativeLayoutRuleTransitional 2797 (RealSrcSpan thisLoc) 2798 (transitionalAlternativeLayoutWarning 2799 "`where' clause at the same depth as implicit layout block") 2800 setALRContext ls 2801 setNextToken t 2802 -- Note that we use lastLoc, as we may need to close 2803 -- more layouts, or give a semicolon 2804 return (L lastLoc ITvccurly) 2805 -- This next case is to handle a transitional issue: 2806 (ITvbar, ALRLayout _ col : ls, _) 2807 | newLine && thisCol == col && transitional -> 2808 do addWarning Opt_WarnAlternativeLayoutRuleTransitional 2809 (RealSrcSpan thisLoc) 2810 (transitionalAlternativeLayoutWarning 2811 "`|' at the same depth as implicit layout block") 2812 setALRContext ls 2813 setNextToken t 2814 -- Note that we use lastLoc, as we may need to close 2815 -- more layouts, or give a semicolon 2816 return (L lastLoc ITvccurly) 2817 (_, ALRLayout _ col : ls, _) 2818 | newLine && thisCol == col -> 2819 do setNextToken t 2820 let loc = realSrcSpanStart thisLoc 2821 zeroWidthLoc = mkRealSrcSpan loc loc 2822 return (L zeroWidthLoc ITsemi) 2823 | newLine && thisCol < col -> 2824 do setALRContext ls 2825 setNextToken t 2826 -- Note that we use lastLoc, as we may need to close 2827 -- more layouts, or give a semicolon 2828 return (L lastLoc ITvccurly) 2829 -- We need to handle close before open, as 'then' is both 2830 -- an open and a close 2831 (u, _, _) 2832 | isALRclose u -> 2833 case context of 2834 ALRLayout _ _ : ls -> 2835 do setALRContext ls 2836 setNextToken t 2837 return (L thisLoc ITvccurly) 2838 ALRNoLayout _ isLet : ls -> 2839 do let ls' = if isALRopen u 2840 then ALRNoLayout (containsCommas u) False : ls 2841 else ls 2842 setALRContext ls' 2843 when isLet $ setJustClosedExplicitLetBlock True 2844 return t 2845 [] -> 2846 do let ls = if isALRopen u 2847 then [ALRNoLayout (containsCommas u) False] 2848 else [] 2849 setALRContext ls 2850 -- XXX This is an error in John's code, but 2851 -- it looks reachable to me at first glance 2852 return t 2853 (u, _, _) 2854 | isALRopen u -> 2855 do setALRContext (ALRNoLayout (containsCommas u) False : context) 2856 return t 2857 (ITin, ALRLayout ALRLayoutLet _ : ls, _) -> 2858 do setALRContext ls 2859 setPendingImplicitTokens [t] 2860 return (L thisLoc ITvccurly) 2861 (ITin, ALRLayout _ _ : ls, _) -> 2862 do setALRContext ls 2863 setNextToken t 2864 return (L thisLoc ITvccurly) 2865 -- the other ITin case omitted; general case below covers it 2866 (ITcomma, ALRLayout _ _ : ls, _) 2867 | topNoLayoutContainsCommas ls -> 2868 do setALRContext ls 2869 setNextToken t 2870 return (L thisLoc ITvccurly) 2871 (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) -> 2872 do setALRContext ls 2873 setPendingImplicitTokens [t] 2874 return (L thisLoc ITvccurly) 2875 -- the other ITwhere case omitted; general case below covers it 2876 (_, _, _) -> return t 2877 2878transitionalAlternativeLayoutWarning :: String -> SDoc 2879transitionalAlternativeLayoutWarning msg 2880 = text "transitional layout will not be accepted in the future:" 2881 $$ text msg 2882 2883isALRopen :: Token -> Bool 2884isALRopen ITcase = True 2885isALRopen ITif = True 2886isALRopen ITthen = True 2887isALRopen IToparen = True 2888isALRopen ITobrack = True 2889isALRopen ITocurly = True 2890-- GHC Extensions: 2891isALRopen IToubxparen = True 2892isALRopen ITparenEscape = True 2893isALRopen ITparenTyEscape = True 2894isALRopen _ = False 2895 2896isALRclose :: Token -> Bool 2897isALRclose ITof = True 2898isALRclose ITthen = True 2899isALRclose ITelse = True 2900isALRclose ITcparen = True 2901isALRclose ITcbrack = True 2902isALRclose ITccurly = True 2903-- GHC Extensions: 2904isALRclose ITcubxparen = True 2905isALRclose _ = False 2906 2907isNonDecreasingIndentation :: ALRLayout -> Bool 2908isNonDecreasingIndentation ALRLayoutDo = True 2909isNonDecreasingIndentation _ = False 2910 2911containsCommas :: Token -> Bool 2912containsCommas IToparen = True 2913containsCommas ITobrack = True 2914-- John doesn't have {} as containing commas, but records contain them, 2915-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs 2916-- (defaultInstallDirs). 2917containsCommas ITocurly = True 2918-- GHC Extensions: 2919containsCommas IToubxparen = True 2920containsCommas _ = False 2921 2922topNoLayoutContainsCommas :: [ALRContext] -> Bool 2923topNoLayoutContainsCommas [] = False 2924topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls 2925topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b 2926 2927lexToken :: P (RealLocated Token) 2928lexToken = do 2929 inp@(AI loc1 buf) <- getInput 2930 sc <- getLexState 2931 exts <- getExts 2932 case alexScanUser exts inp sc of 2933 AlexEOF -> do 2934 let span = mkRealSrcSpan loc1 loc1 2935 setLastToken span 0 2936 return (L span ITeof) 2937 AlexError (AI loc2 buf) -> 2938 reportLexError loc1 loc2 buf "lexical error" 2939 AlexSkip inp2 _ -> do 2940 setInput inp2 2941 lexToken 2942 AlexToken inp2@(AI end buf2) _ t -> do 2943 setInput inp2 2944 let span = mkRealSrcSpan loc1 end 2945 let bytes = byteDiff buf buf2 2946 span `seq` setLastToken span bytes 2947 lt <- t span buf bytes 2948 case unRealSrcSpan lt of 2949 ITlineComment _ -> return lt 2950 ITblockComment _ -> return lt 2951 lt' -> do 2952 setLastTk lt' 2953 return lt 2954 2955reportLexError :: RealSrcLoc -> RealSrcLoc -> StringBuffer -> [Char] -> P a 2956reportLexError loc1 loc2 buf str 2957 | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input") 2958 | otherwise = 2959 let c = fst (nextChar buf) 2960 in if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar# 2961 then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)") 2962 else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c) 2963 2964lexTokenStream :: StringBuffer -> RealSrcLoc -> DynFlags -> ParseResult [Located Token] 2965lexTokenStream buf loc dflags = unP go initState{ options = opts' } 2966 where dflags' = gopt_set (gopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream 2967 initState@PState{ options = opts } = mkPState dflags' buf loc 2968 opts' = opts{ pExtsBitmap = complement (xbit UsePosPragsBit) .&. pExtsBitmap opts } 2969 go = do 2970 ltok <- lexer False return 2971 case ltok of 2972 L _ ITeof -> return [] 2973 _ -> liftM (ltok:) go 2974 2975linePrags = Map.singleton "line" linePrag 2976 2977fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag), 2978 ("options_ghc", lex_string_prag IToptions_prag), 2979 ("options_haddock", lex_string_prag ITdocOptions), 2980 ("language", token ITlanguage_prag), 2981 ("include", lex_string_prag ITinclude_prag)]) 2982 2983ignoredPrags = Map.fromList (map ignored pragmas) 2984 where ignored opt = (opt, nested_comment lexToken) 2985 impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"] 2986 options_pragmas = map ("options_" ++) impls 2987 -- CFILES is a hugs-only thing. 2988 pragmas = options_pragmas ++ ["cfiles", "contract"] 2989 2990oneWordPrags = Map.fromList [ 2991 ("rules", rulePrag), 2992 ("inline", 2993 strtoken (\s -> (ITinline_prag (SourceText s) Inline FunLike))), 2994 ("inlinable", 2995 strtoken (\s -> (ITinline_prag (SourceText s) Inlinable FunLike))), 2996 ("inlineable", 2997 strtoken (\s -> (ITinline_prag (SourceText s) Inlinable FunLike))), 2998 -- Spelling variant 2999 ("notinline", 3000 strtoken (\s -> (ITinline_prag (SourceText s) NoInline FunLike))), 3001 ("specialize", strtoken (\s -> ITspec_prag (SourceText s))), 3002 ("source", strtoken (\s -> ITsource_prag (SourceText s))), 3003 ("warning", strtoken (\s -> ITwarning_prag (SourceText s))), 3004 ("deprecated", strtoken (\s -> ITdeprecated_prag (SourceText s))), 3005 ("scc", strtoken (\s -> ITscc_prag (SourceText s))), 3006 ("generated", strtoken (\s -> ITgenerated_prag (SourceText s))), 3007 ("core", strtoken (\s -> ITcore_prag (SourceText s))), 3008 ("unpack", strtoken (\s -> ITunpack_prag (SourceText s))), 3009 ("nounpack", strtoken (\s -> ITnounpack_prag (SourceText s))), 3010 ("ann", strtoken (\s -> ITann_prag (SourceText s))), 3011 ("minimal", strtoken (\s -> ITminimal_prag (SourceText s))), 3012 ("overlaps", strtoken (\s -> IToverlaps_prag (SourceText s))), 3013 ("overlappable", strtoken (\s -> IToverlappable_prag (SourceText s))), 3014 ("overlapping", strtoken (\s -> IToverlapping_prag (SourceText s))), 3015 ("incoherent", strtoken (\s -> ITincoherent_prag (SourceText s))), 3016 ("ctype", strtoken (\s -> ITctype (SourceText s))), 3017 ("complete", strtoken (\s -> ITcomplete_prag (SourceText s))), 3018 ("column", columnPrag) 3019 ] 3020 3021twoWordPrags = Map.fromList [ 3022 ("inline conlike", 3023 strtoken (\s -> (ITinline_prag (SourceText s) Inline ConLike))), 3024 ("notinline conlike", 3025 strtoken (\s -> (ITinline_prag (SourceText s) NoInline ConLike))), 3026 ("specialize inline", 3027 strtoken (\s -> (ITspec_inline_prag (SourceText s) True))), 3028 ("specialize notinline", 3029 strtoken (\s -> (ITspec_inline_prag (SourceText s) False))) 3030 ] 3031 3032dispatch_pragmas :: Map String Action -> Action 3033dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of 3034 Just found -> found span buf len 3035 Nothing -> lexError "unknown pragma" 3036 3037known_pragma :: Map String Action -> AlexAccPred ExtsBitmap 3038known_pragma prags _ (AI _ startbuf) _ (AI _ curbuf) 3039 = isKnown && nextCharIsNot curbuf pragmaNameChar 3040 where l = lexemeToString startbuf (byteDiff startbuf curbuf) 3041 isKnown = isJust $ Map.lookup (clean_pragma l) prags 3042 pragmaNameChar c = isAlphaNum c || c == '_' 3043 3044clean_pragma :: String -> String 3045clean_pragma prag = canon_ws (map toLower (unprefix prag)) 3046 where unprefix prag' = case stripPrefix "{-#" prag' of 3047 Just rest -> rest 3048 Nothing -> prag' 3049 canonical prag' = case prag' of 3050 "noinline" -> "notinline" 3051 "specialise" -> "specialize" 3052 "constructorlike" -> "conlike" 3053 _ -> prag' 3054 canon_ws s = unwords (map canonical (words s)) 3055 3056 3057 3058{- 3059%************************************************************************ 3060%* * 3061 Helper functions for generating annotations in the parser 3062%* * 3063%************************************************************************ 3064-} 3065 3066-- | Encapsulated call to addAnnotation, requiring only the SrcSpan of 3067-- the AST construct the annotation belongs to; together with the 3068-- AnnKeywordId, this is the key of the annotation map. 3069-- 3070-- This type is useful for places in the parser where it is not yet 3071-- known what SrcSpan an annotation should be added to. The most 3072-- common situation is when we are parsing a list: the annotations 3073-- need to be associated with the AST element that *contains* the 3074-- list, not the list itself. 'AddAnn' lets us defer adding the 3075-- annotations until we finish parsing the list and are now parsing 3076-- the enclosing element; we then apply the 'AddAnn' to associate 3077-- the annotations. Another common situation is where a common fragment of 3078-- the AST has been factored out but there is no separate AST node for 3079-- this fragment (this occurs in class and data declarations). In this 3080-- case, the annotation belongs to the parent data declaration. 3081-- 3082-- The usual way an 'AddAnn' is created is using the 'mj' ("make jump") 3083-- function, and then it can be discharged using the 'ams' function. 3084data AddAnn = AddAnn AnnKeywordId SrcSpan 3085 3086addAnnotationOnly :: SrcSpan -> AnnKeywordId -> SrcSpan -> P () 3087addAnnotationOnly l a v = P $ \s -> POk s { 3088 annotations = ((l,a), [v]) : annotations s 3089 } () 3090 3091-- |Given a 'SrcSpan' that surrounds a 'HsPar' or 'HsParTy', generate 3092-- 'AddAnn' values for the opening and closing bordering on the start 3093-- and end of the span 3094mkParensApiAnn :: SrcSpan -> [AddAnn] 3095mkParensApiAnn (UnhelpfulSpan _) = [] 3096mkParensApiAnn s@(RealSrcSpan ss) = [AddAnn AnnOpenP lo,AddAnn AnnCloseP lc] 3097 where 3098 f = srcSpanFile ss 3099 sl = srcSpanStartLine ss 3100 sc = srcSpanStartCol ss 3101 el = srcSpanEndLine ss 3102 ec = srcSpanEndCol ss 3103 lo = mkSrcSpan (srcSpanStart s) (mkSrcLoc f sl (sc+1)) 3104 lc = mkSrcSpan (mkSrcLoc f el (ec - 1)) (srcSpanEnd s) 3105 3106queueComment :: Located Token -> P() 3107queueComment c = P $ \s -> POk s { 3108 comment_q = commentToAnnotation c : comment_q s 3109 } () 3110 3111-- | Go through the @comment_q@ in @PState@ and remove all comments 3112-- that belong within the given span 3113allocateCommentsP :: SrcSpan -> P () 3114allocateCommentsP ss = P $ \s -> 3115 let (comment_q', newAnns) = allocateComments ss (comment_q s) in 3116 POk s { 3117 comment_q = comment_q' 3118 , annotations_comments = newAnns ++ (annotations_comments s) 3119 } () 3120 3121allocateComments 3122 :: SrcSpan 3123 -> [Located AnnotationComment] 3124 -> ([Located AnnotationComment], [(SrcSpan,[Located AnnotationComment])]) 3125allocateComments ss comment_q = 3126 let 3127 (before,rest) = break (\(L l _) -> isSubspanOf l ss) comment_q 3128 (middle,after) = break (\(L l _) -> not (isSubspanOf l ss)) rest 3129 comment_q' = before ++ after 3130 newAnns = if null middle then [] 3131 else [(ss,middle)] 3132 in 3133 (comment_q', newAnns) 3134 3135 3136commentToAnnotation :: Located Token -> Located AnnotationComment 3137commentToAnnotation (L l (ITdocCommentNext s)) = L l (AnnDocCommentNext s) 3138commentToAnnotation (L l (ITdocCommentPrev s)) = L l (AnnDocCommentPrev s) 3139commentToAnnotation (L l (ITdocCommentNamed s)) = L l (AnnDocCommentNamed s) 3140commentToAnnotation (L l (ITdocSection n s)) = L l (AnnDocSection n s) 3141commentToAnnotation (L l (ITdocOptions s)) = L l (AnnDocOptions s) 3142commentToAnnotation (L l (ITlineComment s)) = L l (AnnLineComment s) 3143commentToAnnotation (L l (ITblockComment s)) = L l (AnnBlockComment s) 3144commentToAnnotation _ = panic "commentToAnnotation" 3145 3146-- --------------------------------------------------------------------- 3147 3148isComment :: Token -> Bool 3149isComment (ITlineComment _) = True 3150isComment (ITblockComment _) = True 3151isComment _ = False 3152 3153isDocComment :: Token -> Bool 3154isDocComment (ITdocCommentNext _) = True 3155isDocComment (ITdocCommentPrev _) = True 3156isDocComment (ITdocCommentNamed _) = True 3157isDocComment (ITdocSection _ _) = True 3158isDocComment (ITdocOptions _) = True 3159isDocComment _ = False 3160} 3161