1{-# LANGUAGE CPP #-} 2{-# OPTIONS_HADDOCK hide #-} 3----------------------------------------------------------------------------- 4-- | 5-- Module : Language.Haskell.Exts.ParseMonad 6-- Copyright : Niklas Broberg (c) 2004-2009, 7-- Original (c) The GHC Team, 1997-2000 8-- License : BSD-style (see the file libraries/base/LICENSE) 9-- 10-- Maintainer : Niklas Broberg, d00nibro@chalmers.se 11-- Stability : stable 12-- Portability : portable 13-- 14-- Monads for the Haskell parser and lexer. 15-- 16----------------------------------------------------------------------------- 17 18module Language.Haskell.Exts.ParseMonad( 19 -- * Generic Parsing 20 Parseable(..), 21 -- * Parsing 22 P, ParseResult(..), atSrcLoc, LexContext(..), 23 ParseMode(..), defaultParseMode, fromParseResult, 24 runParserWithMode, runParserWithModeComments, runParser, 25 getSrcLoc, pushCurrentContext, popContext, 26 getExtensions, getIgnoreFunctionArity, 27 -- * Lexing 28 Lex(runL), getInput, discard, getLastChar, lexNewline, 29 lexTab, lexWhile, lexWhile_, 30 alternative, checkBOL, setBOL, startToken, getOffside, 31 pushContextL, popContextL, getExtensionsL, addExtensionL, 32 saveExtensionsL, restoreExtensionsL, pushComment, 33 getSrcLocL, setSrcLineL, ignoreLinePragmasL, setLineFilenameL, 34 -- * Harp/Hsx 35 ExtContext(..), 36 pushExtContextL, popExtContextL, getExtContext, 37 pullCtxtFlag, flagDo, 38 getModuleName 39 ) where 40 41import Language.Haskell.Exts.SrcLoc (SrcLoc(..), noLoc) 42import Language.Haskell.Exts.Fixity (Fixity, preludeFixities) 43import Language.Haskell.Exts.Comments 44import Language.Haskell.Exts.Extension -- (Extension, impliesExts, haskell2010) 45 46import Data.List (intercalate) 47import Control.Applicative 48import Control.Monad (when, liftM, ap) 49import qualified Control.Monad.Fail as Fail 50import Data.Monoid hiding ((<>)) 51#if !MIN_VERSION_base(4,13,0) 52import Data.Semigroup (Semigroup(..)) 53#endif 54-- To avoid import warnings for Control.Applicative, Data.Monoid, and Data.Semigroup 55import Prelude 56 57-- | Class providing function for parsing at many different types. 58-- 59-- Note that for convenience of implementation, the default methods have 60-- definitions equivalent to 'undefined'. The minimal definition is all of 61-- the visible methods. 62class Parseable ast where 63 -- | Parse a string with default mode. 64 parse :: String -> ParseResult ast 65 parse = parseWithMode defaultParseMode 66 -- | Parse a string with an explicit 'ParseMode'. 67 parseWithMode :: ParseMode -> String -> ParseResult ast 68 parseWithMode mode = runParserWithMode mode . parser $ fixities mode 69 -- | Parse a string with an explicit 'ParseMode', returning all comments along 70 -- with the AST. 71 parseWithComments :: ParseMode -> String -> ParseResult (ast, [Comment]) 72 parseWithComments mode = runParserWithModeComments mode . parser $ fixities mode 73 -- | Internal parser, used to provide default definitions for the others. 74 parser :: Maybe [Fixity] -> P ast 75 76-- | The result of a parse. 77data ParseResult a 78 = ParseOk a -- ^ The parse succeeded, yielding a value. 79 | ParseFailed SrcLoc String 80 -- ^ The parse failed at the specified 81 -- source location, with an error message. 82 deriving (Show, Ord, Eq) 83 84-- | Retrieve the result of a successful parse, throwing an 85-- error if the parse is actually not successful. 86fromParseResult :: ParseResult a -> a 87fromParseResult (ParseOk a) = a 88fromParseResult (ParseFailed loc str) = error $ "fromParseResult: Parse failed at [" 89 ++ srcFilename loc ++ "] (" ++ show (srcLine loc) ++ ":" ++ show (srcColumn loc) ++ "): " ++ str 90 91instance Functor ParseResult where 92 fmap f (ParseOk x) = ParseOk $ f x 93 fmap _ (ParseFailed loc msg) = ParseFailed loc msg 94 95instance Applicative ParseResult where 96 pure = ParseOk 97 ParseOk f <*> x = f <$> x 98 ParseFailed loc msg <*> _ = ParseFailed loc msg 99 100instance Monad ParseResult where 101 return = ParseOk 102#if !MIN_VERSION_base(4,13,0) 103 fail = Fail.fail 104#endif 105 ParseOk x >>= f = f x 106 ParseFailed loc msg >>= _ = ParseFailed loc msg 107instance Fail.MonadFail ParseResult where 108 fail = ParseFailed noLoc 109 110instance Semigroup m => Semigroup (ParseResult m) where 111 ParseOk x <> ParseOk y = ParseOk $ x <> y 112 ParseOk _ <> err = err 113 err <> _ = err -- left-biased 114 115instance ( Monoid m , Semigroup m) => Monoid (ParseResult m) where 116 mempty = ParseOk mempty 117 mappend = (<>) 118 119-- internal version 120data ParseStatus a = Ok ParseState a | Failed SrcLoc String 121 deriving Show 122 123data LexContext = NoLayout | Layout Int 124 deriving (Eq,Ord,Show) 125 126data ExtContext = CodeCtxt | HarpCtxt | TagCtxt | ChildCtxt 127 | CloseTagCtxt | CodeTagCtxt 128 deriving (Eq,Ord,Show) 129 130type CtxtFlag = (Bool,Bool) 131-- (True,_) = We're in a do context. 132-- (_, True)= Next token must be a virtual closing brace. 133 134type ParseState = ([LexContext],[[KnownExtension]],[ExtContext],CtxtFlag,[Comment]) 135 136indentOfParseState :: ParseState -> Int 137indentOfParseState (Layout n:_,_,_,_,_) = n 138indentOfParseState _ = 0 139 140-- | Static parameters governing a parse. 141-- Note that the various parse functions in "Language.Haskell.Exts.Parser" 142-- never look at LANGUAGE pragmas, regardless of 143-- what the @ignoreLanguagePragmas@ flag is set to. 144-- Only the various @parseFile@ functions in "Language.Haskell.Exts" will 145-- act on it, when set to 'False'. 146 147data ParseMode = ParseMode { 148 -- | original name of the file being parsed 149 parseFilename :: String, 150 -- | base language (e.g. Haskell98, Haskell2010) 151 baseLanguage :: Language, 152 -- | list of extensions enabled for parsing 153 extensions :: [Extension], 154 -- | if 'True', the parser won't care about further extensions 155 -- in LANGUAGE pragmas in source files 156 ignoreLanguagePragmas :: Bool, 157 -- | if 'True', the parser won't read line position information 158 -- from LINE pragmas in source files 159 ignoreLinePragmas :: Bool, 160 -- | list of fixities to be aware of 161 fixities :: Maybe [Fixity], 162 -- | Checks whether functions have a consistent arity 163 ignoreFunctionArity :: Bool 164 } 165 166-- | Default parameters for a parse. 167-- The default is an unknown filename, 168-- no extensions (i.e. Haskell 98), 169-- don't ignore LANGUAGE pragmas, do ignore LINE pragmas, 170-- and be aware of fixities from the 'Prelude'. 171defaultParseMode :: ParseMode 172defaultParseMode = ParseMode { 173 parseFilename = "<unknown>.hs", 174 baseLanguage = Haskell2010, 175 extensions = [], 176 ignoreLanguagePragmas = False, 177 ignoreLinePragmas = True, 178 fixities = Just preludeFixities, 179 ignoreFunctionArity = False 180 } 181 182-- Version of ParseMode used internally, 183-- where the language and extensions have 184-- been expanded 185data InternalParseMode = IParseMode { 186 iParseFilename :: String, 187 iExtensions :: [KnownExtension], 188 -- iIgnoreLanguagePragmas :: Bool, 189 iIgnoreLinePragmas :: Bool, 190 iIgnoreFunctionArity :: Bool 191 -- iFixities :: Maybe [Fixity] 192 } 193 194toInternalParseMode :: ParseMode -> InternalParseMode 195toInternalParseMode (ParseMode pf bLang exts _ilang iline _fx farity) = 196 IParseMode pf (toExtensionList bLang exts) {-_ilang -} iline {- _fx -} farity 197 198 199-- | Monad for parsing 200 201newtype P a = P { runP :: 202 String -- input string 203 -> Int -- current column 204 -> Int -- current line 205 -> SrcLoc -- location of last token read 206 -> Char -- Last token read used for lexing TypeApplication UGH 207 -> ParseState -- layout info. 208 -> InternalParseMode -- parse parameters 209 -> ParseStatus a 210 } 211 212runParserWithMode :: ParseMode -> P a -> String -> ParseResult a 213{-runParserWithMode mode (P m) s = case m s 0 1 start ([],[],(False,False),[]) mode of 214 Ok _ a -> ParseOk a 215 Failed loc msg -> ParseFailed loc msg 216 where start = SrcLoc { 217 srcFilename = parseFilename mode, 218 srcLine = 1, 219 srcColumn = 1 220 } 221-} 222runParserWithMode mode pm = fmap fst . runParserWithModeComments mode pm 223 224runParser :: P a -> String -> ParseResult a 225runParser = runParserWithMode defaultParseMode 226 227runParserWithModeComments :: ParseMode -> P a -> String -> ParseResult (a, [Comment]) 228runParserWithModeComments mode = let mode2 = toInternalParseMode mode in \(P m) s -> 229 case m s 0 1 start '\n' ([],[],[],(False,False),[]) mode2 of 230 Ok (_,_,_,_,cs) a -> ParseOk (a, reverse cs) 231 Failed loc msg -> ParseFailed loc msg 232 where start = SrcLoc { 233 srcFilename = parseFilename mode, 234 srcLine = 1, 235 srcColumn = 1 236 } 237 -- allExts mode@(ParseMode {extensions = es}) = mode { extensions = impliesExts es } 238 239 -- allExts mode = let imode = to 240 241instance Functor P where 242 fmap = liftM 243 244instance Applicative P where 245 pure = return 246 (<*>) = ap 247 248instance Monad P where 249 return a = P $ \_i _x _y _l _ch s _m -> Ok s a 250 P m >>= k = P $ \i x y l ch s mode -> 251 case m i x y l ch s mode of 252 Failed loc msg -> Failed loc msg 253 Ok s' a -> runP (k a) i x y l ch s' mode 254#if !MIN_VERSION_base(4,13,0) 255 fail = Fail.fail 256#endif 257 258instance Fail.MonadFail P where 259 fail s = P $ \_r _col _line loc _ _stk _m -> Failed loc s 260 261atSrcLoc :: P a -> SrcLoc -> P a 262P m `atSrcLoc` loc = P $ \i x y _l ch -> m i x y loc ch 263 264getSrcLoc :: P SrcLoc 265getSrcLoc = P $ \_i _x _y l _ s _m -> Ok s l 266 267getModuleName :: P String 268getModuleName = P $ \_i _x _y _l _ch s m -> 269 let fn = iParseFilename m 270 mn = intercalate "." $ splitPath fn 271 272 splitPath :: String -> [String] 273 splitPath "" = [] 274 splitPath str = let (l,str') = break ('\\'==) str 275 in case str' of 276 [] -> [removeSuffix l] 277 (_:str'') -> l : splitPath str'' 278 279 removeSuffix l = reverse $ tail $ dropWhile ('.'/=) $ reverse l 280 281 in Ok s mn 282 283-- Enter a new layout context. If we are already in a layout context, 284-- ensure that the new indent is greater than the indent of that context. 285-- (So if the source loc is not to the right of the current indent, an 286-- empty list {} will be inserted.) 287 288pushCurrentContext :: P () 289pushCurrentContext = do 290 lc <- getSrcLoc 291 indent <- currentIndent 292 dob <- pullDoStatus 293 let loc = srcColumn lc 294 when (dob && loc < indent 295 || not dob && loc <= indent) pushCtxtFlag 296 pushContext (Layout loc) 297 298currentIndent :: P Int 299currentIndent = P $ \_r _x _y _ _ stk _mode -> Ok stk (indentOfParseState stk) 300 301pushContext :: LexContext -> P () 302pushContext ctxt = 303--trace ("pushing lexical scope: " ++ show ctxt ++"\n") $ 304 P $ \_i _x _y _l _ (s, exts, e, p, c) _m -> Ok (ctxt:s, exts, e, p, c) () 305 306popContext :: P () 307popContext = P $ \_i _x _y loc _ stk _m -> 308 case stk of 309 (_:s, exts, e, p, c) -> --trace ("popping lexical scope, context now "++show s ++ "\n") $ 310 Ok (s, exts, e, p, c) () 311 ([],_,_,_,_) -> Failed loc "Unexpected }" -- error "Internal error: empty context in popContext" 312 313{- 314-- HaRP/Hsx 315pushExtContext :: ExtContext -> P () 316pushExtContext ctxt = P $ \_i _x _y _l (s, e, p, c) _m -> Ok (s, ctxt:e, p, c) () 317 318popExtContext :: P () 319popExtContext = P $ \_i _x _y _l (s, e, p, c) _m -> 320 case e of 321 (_:e') -> 322 Ok (s, e', p, c) () 323 [] -> error "Internal error: empty context in popExtContext" 324-} 325 326-- Extension-aware lexing/parsing 327getExtensions :: P [KnownExtension] 328getExtensions = P $ \_i _x _y _l _ s m -> 329 Ok s $ iExtensions m 330 331pushCtxtFlag :: P () 332pushCtxtFlag = 333 P $ \_i _x _y _l _ (s, exts, e, (d,c), cs) _m -> case c of 334 False -> Ok (s, exts, e, (d,True), cs) () 335 _ -> error "Internal error: context flag already pushed" 336 337pullDoStatus :: P Bool 338pullDoStatus = P $ \_i _x _y _l _ (s, exts, e, (d,c), cs) _m -> Ok (s,exts,e,(False,c),cs) d 339 340getIgnoreFunctionArity :: P Bool 341getIgnoreFunctionArity = P $ \_i _x _y _l _ s m -> 342 Ok s $ iIgnoreFunctionArity m 343 344 345 346 347---------------------------------------------------------------------------- 348-- Monad for lexical analysis: 349-- a continuation-passing version of the parsing monad 350 351newtype Lex r a = Lex { runL :: (a -> P r) -> P r } 352 353instance Functor (Lex r) where 354 fmap = liftM 355 356instance Applicative (Lex r) where 357 pure = return 358 (<*>) = ap 359 360instance Monad (Lex r) where 361 return a = Lex $ \k -> k a 362 Lex v >>= f = Lex $ \k -> v (\a -> runL (f a) k) 363 Lex v >> Lex w = Lex $ \k -> v (\_ -> w k) 364#if !MIN_VERSION_base(4,13,0) 365 fail = Fail.fail 366#endif 367 368instance Fail.MonadFail (Lex r) where 369 fail s = Lex $ \_ -> fail s 370 371-- Operations on this monad 372 373getInput :: Lex r String 374getInput = Lex $ \cont -> P $ \r -> runP (cont r) r 375 376-- | Discard some input characters (these must not include tabs or newlines). 377 378discard :: Int -> Lex r () 379discard n = Lex $ \cont -> P $ \r x y loc ch 380 -> let (newCh:rest)= if n > 0 then drop (n-1) r else (ch:r) 381 in runP (cont ()) rest (x+n) y loc newCh 382 383-- | Get the last discarded character. 384-- This is only used for type application. 385 386getLastChar :: Lex r Char 387getLastChar = Lex $ \cont -> P $ \r x y loc ch -> runP (cont ch) r x y loc ch 388 389 390-- | Discard the next character, which must be a newline. 391 392lexNewline :: Lex a () 393lexNewline = Lex $ \cont -> P $ \rs _x y loc -> 394 case rs of 395 (_:r) -> runP (cont ()) r 1 (y+1) loc 396 [] -> \_ _ _ -> Failed loc "Lexer: expected newline." 397 398-- | Discard the next character, which must be a tab. 399 400lexTab :: Lex a () 401lexTab = Lex $ \cont -> P $ \(_:r) x -> runP (cont ()) r (nextTab x) 402 403nextTab :: Int -> Int 404nextTab x = x + (tAB_LENGTH - (x-1) `mod` tAB_LENGTH) 405 406tAB_LENGTH :: Int 407tAB_LENGTH = 8 408 409-- Consume and return the largest string of characters satisfying p 410 411lexWhile :: (Char -> Bool) -> Lex a String 412lexWhile p = Lex $ \cont -> P $ \rss c l loc char -> 413 case rss of 414 [] -> runP (cont []) [] c l loc char 415 (r:rs) -> 416 let 417 l' = case r of 418 '\n' -> l + 1 419 _ -> l 420 c' = case r of 421 '\n' -> 1 422 _ -> c + 1 423 in if p r 424 then runP (runL ((r:) <$> lexWhile p) cont) rs c' l' loc r 425 else runP (cont []) (r:rs) c l loc char 426 427-- | lexWhile without the return value. 428lexWhile_ :: (Char -> Bool) -> Lex a () 429lexWhile_ p = do _ <- lexWhile p 430 return () 431 432-- An alternative scan, to which we can return if subsequent scanning 433-- is unsuccessful. 434 435alternative :: Lex a v -> Lex a (Lex a v) 436alternative (Lex v) = Lex $ \cont -> P $ \r x y -> 437 runP (cont (Lex $ \cont' -> P $ \_r _x _y -> 438 runP (v cont') r x y)) r x y 439 440-- The source location is the coordinates of the previous token, 441-- or, while scanning a token, the start of the current token. 442 443-- col is the current column in the source file. 444-- We also need to remember between scanning tokens whether we are 445-- somewhere at the beginning of the line before the first token. 446-- This could be done with an extra Bool argument to the P monad, 447-- but as a hack we use a col value of 0 to indicate this situation. 448 449-- Setting col to 0 is used in two places: just after emitting a virtual 450-- close brace due to layout, so that next time through we check whether 451-- we also need to emit a semi-colon, and at the beginning of the file, 452-- by runParser, to kick off the lexer. 453-- Thus when col is zero, the true column can be taken from the loc. 454 455checkBOL :: Lex a Bool 456checkBOL = Lex $ \cont -> P $ \r x y loc -> 457 if x == 0 then runP (cont True) r (srcColumn loc) y loc 458 else runP (cont False) r x y loc 459 460setBOL :: Lex a () 461setBOL = Lex $ \cont -> P $ \r _ -> runP (cont ()) r 0 462 463-- Set the loc to the current position 464 465startToken :: Lex a () 466startToken = Lex $ \cont -> P $ \s x y _ c stk mode -> 467 let loc = SrcLoc { 468 srcFilename = iParseFilename mode, 469 srcLine = y, 470 srcColumn = x 471 } in 472 runP (cont ()) s x y loc c stk mode 473 474-- Current status with respect to the offside (layout) rule: 475-- LT: we are to the left of the current indent (if any) 476-- EQ: we are at the current indent (if any) 477-- GT: we are to the right of the current indent, or not subject to layout 478 479getOffside :: Lex a Ordering 480getOffside = Lex $ \cont -> P $ \r x y loc ch stk -> 481 runP (cont (compare x (indentOfParseState stk))) r x y loc ch stk 482 483getSrcLocL :: Lex a SrcLoc 484getSrcLocL = Lex $ \cont -> P $ \i x y l -> 485 runP (cont (l { srcLine = y, srcColumn = x })) i x y l 486 487setSrcLineL :: Int -> Lex a () 488setSrcLineL y = Lex $ \cont -> P $ \i x _ -> 489 runP (cont ()) i x y 490 491pushContextL :: LexContext -> Lex a () 492pushContextL ctxt = Lex $ \cont -> P $ \r x y loc ch (stk, exts, e, pst, cs) -> 493 runP (cont ()) r x y loc ch (ctxt:stk, exts, e, pst, cs) 494 495popContextL :: String -> Lex a () 496popContextL _ = Lex $ \cont -> P $ \r x y loc ch stk m -> case stk of 497 (_:ctxt, exts, e, pst, cs) -> runP (cont ()) r x y loc ch (ctxt, exts, e, pst, cs) m 498 ([], _, _, _, _) -> Failed loc "Unexpected }" 499 500pullCtxtFlag :: Lex a Bool 501pullCtxtFlag = Lex $ \cont -> P $ \r x y loc ch (ct, exts, e, (d,c), cs) -> 502 runP (cont c) r x y loc ch (ct, exts, e, (d,False), cs) 503 504 505flagDo :: Lex a () 506flagDo = Lex $ \cont -> P $ \r x y loc ch (ct, exts, e, (_,c), cs) -> 507 runP (cont ()) r x y loc ch (ct, exts, e, (True,c), cs) 508 509 510-- Harp/Hsx 511 512getExtContext :: Lex a (Maybe ExtContext) 513getExtContext = Lex $ \cont -> P $ \r x y loc ch stk@(_, _, e, _, _) -> 514 let me = case e of 515 [] -> Nothing 516 (c:_) -> Just c 517 in runP (cont me) r x y loc ch stk 518 519pushExtContextL :: ExtContext -> Lex a () 520pushExtContextL ec = Lex $ \cont -> P $ \r x y loc ch (s, exts, e, p, c) -> 521 runP (cont ()) r x y loc ch (s, exts, ec:e, p, c) 522 523popExtContextL :: String -> Lex a () 524popExtContextL fn = Lex $ \cont -> P $ \r x y loc ch (s,exts,e,p,c) m -> case e of 525 (_:ec) -> runP (cont ()) r x y loc ch (s,exts,ec,p,c) m 526 [] -> Failed loc ("Internal error: empty tag context in " ++ fn) 527 528 529-- Extension-aware lexing 530 531getExtensionsL :: Lex a [KnownExtension] 532getExtensionsL = Lex $ \cont -> P $ \r x y loc ch s m -> 533 runP (cont $ iExtensions m) r x y loc ch s m 534 535-- | Add an extension to the current configuration. 536addExtensionL :: KnownExtension -> Lex a () 537addExtensionL ext = Lex $ \cont -> P $ \r x y loc ch (s, oldExts, e, p, c) m -> 538 let newExts = impliesExts [ext] ++ iExtensions m 539 in runP (cont ()) r x y loc ch (s, oldExts, e, p, c) (m {iExtensions = newExts}) 540 541-- | Save the current configuration of extensions. 542saveExtensionsL :: Lex a () 543saveExtensionsL = Lex $ \cont -> P $ \r x y loc ch (s, oldExts, e, p, c) m -> 544 runP (cont ()) r x y loc ch (s, iExtensions m:oldExts, e, p, c) m 545 546-- | Return to the previous saved extensions configuration. 547restoreExtensionsL :: Lex a () 548restoreExtensionsL = Lex $ \cont -> P $ \r x y loc ch (s,exts,e,p,c) m -> case exts of 549 (_:prev) -> runP (cont ()) r x y loc ch (s,prev,e,p,c) m 550 _ -> Failed loc "Internal error: empty extension stack" 551 552-- LINE-aware lexing 553 554ignoreLinePragmasL :: Lex a Bool 555ignoreLinePragmasL = Lex $ \cont -> P $ \r x y loc c s m -> 556 runP (cont $ iIgnoreLinePragmas m) r x y loc c s m 557 558-- If we read a file name in a LINE pragma, we should update the state. 559setLineFilenameL :: String -> Lex a () 560setLineFilenameL name = Lex $ \cont -> P $ \r x y loc ch s m -> 561 runP (cont ()) r x y loc ch s (m {iParseFilename = name}) 562 563-- Comments 564 565pushComment :: Comment -> Lex a () 566pushComment c = Lex $ \cont -> P $ \r x y loc ch (s, exts, e, p, cs) -> 567 runP (cont ()) r x y loc ch (s, exts, e, p, c:cs) 568