1{-# LANGUAGE CPP #-}
2{-# OPTIONS_HADDOCK hide #-}
3-----------------------------------------------------------------------------
4-- |
5-- Module      :  Language.Haskell.Exts.ParseMonad
6-- Copyright   :  Niklas Broberg (c) 2004-2009,
7--                Original (c) The GHC Team, 1997-2000
8-- License     :  BSD-style (see the file libraries/base/LICENSE)
9--
10-- Maintainer  :  Niklas Broberg, d00nibro@chalmers.se
11-- Stability   :  stable
12-- Portability :  portable
13--
14-- Monads for the Haskell parser and lexer.
15--
16-----------------------------------------------------------------------------
17
18module Language.Haskell.Exts.ParseMonad(
19        -- * Generic Parsing
20        Parseable(..),
21        -- * Parsing
22        P, ParseResult(..), atSrcLoc, LexContext(..),
23        ParseMode(..), defaultParseMode, fromParseResult,
24        runParserWithMode, runParserWithModeComments, runParser,
25        getSrcLoc, pushCurrentContext, popContext,
26        getExtensions, getIgnoreFunctionArity,
27        -- * Lexing
28        Lex(runL), getInput, discard, getLastChar, lexNewline,
29        lexTab, lexWhile, lexWhile_,
30        alternative, checkBOL, setBOL, startToken, getOffside,
31        pushContextL, popContextL, getExtensionsL, addExtensionL,
32        saveExtensionsL, restoreExtensionsL, pushComment,
33        getSrcLocL, setSrcLineL, ignoreLinePragmasL, setLineFilenameL,
34        -- * Harp/Hsx
35        ExtContext(..),
36        pushExtContextL, popExtContextL, getExtContext,
37        pullCtxtFlag, flagDo,
38        getModuleName
39    ) where
40
41import Language.Haskell.Exts.SrcLoc (SrcLoc(..), noLoc)
42import Language.Haskell.Exts.Fixity (Fixity, preludeFixities)
43import Language.Haskell.Exts.Comments
44import Language.Haskell.Exts.Extension -- (Extension, impliesExts, haskell2010)
45
46import Data.List (intercalate)
47import Control.Applicative
48import Control.Monad (when, liftM, ap)
49import qualified Control.Monad.Fail as Fail
50import Data.Monoid hiding ((<>))
51#if !MIN_VERSION_base(4,13,0)
52import Data.Semigroup (Semigroup(..))
53#endif
54-- To avoid import warnings for Control.Applicative, Data.Monoid, and Data.Semigroup
55import Prelude
56
57-- | Class providing function for parsing at many different types.
58--
59--   Note that for convenience of implementation, the default methods have
60--   definitions equivalent to 'undefined'.  The minimal definition is all of
61--   the visible methods.
62class Parseable ast where
63  -- | Parse a string with default mode.
64  parse :: String -> ParseResult ast
65  parse = parseWithMode defaultParseMode
66  -- | Parse a string with an explicit 'ParseMode'.
67  parseWithMode :: ParseMode -> String -> ParseResult ast
68  parseWithMode mode = runParserWithMode mode . parser $ fixities mode
69  -- | Parse a string with an explicit 'ParseMode', returning all comments along
70  --   with the AST.
71  parseWithComments :: ParseMode -> String -> ParseResult (ast, [Comment])
72  parseWithComments mode = runParserWithModeComments mode . parser $ fixities mode
73  -- | Internal parser, used to provide default definitions for the others.
74  parser :: Maybe [Fixity] -> P ast
75
76-- | The result of a parse.
77data ParseResult a
78    = ParseOk a  -- ^ The parse succeeded, yielding a value.
79    | ParseFailed SrcLoc String
80                -- ^ The parse failed at the specified
81                -- source location, with an error message.
82    deriving (Show, Ord, Eq)
83
84-- | Retrieve the result of a successful parse, throwing an
85--   error if the parse is actually not successful.
86fromParseResult :: ParseResult a -> a
87fromParseResult (ParseOk a) = a
88fromParseResult (ParseFailed loc str) = error $ "fromParseResult: Parse failed at ["
89                ++ srcFilename loc ++ "] (" ++ show (srcLine loc) ++ ":" ++ show (srcColumn loc) ++ "): " ++ str
90
91instance Functor ParseResult where
92  fmap f (ParseOk x)           = ParseOk $ f x
93  fmap _ (ParseFailed loc msg) = ParseFailed loc msg
94
95instance Applicative ParseResult where
96  pure = ParseOk
97  ParseOk f           <*> x = f <$> x
98  ParseFailed loc msg <*> _ = ParseFailed loc msg
99
100instance Monad ParseResult where
101  return = ParseOk
102#if !MIN_VERSION_base(4,13,0)
103  fail = Fail.fail
104#endif
105  ParseOk x           >>= f = f x
106  ParseFailed loc msg >>= _ = ParseFailed loc msg
107instance Fail.MonadFail ParseResult where
108  fail = ParseFailed noLoc
109
110instance Semigroup m => Semigroup (ParseResult m) where
111 ParseOk x <> ParseOk y = ParseOk $ x <> y
112 ParseOk _ <> err       = err
113 err       <> _         = err -- left-biased
114
115instance ( Monoid m , Semigroup m) => Monoid (ParseResult m) where
116  mempty = ParseOk mempty
117  mappend = (<>)
118
119-- internal version
120data ParseStatus a = Ok ParseState a | Failed SrcLoc String
121    deriving Show
122
123data LexContext = NoLayout | Layout Int
124    deriving (Eq,Ord,Show)
125
126data ExtContext = CodeCtxt | HarpCtxt | TagCtxt | ChildCtxt
127        | CloseTagCtxt | CodeTagCtxt
128    deriving (Eq,Ord,Show)
129
130type CtxtFlag = (Bool,Bool)
131-- (True,_) = We're in a do context.
132-- (_, True)= Next token must be a virtual closing brace.
133
134type ParseState = ([LexContext],[[KnownExtension]],[ExtContext],CtxtFlag,[Comment])
135
136indentOfParseState :: ParseState -> Int
137indentOfParseState (Layout n:_,_,_,_,_) = n
138indentOfParseState _                    = 0
139
140-- | Static parameters governing a parse.
141--   Note that the various parse functions in "Language.Haskell.Exts.Parser"
142--   never look at LANGUAGE pragmas, regardless of
143--   what the @ignoreLanguagePragmas@ flag is set to.
144--   Only the various @parseFile@ functions in "Language.Haskell.Exts" will
145--   act on it, when set to 'False'.
146
147data ParseMode = ParseMode {
148        -- | original name of the file being parsed
149        parseFilename :: String,
150        -- | base language (e.g. Haskell98, Haskell2010)
151        baseLanguage :: Language,
152        -- | list of extensions enabled for parsing
153        extensions :: [Extension],
154        -- | if 'True', the parser won't care about further extensions
155        --   in LANGUAGE pragmas in source files
156        ignoreLanguagePragmas :: Bool,
157        -- | if 'True', the parser won't read line position information
158        --   from LINE pragmas in source files
159        ignoreLinePragmas :: Bool,
160        -- | list of fixities to be aware of
161        fixities :: Maybe [Fixity],
162        -- | Checks whether functions have a consistent arity
163        ignoreFunctionArity :: Bool
164        }
165
166-- | Default parameters for a parse.
167--   The default is an unknown filename,
168--   no extensions (i.e. Haskell 98),
169--   don't ignore LANGUAGE pragmas, do ignore LINE pragmas,
170--   and be aware of fixities from the 'Prelude'.
171defaultParseMode :: ParseMode
172defaultParseMode = ParseMode {
173        parseFilename = "<unknown>.hs",
174        baseLanguage = Haskell2010,
175        extensions = [],
176        ignoreLanguagePragmas = False,
177        ignoreLinePragmas = True,
178        fixities = Just preludeFixities,
179        ignoreFunctionArity = False
180        }
181
182-- Version of ParseMode used internally,
183-- where the language and extensions have
184-- been expanded
185data InternalParseMode = IParseMode {
186        iParseFilename :: String,
187        iExtensions :: [KnownExtension],
188        -- iIgnoreLanguagePragmas :: Bool,
189        iIgnoreLinePragmas :: Bool,
190        iIgnoreFunctionArity :: Bool
191        -- iFixities :: Maybe [Fixity]
192    }
193
194toInternalParseMode :: ParseMode -> InternalParseMode
195toInternalParseMode (ParseMode pf bLang exts _ilang iline _fx farity) =
196    IParseMode pf (toExtensionList bLang exts) {-_ilang -} iline {- _fx -} farity
197
198
199-- | Monad for parsing
200
201newtype P a = P { runP ::
202                String              -- input string
203             -> Int                 -- current column
204             -> Int                 -- current line
205             -> SrcLoc              -- location of last token read
206             -> Char                -- Last token read used for lexing TypeApplication UGH
207             -> ParseState          -- layout info.
208             -> InternalParseMode   -- parse parameters
209             -> ParseStatus a
210        }
211
212runParserWithMode :: ParseMode -> P a -> String -> ParseResult a
213{-runParserWithMode mode (P m) s = case m s 0 1 start ([],[],(False,False),[]) mode of
214    Ok _ a -> ParseOk a
215    Failed loc msg -> ParseFailed loc msg
216    where start = SrcLoc {
217        srcFilename = parseFilename mode,
218        srcLine = 1,
219        srcColumn = 1
220    }
221-}
222runParserWithMode mode pm = fmap fst . runParserWithModeComments mode pm
223
224runParser :: P a -> String -> ParseResult a
225runParser = runParserWithMode defaultParseMode
226
227runParserWithModeComments :: ParseMode -> P a -> String -> ParseResult (a, [Comment])
228runParserWithModeComments mode = let mode2 = toInternalParseMode mode in \(P m) s ->
229  case m s 0 1 start '\n' ([],[],[],(False,False),[]) mode2 of
230    Ok (_,_,_,_,cs) a -> ParseOk (a, reverse cs)
231    Failed loc msg    -> ParseFailed loc msg
232    where start = SrcLoc {
233        srcFilename = parseFilename mode,
234        srcLine = 1,
235        srcColumn = 1
236        }
237  --        allExts mode@(ParseMode {extensions = es}) = mode { extensions = impliesExts es }
238
239    --      allExts mode = let imode = to
240
241instance Functor P where
242  fmap = liftM
243
244instance Applicative P where
245  pure = return
246  (<*>) = ap
247
248instance Monad P where
249    return a = P $ \_i _x _y _l _ch s _m -> Ok s a
250    P m >>= k = P $ \i x y l ch s mode ->
251        case m i x y l ch s mode of
252            Failed loc msg -> Failed loc msg
253            Ok s' a -> runP (k a) i x y l ch s' mode
254#if !MIN_VERSION_base(4,13,0)
255    fail   = Fail.fail
256#endif
257
258instance Fail.MonadFail P where
259    fail s = P $ \_r _col _line loc _ _stk _m -> Failed loc s
260
261atSrcLoc :: P a -> SrcLoc -> P a
262P m `atSrcLoc` loc = P $ \i x y _l ch -> m i x y loc ch
263
264getSrcLoc :: P SrcLoc
265getSrcLoc = P $ \_i _x _y l _ s _m -> Ok s l
266
267getModuleName :: P String
268getModuleName = P $ \_i _x _y _l _ch s m ->
269    let fn = iParseFilename m
270        mn = intercalate "." $ splitPath fn
271
272        splitPath :: String -> [String]
273        splitPath ""   = []
274        splitPath str  = let (l,str') = break ('\\'==) str
275                          in case str' of
276                              []      -> [removeSuffix l]
277                              (_:str'') -> l : splitPath str''
278
279        removeSuffix l = reverse $ tail $ dropWhile ('.'/=) $ reverse l
280
281     in Ok s mn
282
283-- Enter a new layout context.  If we are already in a layout context,
284-- ensure that the new indent is greater than the indent of that context.
285-- (So if the source loc is not to the right of the current indent, an
286-- empty list {} will be inserted.)
287
288pushCurrentContext :: P ()
289pushCurrentContext = do
290    lc <- getSrcLoc
291    indent <- currentIndent
292    dob <- pullDoStatus
293    let loc = srcColumn lc
294    when (dob && loc < indent
295           || not dob && loc <= indent) pushCtxtFlag
296    pushContext (Layout loc)
297
298currentIndent :: P Int
299currentIndent = P $ \_r _x _y _ _ stk _mode -> Ok stk (indentOfParseState stk)
300
301pushContext :: LexContext -> P ()
302pushContext ctxt =
303--trace ("pushing lexical scope: " ++ show ctxt ++"\n") $
304    P $ \_i _x _y _l _ (s, exts, e, p, c) _m -> Ok (ctxt:s, exts, e, p, c) ()
305
306popContext :: P ()
307popContext = P $ \_i _x _y loc _ stk _m ->
308      case stk of
309        (_:s, exts, e, p, c) -> --trace ("popping lexical scope, context now "++show s ++ "\n") $
310                          Ok (s, exts, e, p, c) ()
311        ([],_,_,_,_)   -> Failed loc "Unexpected }" -- error "Internal error: empty context in popContext"
312
313{-
314-- HaRP/Hsx
315pushExtContext :: ExtContext -> P ()
316pushExtContext ctxt = P $ \_i _x _y _l (s, e, p, c) _m -> Ok (s, ctxt:e, p, c) ()
317
318popExtContext :: P ()
319popExtContext = P $ \_i _x _y _l (s, e, p, c) _m ->
320    case e of
321     (_:e') ->
322       Ok (s, e', p, c) ()
323     [] -> error "Internal error: empty context in popExtContext"
324-}
325
326-- Extension-aware lexing/parsing
327getExtensions :: P [KnownExtension]
328getExtensions = P $ \_i _x _y _l _ s m ->
329    Ok s $ iExtensions m
330
331pushCtxtFlag :: P ()
332pushCtxtFlag =
333    P $ \_i _x _y _l _ (s, exts, e, (d,c), cs) _m -> case c of
334        False -> Ok (s, exts, e, (d,True), cs) ()
335        _     -> error "Internal error: context flag already pushed"
336
337pullDoStatus :: P Bool
338pullDoStatus = P $ \_i _x _y _l _ (s, exts, e, (d,c), cs) _m -> Ok (s,exts,e,(False,c),cs) d
339
340getIgnoreFunctionArity :: P Bool
341getIgnoreFunctionArity = P $ \_i _x _y _l _ s m ->
342  Ok s $ iIgnoreFunctionArity m
343
344
345
346
347----------------------------------------------------------------------------
348-- Monad for lexical analysis:
349-- a continuation-passing version of the parsing monad
350
351newtype Lex r a = Lex { runL :: (a -> P r) -> P r }
352
353instance Functor (Lex r) where
354    fmap = liftM
355
356instance Applicative (Lex r) where
357    pure = return
358    (<*>) = ap
359
360instance Monad (Lex r) where
361    return a = Lex $ \k -> k a
362    Lex v >>= f = Lex $ \k -> v (\a -> runL (f a) k)
363    Lex v >> Lex w = Lex $ \k -> v (\_ -> w k)
364#if !MIN_VERSION_base(4,13,0)
365    fail   = Fail.fail
366#endif
367
368instance Fail.MonadFail (Lex r) where
369    fail s = Lex $ \_ -> fail s
370
371-- Operations on this monad
372
373getInput :: Lex r String
374getInput = Lex $ \cont -> P $ \r -> runP (cont r) r
375
376-- | Discard some input characters (these must not include tabs or newlines).
377
378discard :: Int -> Lex r ()
379discard n = Lex $ \cont -> P $ \r x y loc ch
380                        -> let (newCh:rest)= if n > 0 then drop (n-1) r else (ch:r)
381                           in runP (cont ()) rest (x+n) y loc newCh
382
383-- | Get the last discarded character.
384-- This is only used for type application.
385
386getLastChar :: Lex r Char
387getLastChar = Lex $ \cont -> P $ \r x y loc ch -> runP (cont ch) r x y loc ch
388
389
390-- | Discard the next character, which must be a newline.
391
392lexNewline :: Lex a ()
393lexNewline = Lex $ \cont -> P $ \rs _x y loc  ->
394  case rs of
395    (_:r) -> runP (cont ()) r 1 (y+1) loc
396    []    -> \_ _ _ -> Failed loc "Lexer: expected newline."
397
398-- | Discard the next character, which must be a tab.
399
400lexTab :: Lex a ()
401lexTab = Lex $ \cont -> P $ \(_:r) x -> runP (cont ()) r (nextTab x)
402
403nextTab :: Int -> Int
404nextTab x = x + (tAB_LENGTH - (x-1) `mod` tAB_LENGTH)
405
406tAB_LENGTH :: Int
407tAB_LENGTH = 8
408
409-- Consume and return the largest string of characters satisfying p
410
411lexWhile :: (Char -> Bool) -> Lex a String
412lexWhile p = Lex $ \cont -> P $ \rss c l loc char ->
413  case rss of
414    [] -> runP (cont []) [] c l loc char
415    (r:rs) ->
416      let
417        l' = case r of
418              '\n' -> l + 1
419              _    -> l
420        c' = case r of
421              '\n' -> 1
422              _    -> c + 1
423       in if p r
424            then runP (runL ((r:) <$> lexWhile p) cont) rs c' l' loc r
425            else runP (cont []) (r:rs) c l loc char
426
427-- | lexWhile without the return value.
428lexWhile_ :: (Char -> Bool) -> Lex a ()
429lexWhile_ p = do _ <- lexWhile p
430                 return ()
431
432-- An alternative scan, to which we can return if subsequent scanning
433-- is unsuccessful.
434
435alternative :: Lex a v -> Lex a (Lex a v)
436alternative (Lex v) = Lex $ \cont -> P $ \r x y ->
437    runP (cont (Lex $ \cont' -> P $ \_r _x _y ->
438        runP (v cont') r x y)) r x y
439
440-- The source location is the coordinates of the previous token,
441-- or, while scanning a token, the start of the current token.
442
443-- col is the current column in the source file.
444-- We also need to remember between scanning tokens whether we are
445-- somewhere at the beginning of the line before the first token.
446-- This could be done with an extra Bool argument to the P monad,
447-- but as a hack we use a col value of 0 to indicate this situation.
448
449-- Setting col to 0 is used in two places: just after emitting a virtual
450-- close brace due to layout, so that next time through we check whether
451-- we also need to emit a semi-colon, and at the beginning of the file,
452-- by runParser, to kick off the lexer.
453-- Thus when col is zero, the true column can be taken from the loc.
454
455checkBOL :: Lex a Bool
456checkBOL = Lex $ \cont -> P $ \r x y loc ->
457        if x == 0 then runP (cont True) r (srcColumn loc) y loc
458            else runP (cont False) r x y loc
459
460setBOL :: Lex a ()
461setBOL = Lex $ \cont -> P $ \r _ -> runP (cont ()) r 0
462
463-- Set the loc to the current position
464
465startToken :: Lex a ()
466startToken = Lex $ \cont -> P $ \s x y _ c stk mode ->
467    let loc = SrcLoc {
468        srcFilename = iParseFilename mode,
469        srcLine = y,
470        srcColumn = x
471    } in
472    runP (cont ()) s x y loc c stk mode
473
474-- Current status with respect to the offside (layout) rule:
475-- LT: we are to the left of the current indent (if any)
476-- EQ: we are at the current indent (if any)
477-- GT: we are to the right of the current indent, or not subject to layout
478
479getOffside :: Lex a Ordering
480getOffside = Lex $ \cont -> P $ \r x y loc ch stk ->
481        runP (cont (compare x (indentOfParseState stk))) r x y loc ch stk
482
483getSrcLocL :: Lex a SrcLoc
484getSrcLocL = Lex $ \cont -> P $ \i x y l ->
485        runP (cont (l { srcLine = y, srcColumn = x })) i x y l
486
487setSrcLineL :: Int -> Lex a ()
488setSrcLineL y = Lex $ \cont -> P $ \i x _ ->
489        runP (cont ()) i x y
490
491pushContextL :: LexContext -> Lex a ()
492pushContextL ctxt = Lex $ \cont -> P $ \r x y loc ch (stk, exts, e, pst, cs) ->
493        runP (cont ()) r x y loc ch (ctxt:stk, exts, e, pst, cs)
494
495popContextL :: String -> Lex a ()
496popContextL _ = Lex $ \cont -> P $ \r x y loc ch stk m -> case stk of
497        (_:ctxt, exts, e, pst, cs) -> runP (cont ()) r x y loc ch (ctxt, exts, e, pst, cs) m
498        ([], _, _, _, _)           -> Failed loc "Unexpected }"
499
500pullCtxtFlag :: Lex a Bool
501pullCtxtFlag = Lex $ \cont -> P $ \r x y loc ch (ct, exts, e, (d,c), cs) ->
502        runP (cont c) r x y loc ch (ct, exts, e, (d,False), cs)
503
504
505flagDo :: Lex a ()
506flagDo = Lex $ \cont -> P $ \r x y loc ch (ct, exts, e, (_,c), cs) ->
507        runP (cont ()) r x y loc ch (ct, exts, e, (True,c), cs)
508
509
510-- Harp/Hsx
511
512getExtContext :: Lex a (Maybe ExtContext)
513getExtContext = Lex $ \cont -> P $ \r x y loc ch stk@(_, _, e, _, _) ->
514        let me = case e of
515              [] -> Nothing
516              (c:_) -> Just c
517        in runP (cont me) r x y loc ch stk
518
519pushExtContextL :: ExtContext -> Lex a ()
520pushExtContextL ec = Lex $ \cont -> P $ \r x y loc ch (s, exts, e, p, c) ->
521        runP (cont ()) r x y loc ch (s, exts, ec:e, p, c)
522
523popExtContextL :: String -> Lex a ()
524popExtContextL fn = Lex $ \cont -> P $ \r x y loc ch (s,exts,e,p,c) m -> case e of
525            (_:ec)   -> runP (cont ()) r x y loc ch (s,exts,ec,p,c) m
526            []       -> Failed loc ("Internal error: empty tag context in " ++ fn)
527
528
529-- Extension-aware lexing
530
531getExtensionsL :: Lex a [KnownExtension]
532getExtensionsL = Lex $ \cont -> P $ \r x y loc ch s m ->
533        runP (cont $ iExtensions m) r x y loc ch s m
534
535-- | Add an extension to the current configuration.
536addExtensionL :: KnownExtension -> Lex a ()
537addExtensionL ext = Lex $ \cont -> P $ \r x y loc ch (s, oldExts, e, p, c) m ->
538        let newExts = impliesExts [ext] ++ iExtensions m
539        in runP (cont ()) r x y loc ch (s, oldExts, e, p, c) (m {iExtensions = newExts})
540
541-- | Save the current configuration of extensions.
542saveExtensionsL :: Lex a ()
543saveExtensionsL = Lex $ \cont -> P $ \r x y loc ch (s, oldExts, e, p, c) m ->
544        runP (cont ()) r x y loc ch (s, iExtensions m:oldExts, e, p, c) m
545
546-- | Return to the previous saved extensions configuration.
547restoreExtensionsL :: Lex a ()
548restoreExtensionsL = Lex $ \cont -> P $ \r x y loc ch (s,exts,e,p,c) m -> case exts of
549            (_:prev) -> runP (cont ()) r x y loc ch (s,prev,e,p,c) m
550            _        -> Failed loc "Internal error: empty extension stack"
551
552-- LINE-aware lexing
553
554ignoreLinePragmasL :: Lex a Bool
555ignoreLinePragmasL = Lex $ \cont -> P $ \r x y loc c s m ->
556        runP (cont $ iIgnoreLinePragmas m) r x y loc c s m
557
558-- If we read a file name in a LINE pragma, we should update the state.
559setLineFilenameL :: String -> Lex a ()
560setLineFilenameL name = Lex $ \cont -> P $ \r x y loc ch s m ->
561        runP (cont ()) r x y loc ch s (m {iParseFilename = name})
562
563-- Comments
564
565pushComment :: Comment -> Lex a ()
566pushComment c = Lex $ \cont -> P $ \r x y loc ch (s, exts, e, p, cs) ->
567        runP (cont ()) r x y loc ch (s, exts, e, p, c:cs)
568