1-- -----------------------------------------------------------------------------
2--
3-- ParseMonad.hs, part of Alex
4--
5-- (c) Simon Marlow 2003
6--
7-- ----------------------------------------------------------------------------}
8
9module ParseMonad (
10        AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
11        AlexPosn(..), alexStartPos,
12
13        P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
14        setStartCode, getStartCode, getInput, setInput,
15 ) where
16
17import AbsSyn hiding ( StartCode )
18import CharSet ( CharSet )
19import Map ( Map )
20import qualified Map hiding ( Map )
21import UTF8
22
23#if __GLASGOW_HASKELL__ < 710
24import Control.Applicative ( Applicative(..) )
25#endif
26import Control.Monad ( liftM, ap )
27import Data.Word (Word8)
28-- -----------------------------------------------------------------------------
29-- The input type
30--import Codec.Binary.UTF8.Light as UTF8
31
32type Byte = Word8
33
34type AlexInput = (AlexPosn,     -- current position,
35                  Char,         -- previous char
36                  [Byte],
37                  String)       -- current input string
38
39alexInputPrevChar :: AlexInput -> Char
40alexInputPrevChar (_,c,_,_) = c
41
42
43alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
44alexGetChar (_,_,[],[]) = Nothing
45alexGetChar (p,_,[],(c:s))  = let p' = alexMove p c in p' `seq`
46                                Just (c, (p', c, [], s))
47alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning
48
49alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
50alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
51alexGetByte (_,_,[],[]) = Nothing
52alexGetByte (p,_,[],(c:s))  = let p' = alexMove p c
53                                  (b:bs) = UTF8.encode c
54                              in p' `seq`  Just (b, (p', c, bs, s))
55
56-- -----------------------------------------------------------------------------
57-- Token positions
58
59-- `Posn' records the location of a token in the input text.  It has three
60-- fields: the address (number of chacaters preceding the token), line number
61-- and column of a token within the file. `start_pos' gives the position of the
62-- start of the file and `eof_pos' a standard encoding for the end of file.
63-- `move_pos' calculates the new position after traversing a given character,
64-- assuming the usual eight character tab stops.
65
66data AlexPosn = AlexPn !Int !Int !Int
67        deriving (Eq,Show)
68
69alexStartPos :: AlexPosn
70alexStartPos = AlexPn 0 1 1
71
72alexMove :: AlexPosn -> Char -> AlexPosn
73alexMove (AlexPn a l c) '\t' = AlexPn (a+1)  l     (((c+7) `div` 8)*8+1)
74alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1)   1
75alexMove (AlexPn a l c) _    = AlexPn (a+1)  l     (c+1)
76
77-- -----------------------------------------------------------------------------
78-- Alex lexing/parsing monad
79
80type ParseError = (Maybe AlexPosn, String)
81type StartCode = Int
82
83data PState = PState {
84                smac_env  :: Map String CharSet,
85                rmac_env  :: Map String RExp,
86                startcode :: Int,
87                input     :: AlexInput
88             }
89
90newtype P a = P { unP :: PState -> Either ParseError (PState,a) }
91
92instance Functor P where
93  fmap = liftM
94
95instance Applicative P where
96  pure a = P $ \env -> Right (env,a)
97  (<*>) = ap
98
99instance Monad P where
100 (P m) >>= k = P $ \env -> case m env of
101                        Left err -> Left err
102                        Right (env',ok) -> unP (k ok) env'
103 return = pure
104
105runP :: String -> (Map String CharSet, Map String RExp)
106        -> P a -> Either ParseError a
107runP str (senv,renv) (P p)
108  = case p initial_state of
109        Left err -> Left err
110        Right (_,a) -> Right a
111 where initial_state =
112          PState{ smac_env=senv, rmac_env=renv,
113             startcode = 0, input=(alexStartPos,'\n',[],str) }
114
115failP :: String -> P a
116failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
117
118-- Macros are expanded during parsing, to simplify the abstract
119-- syntax.  The parsing monad passes around two environments mapping
120-- macro names to sets and regexps respectively.
121
122lookupSMac :: (AlexPosn,String) -> P CharSet
123lookupSMac (posn,smac)
124 = P $ \s@PState{ smac_env = senv } ->
125       case Map.lookup smac senv of
126        Just ok -> Right (s,ok)
127        Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)
128
129lookupRMac :: String -> P RExp
130lookupRMac rmac
131 = P $ \s@PState{ rmac_env = renv } ->
132       case Map.lookup rmac renv of
133        Just ok -> Right (s,ok)
134        Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac)
135
136newSMac :: String -> CharSet -> P ()
137newSMac smac set
138  = P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())
139
140newRMac :: String -> RExp -> P ()
141newRMac rmac rexp
142  = P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())
143
144setStartCode :: StartCode -> P ()
145setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ())
146
147getStartCode :: P StartCode
148getStartCode = P $ \s -> Right (s, startcode s)
149
150getInput :: P AlexInput
151getInput = P $ \s -> Right (s, input s)
152
153setInput :: AlexInput -> P ()
154setInput inp = P $ \s -> Right (s{ input = inp }, ())
155