1-- ----------------------------------------------------------------------------- 2-- 3-- ParseMonad.hs, part of Alex 4-- 5-- (c) Simon Marlow 2003 6-- 7-- ----------------------------------------------------------------------------} 8 9module ParseMonad ( 10 AlexInput, alexInputPrevChar, alexGetChar, alexGetByte, 11 AlexPosn(..), alexStartPos, 12 13 P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac, 14 setStartCode, getStartCode, getInput, setInput, 15 ) where 16 17import AbsSyn hiding ( StartCode ) 18import CharSet ( CharSet ) 19import Map ( Map ) 20import qualified Map hiding ( Map ) 21import UTF8 22 23#if __GLASGOW_HASKELL__ < 710 24import Control.Applicative ( Applicative(..) ) 25#endif 26import Control.Monad ( liftM, ap ) 27import Data.Word (Word8) 28-- ----------------------------------------------------------------------------- 29-- The input type 30--import Codec.Binary.UTF8.Light as UTF8 31 32type Byte = Word8 33 34type AlexInput = (AlexPosn, -- current position, 35 Char, -- previous char 36 [Byte], 37 String) -- current input string 38 39alexInputPrevChar :: AlexInput -> Char 40alexInputPrevChar (_,c,_,_) = c 41 42 43alexGetChar :: AlexInput -> Maybe (Char,AlexInput) 44alexGetChar (_,_,[],[]) = Nothing 45alexGetChar (p,_,[],(c:s)) = let p' = alexMove p c in p' `seq` 46 Just (c, (p', c, [], s)) 47alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning 48 49alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) 50alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) 51alexGetByte (_,_,[],[]) = Nothing 52alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c 53 (b:bs) = UTF8.encode c 54 in p' `seq` Just (b, (p', c, bs, s)) 55 56-- ----------------------------------------------------------------------------- 57-- Token positions 58 59-- `Posn' records the location of a token in the input text. It has three 60-- fields: the address (number of chacaters preceding the token), line number 61-- and column of a token within the file. `start_pos' gives the position of the 62-- start of the file and `eof_pos' a standard encoding for the end of file. 63-- `move_pos' calculates the new position after traversing a given character, 64-- assuming the usual eight character tab stops. 65 66data AlexPosn = AlexPn !Int !Int !Int 67 deriving (Eq,Show) 68 69alexStartPos :: AlexPosn 70alexStartPos = AlexPn 0 1 1 71 72alexMove :: AlexPosn -> Char -> AlexPosn 73alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1) 74alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1 75alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1) 76 77-- ----------------------------------------------------------------------------- 78-- Alex lexing/parsing monad 79 80type ParseError = (Maybe AlexPosn, String) 81type StartCode = Int 82 83data PState = PState { 84 smac_env :: Map String CharSet, 85 rmac_env :: Map String RExp, 86 startcode :: Int, 87 input :: AlexInput 88 } 89 90newtype P a = P { unP :: PState -> Either ParseError (PState,a) } 91 92instance Functor P where 93 fmap = liftM 94 95instance Applicative P where 96 pure a = P $ \env -> Right (env,a) 97 (<*>) = ap 98 99instance Monad P where 100 (P m) >>= k = P $ \env -> case m env of 101 Left err -> Left err 102 Right (env',ok) -> unP (k ok) env' 103 return = pure 104 105runP :: String -> (Map String CharSet, Map String RExp) 106 -> P a -> Either ParseError a 107runP str (senv,renv) (P p) 108 = case p initial_state of 109 Left err -> Left err 110 Right (_,a) -> Right a 111 where initial_state = 112 PState{ smac_env=senv, rmac_env=renv, 113 startcode = 0, input=(alexStartPos,'\n',[],str) } 114 115failP :: String -> P a 116failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str) 117 118-- Macros are expanded during parsing, to simplify the abstract 119-- syntax. The parsing monad passes around two environments mapping 120-- macro names to sets and regexps respectively. 121 122lookupSMac :: (AlexPosn,String) -> P CharSet 123lookupSMac (posn,smac) 124 = P $ \s@PState{ smac_env = senv } -> 125 case Map.lookup smac senv of 126 Just ok -> Right (s,ok) 127 Nothing -> Left (Just posn, "unknown set macro: $" ++ smac) 128 129lookupRMac :: String -> P RExp 130lookupRMac rmac 131 = P $ \s@PState{ rmac_env = renv } -> 132 case Map.lookup rmac renv of 133 Just ok -> Right (s,ok) 134 Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac) 135 136newSMac :: String -> CharSet -> P () 137newSMac smac set 138 = P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ()) 139 140newRMac :: String -> RExp -> P () 141newRMac rmac rexp 142 = P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ()) 143 144setStartCode :: StartCode -> P () 145setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ()) 146 147getStartCode :: P StartCode 148getStartCode = P $ \s -> Right (s, startcode s) 149 150getInput :: P AlexInput 151getInput = P $ \s -> Right (s, input s) 152 153setInput :: AlexInput -> P () 154setInput inp = P $ \s -> Right (s{ input = inp }, ()) 155