1-- ----------------------------------------------------------------------------- 2-- Alex wrapper code. 3-- 4-- This code is in the PUBLIC DOMAIN; you may copy it freely and use 5-- it for any purpose whatsoever. 6 7#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) 8import Control.Applicative as App (Applicative (..)) 9#endif 10 11import Data.Word (Word8) 12#if defined(ALEX_BASIC_BYTESTRING) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) 13 14import Data.Int (Int64) 15import qualified Data.Char 16import qualified Data.ByteString.Lazy as ByteString 17import qualified Data.ByteString.Internal as ByteString (w2c) 18 19#elif defined(ALEX_STRICT_BYTESTRING) 20 21import qualified Data.Char 22import qualified Data.ByteString as ByteString 23import qualified Data.ByteString.Internal as ByteString hiding (ByteString) 24import qualified Data.ByteString.Unsafe as ByteString 25 26#else 27 28import Data.Char (ord) 29import qualified Data.Bits 30 31-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. 32utf8Encode :: Char -> [Word8] 33utf8Encode = uncurry (:) . utf8Encode' 34 35utf8Encode' :: Char -> (Word8, [Word8]) 36utf8Encode' c = case go (ord c) of 37 (x, xs) -> (fromIntegral x, map fromIntegral xs) 38 where 39 go oc 40 | oc <= 0x7f = ( oc 41 , [ 42 ]) 43 44 | oc <= 0x7ff = ( 0xc0 + (oc `Data.Bits.shiftR` 6) 45 , [0x80 + oc Data.Bits..&. 0x3f 46 ]) 47 48 | oc <= 0xffff = ( 0xe0 + (oc `Data.Bits.shiftR` 12) 49 , [0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) 50 , 0x80 + oc Data.Bits..&. 0x3f 51 ]) 52 | otherwise = ( 0xf0 + (oc `Data.Bits.shiftR` 18) 53 , [0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) 54 , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) 55 , 0x80 + oc Data.Bits..&. 0x3f 56 ]) 57 58#endif 59 60type Byte = Word8 61 62-- ----------------------------------------------------------------------------- 63-- The input type 64 65#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_GSCAN) 66type AlexInput = (AlexPosn, -- current position, 67 Char, -- previous char 68 [Byte], -- pending bytes on current char 69 String) -- current input string 70 71ignorePendingBytes :: AlexInput -> AlexInput 72ignorePendingBytes (p,c,_ps,s) = (p,c,[],s) 73 74alexInputPrevChar :: AlexInput -> Char 75alexInputPrevChar (_p,c,_bs,_s) = c 76 77alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) 78alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) 79alexGetByte (_,_,[],[]) = Nothing 80alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c 81 in case utf8Encode' c of 82 (b, bs) -> p' `seq` Just (b, (p', c, bs, s)) 83#endif 84 85#if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) 86type AlexInput = (AlexPosn, -- current position, 87 Char, -- previous char 88 ByteString.ByteString, -- current input string 89 Int64) -- bytes consumed so far 90 91ignorePendingBytes :: AlexInput -> AlexInput 92ignorePendingBytes i = i -- no pending bytes when lexing bytestrings 93 94alexInputPrevChar :: AlexInput -> Char 95alexInputPrevChar (_,c,_,_) = c 96 97alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) 98alexGetByte (p,_,cs,n) = 99 case ByteString.uncons cs of 100 Nothing -> Nothing 101 Just (b, cs') -> 102 let c = ByteString.w2c b 103 p' = alexMove p c 104 n' = n+1 105 in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n')) 106#endif 107 108#ifdef ALEX_BASIC_BYTESTRING 109data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char, -- previous char 110 alexStr :: !ByteString.ByteString, -- current input string 111 alexBytePos :: {-# UNPACK #-} !Int64} -- bytes consumed so far 112 113alexInputPrevChar :: AlexInput -> Char 114alexInputPrevChar = alexChar 115 116alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) = 117 case ByteString.uncons cs of 118 Nothing -> Nothing 119 Just (c, rest) -> 120 Just (c, AlexInput { 121 alexChar = ByteString.w2c c, 122 alexStr = rest, 123 alexBytePos = n+1}) 124#endif 125 126#ifdef ALEX_STRICT_BYTESTRING 127data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char, 128 alexStr :: {-# UNPACK #-} !ByteString.ByteString, 129 alexBytePos :: {-# UNPACK #-} !Int} 130 131alexInputPrevChar :: AlexInput -> Char 132alexInputPrevChar = alexChar 133 134alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) = 135 case ByteString.uncons cs of 136 Nothing -> Nothing 137 Just (c, rest) -> 138 Just (c, AlexInput { 139 alexChar = ByteString.w2c c, 140 alexStr = rest, 141 alexBytePos = n+1}) 142#endif 143 144-- ----------------------------------------------------------------------------- 145-- Token positions 146 147-- `Posn' records the location of a token in the input text. It has three 148-- fields: the address (number of chacaters preceding the token), line number 149-- and column of a token within the file. `start_pos' gives the position of the 150-- start of the file and `eof_pos' a standard encoding for the end of file. 151-- `move_pos' calculates the new position after traversing a given character, 152-- assuming the usual eight character tab stops. 153 154#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) 155data AlexPosn = AlexPn !Int !Int !Int 156 deriving (Eq,Show) 157 158alexStartPos :: AlexPosn 159alexStartPos = AlexPn 0 1 1 160 161alexMove :: AlexPosn -> Char -> AlexPosn 162alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (c+alex_tab_size-((c-1) `mod` alex_tab_size)) 163alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1 164alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1) 165#endif 166 167-- ----------------------------------------------------------------------------- 168-- Monad (default and with ByteString input) 169 170#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) 171data AlexState = AlexState { 172 alex_pos :: !AlexPosn, -- position at current input location 173#ifndef ALEX_MONAD_BYTESTRING 174 alex_inp :: String, -- the current input 175 alex_chr :: !Char, -- the character before the input 176 alex_bytes :: [Byte], 177#else /* ALEX_MONAD_BYTESTRING */ 178 alex_bpos:: !Int64, -- bytes consumed so far 179 alex_inp :: ByteString.ByteString, -- the current input 180 alex_chr :: !Char, -- the character before the input 181#endif /* ALEX_MONAD_BYTESTRING */ 182 alex_scd :: !Int -- the current startcode 183#ifdef ALEX_MONAD_USER_STATE 184 , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program 185#endif 186 } 187 188-- Compile with -funbox-strict-fields for best results! 189 190#ifndef ALEX_MONAD_BYTESTRING 191runAlex :: String -> Alex a -> Either String a 192runAlex input__ (Alex f) 193 = case f (AlexState {alex_bytes = [], 194#else /* ALEX_MONAD_BYTESTRING */ 195runAlex :: ByteString.ByteString -> Alex a -> Either String a 196runAlex input__ (Alex f) 197 = case f (AlexState {alex_bpos = 0, 198#endif /* ALEX_MONAD_BYTESTRING */ 199 alex_pos = alexStartPos, 200 alex_inp = input__, 201 alex_chr = '\n', 202#ifdef ALEX_MONAD_USER_STATE 203 alex_ust = alexInitUserState, 204#endif 205 alex_scd = 0}) of Left msg -> Left msg 206 Right ( _, a ) -> Right a 207 208newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) } 209 210instance Functor Alex where 211 fmap f a = Alex $ \s -> case unAlex a s of 212 Left msg -> Left msg 213 Right (s', a') -> Right (s', f a') 214 215instance Applicative Alex where 216 pure a = Alex $ \s -> Right (s, a) 217 fa <*> a = Alex $ \s -> case unAlex fa s of 218 Left msg -> Left msg 219 Right (s', f) -> case unAlex a s' of 220 Left msg -> Left msg 221 Right (s'', b) -> Right (s'', f b) 222 223instance Monad Alex where 224 m >>= k = Alex $ \s -> case unAlex m s of 225 Left msg -> Left msg 226 Right (s',a) -> unAlex (k a) s' 227 return = App.pure 228 229alexGetInput :: Alex AlexInput 230alexGetInput 231#ifndef ALEX_MONAD_BYTESTRING 232 = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} -> 233 Right (s, (pos,c,bs,inp__)) 234#else /* ALEX_MONAD_BYTESTRING */ 235 = Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} -> 236 Right (s, (pos,c,inp__,bpos)) 237#endif /* ALEX_MONAD_BYTESTRING */ 238 239alexSetInput :: AlexInput -> Alex () 240#ifndef ALEX_MONAD_BYTESTRING 241alexSetInput (pos,c,bs,inp__) 242 = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of 243#else /* ALEX_MONAD_BYTESTRING */ 244alexSetInput (pos,c,inp__,bpos) 245 = Alex $ \s -> case s{alex_pos=pos, 246 alex_bpos=bpos, 247 alex_chr=c, 248 alex_inp=inp__} of 249#endif /* ALEX_MONAD_BYTESTRING */ 250 state__@(AlexState{}) -> Right (state__, ()) 251 252alexError :: String -> Alex a 253alexError message = Alex $ const $ Left message 254 255alexGetStartCode :: Alex Int 256alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc) 257 258alexSetStartCode :: Int -> Alex () 259alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ()) 260 261#if !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE) 262alexGetUserState :: Alex AlexUserState 263alexGetUserState = Alex $ \s@AlexState{alex_ust=ust} -> Right (s,ust) 264 265alexSetUserState :: AlexUserState -> Alex () 266alexSetUserState ss = Alex $ \s -> Right (s{alex_ust=ss}, ()) 267#endif /* !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE) */ 268 269alexMonadScan = do 270#ifndef ALEX_MONAD_BYTESTRING 271 inp__ <- alexGetInput 272#else /* ALEX_MONAD_BYTESTRING */ 273 inp__@(_,_,_,n) <- alexGetInput 274#endif /* ALEX_MONAD_BYTESTRING */ 275 sc <- alexGetStartCode 276 case alexScan inp__ sc of 277 AlexEOF -> alexEOF 278 AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) 279 AlexSkip inp__' _len -> do 280 alexSetInput inp__' 281 alexMonadScan 282#ifndef ALEX_MONAD_BYTESTRING 283 AlexToken inp__' len action -> do 284#else /* ALEX_MONAD_BYTESTRING */ 285 AlexToken inp__'@(_,_,_,n') _ action -> let len = n'-n in do 286#endif /* ALEX_MONAD_BYTESTRING */ 287 alexSetInput inp__' 288 action (ignorePendingBytes inp__) len 289 290-- ----------------------------------------------------------------------------- 291-- Useful token actions 292 293#ifndef ALEX_MONAD_BYTESTRING 294type AlexAction result = AlexInput -> Int -> Alex result 295#else /* ALEX_MONAD_BYTESTRING */ 296type AlexAction result = AlexInput -> Int64 -> Alex result 297#endif /* ALEX_MONAD_BYTESTRING */ 298 299-- just ignore this token and scan another one 300-- skip :: AlexAction result 301skip _input _len = alexMonadScan 302 303-- ignore this token, but set the start code to a new value 304-- begin :: Int -> AlexAction result 305begin code _input _len = do alexSetStartCode code; alexMonadScan 306 307-- perform an action for this token, and set the start code to a new value 308andBegin :: AlexAction result -> Int -> AlexAction result 309(action `andBegin` code) input__ len = do 310 alexSetStartCode code 311 action input__ len 312 313#ifndef ALEX_MONAD_BYTESTRING 314token :: (AlexInput -> Int -> token) -> AlexAction token 315#else /* ALEX_MONAD_BYTESTRING */ 316token :: (AlexInput -> Int64 -> token) -> AlexAction token 317#endif /* ALEX_MONAD_BYTESTRING */ 318token t input__ len = return (t input__ len) 319#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) */ 320 321 322-- ----------------------------------------------------------------------------- 323-- Basic wrapper 324 325#ifdef ALEX_BASIC 326type AlexInput = (Char,[Byte],String) 327 328alexInputPrevChar :: AlexInput -> Char 329alexInputPrevChar (c,_,_) = c 330 331-- alexScanTokens :: String -> [token] 332alexScanTokens str = go ('\n',[],str) 333 where go inp__@(_,_bs,s) = 334 case alexScan inp__ 0 of 335 AlexEOF -> [] 336 AlexError _ -> error "lexical error" 337 AlexSkip inp__' _ln -> go inp__' 338 AlexToken inp__' len act -> act (take len s) : go inp__' 339 340alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) 341alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s)) 342alexGetByte (_,[],[]) = Nothing 343alexGetByte (_,[],(c:s)) = case utf8Encode' c of 344 (b, bs) -> Just (b, (c, bs, s)) 345#endif 346 347 348-- ----------------------------------------------------------------------------- 349-- Basic wrapper, ByteString version 350 351#ifdef ALEX_BASIC_BYTESTRING 352 353-- alexScanTokens :: ByteString.ByteString -> [token] 354alexScanTokens str = go (AlexInput '\n' str 0) 355 where go inp__ = 356 case alexScan inp__ 0 of 357 AlexEOF -> [] 358 AlexError _ -> error "lexical error" 359 AlexSkip inp__' _len -> go inp__' 360 AlexToken inp__' _ act -> 361 let len = alexBytePos inp__' - alexBytePos inp__ in 362 act (ByteString.take len (alexStr inp__)) : go inp__' 363 364#endif 365 366#ifdef ALEX_STRICT_BYTESTRING 367 368-- alexScanTokens :: ByteString.ByteString -> [token] 369alexScanTokens str = go (AlexInput '\n' str 0) 370 where go inp__ = 371 case alexScan inp__ 0 of 372 AlexEOF -> [] 373 AlexError _ -> error "lexical error" 374 AlexSkip inp__' _len -> go inp__' 375 AlexToken inp__' _ act -> 376 let len = alexBytePos inp__' - alexBytePos inp__ in 377 act (ByteString.take len (alexStr inp__)) : go inp__' 378 379#endif 380 381 382-- ----------------------------------------------------------------------------- 383-- Posn wrapper 384 385-- Adds text positions to the basic model. 386 387#ifdef ALEX_POSN 388--alexScanTokens :: String -> [token] 389alexScanTokens str0 = go (alexStartPos,'\n',[],str0) 390 where go inp__@(pos,_,_,str) = 391 case alexScan inp__ 0 of 392 AlexEOF -> [] 393 AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) 394 AlexSkip inp__' _ln -> go inp__' 395 AlexToken inp__' len act -> act pos (take len str) : go inp__' 396#endif 397 398 399-- ----------------------------------------------------------------------------- 400-- Posn wrapper, ByteString version 401 402#ifdef ALEX_POSN_BYTESTRING 403--alexScanTokens :: ByteString.ByteString -> [token] 404alexScanTokens str0 = go (alexStartPos,'\n',str0,0) 405 where go inp__@(pos,_,str,n) = 406 case alexScan inp__ 0 of 407 AlexEOF -> [] 408 AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) 409 AlexSkip inp__' _len -> go inp__' 410 AlexToken inp__'@(_,_,_,n') _ act -> 411 act pos (ByteString.take (n'-n) str) : go inp__' 412#endif 413 414 415-- ----------------------------------------------------------------------------- 416-- GScan wrapper 417 418-- For compatibility with previous versions of Alex, and because we can. 419 420#ifdef ALEX_GSCAN 421alexGScan stop__ state__ inp__ = 422 alex_gscan stop__ alexStartPos '\n' [] inp__ (0,state__) 423 424alex_gscan stop__ p c bs inp__ (sc,state__) = 425 case alexScan (p,c,bs,inp__) sc of 426 AlexEOF -> stop__ p c inp__ (sc,state__) 427 AlexError _ -> stop__ p c inp__ (sc,state__) 428 AlexSkip (p',c',bs',inp__') _len -> 429 alex_gscan stop__ p' c' bs' inp__' (sc,state__) 430 AlexToken (p',c',bs',inp__') len k -> 431 k p c inp__ len (\scs -> alex_gscan stop__ p' c' bs' inp__' scs) (sc,state__) 432#endif 433