1-- -----------------------------------------------------------------------------
2-- Alex wrapper code.
3--
4-- This code is in the PUBLIC DOMAIN; you may copy it freely and use
5-- it for any purpose whatsoever.
6
7#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING)
8import Control.Applicative as App (Applicative (..))
9#endif
10
11import Data.Word (Word8)
12#if defined(ALEX_BASIC_BYTESTRING) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
13
14import Data.Int (Int64)
15import qualified Data.Char
16import qualified Data.ByteString.Lazy     as ByteString
17import qualified Data.ByteString.Internal as ByteString (w2c)
18
19#elif defined(ALEX_STRICT_BYTESTRING)
20
21import qualified Data.Char
22import qualified Data.ByteString          as ByteString
23import qualified Data.ByteString.Internal as ByteString hiding (ByteString)
24import qualified Data.ByteString.Unsafe   as ByteString
25
26#else
27
28import Data.Char (ord)
29import qualified Data.Bits
30
31-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
32utf8Encode :: Char -> [Word8]
33utf8Encode = uncurry (:) . utf8Encode'
34
35utf8Encode' :: Char -> (Word8, [Word8])
36utf8Encode' c = case go (ord c) of
37                  (x, xs) -> (fromIntegral x, map fromIntegral xs)
38 where
39  go oc
40   | oc <= 0x7f       = ( oc
41                        , [
42                        ])
43
44   | oc <= 0x7ff      = ( 0xc0 + (oc `Data.Bits.shiftR` 6)
45                        , [0x80 + oc Data.Bits..&. 0x3f
46                        ])
47
48   | oc <= 0xffff     = ( 0xe0 + (oc `Data.Bits.shiftR` 12)
49                        , [0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
50                        , 0x80 + oc Data.Bits..&. 0x3f
51                        ])
52   | otherwise        = ( 0xf0 + (oc `Data.Bits.shiftR` 18)
53                        , [0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
54                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
55                        , 0x80 + oc Data.Bits..&. 0x3f
56                        ])
57
58#endif
59
60type Byte = Word8
61
62-- -----------------------------------------------------------------------------
63-- The input type
64
65#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_GSCAN)
66type AlexInput = (AlexPosn,     -- current position,
67                  Char,         -- previous char
68                  [Byte],       -- pending bytes on current char
69                  String)       -- current input string
70
71ignorePendingBytes :: AlexInput -> AlexInput
72ignorePendingBytes (p,c,_ps,s) = (p,c,[],s)
73
74alexInputPrevChar :: AlexInput -> Char
75alexInputPrevChar (_p,c,_bs,_s) = c
76
77alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
78alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
79alexGetByte (_,_,[],[]) = Nothing
80alexGetByte (p,_,[],(c:s))  = let p' = alexMove p c
81                              in case utf8Encode' c of
82                                   (b, bs) -> p' `seq`  Just (b, (p', c, bs, s))
83#endif
84
85#if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
86type AlexInput = (AlexPosn,     -- current position,
87                  Char,         -- previous char
88                  ByteString.ByteString,        -- current input string
89                  Int64)           -- bytes consumed so far
90
91ignorePendingBytes :: AlexInput -> AlexInput
92ignorePendingBytes i = i   -- no pending bytes when lexing bytestrings
93
94alexInputPrevChar :: AlexInput -> Char
95alexInputPrevChar (_,c,_,_) = c
96
97alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
98alexGetByte (p,_,cs,n) =
99    case ByteString.uncons cs of
100        Nothing -> Nothing
101        Just (b, cs') ->
102            let c   = ByteString.w2c b
103                p'  = alexMove p c
104                n'  = n+1
105            in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n'))
106#endif
107
108#ifdef ALEX_BASIC_BYTESTRING
109data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char,      -- previous char
110                             alexStr ::  !ByteString.ByteString,    -- current input string
111                             alexBytePos :: {-# UNPACK #-} !Int64}  -- bytes consumed so far
112
113alexInputPrevChar :: AlexInput -> Char
114alexInputPrevChar = alexChar
115
116alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) =
117    case ByteString.uncons cs of
118        Nothing -> Nothing
119        Just (c, rest) ->
120            Just (c, AlexInput {
121                alexChar = ByteString.w2c c,
122                alexStr =  rest,
123                alexBytePos = n+1})
124#endif
125
126#ifdef ALEX_STRICT_BYTESTRING
127data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char,
128                             alexStr :: {-# UNPACK #-} !ByteString.ByteString,
129                             alexBytePos :: {-# UNPACK #-} !Int}
130
131alexInputPrevChar :: AlexInput -> Char
132alexInputPrevChar = alexChar
133
134alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) =
135    case ByteString.uncons cs of
136        Nothing -> Nothing
137        Just (c, rest) ->
138            Just (c, AlexInput {
139                alexChar = ByteString.w2c c,
140                alexStr =  rest,
141                alexBytePos = n+1})
142#endif
143
144-- -----------------------------------------------------------------------------
145-- Token positions
146
147-- `Posn' records the location of a token in the input text.  It has three
148-- fields: the address (number of chacaters preceding the token), line number
149-- and column of a token within the file. `start_pos' gives the position of the
150-- start of the file and `eof_pos' a standard encoding for the end of file.
151-- `move_pos' calculates the new position after traversing a given character,
152-- assuming the usual eight character tab stops.
153
154#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN)
155data AlexPosn = AlexPn !Int !Int !Int
156        deriving (Eq,Show)
157
158alexStartPos :: AlexPosn
159alexStartPos = AlexPn 0 1 1
160
161alexMove :: AlexPosn -> Char -> AlexPosn
162alexMove (AlexPn a l c) '\t' = AlexPn (a+1)  l     (c+alex_tab_size-((c-1) `mod` alex_tab_size))
163alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1)   1
164alexMove (AlexPn a l c) _    = AlexPn (a+1)  l     (c+1)
165#endif
166
167-- -----------------------------------------------------------------------------
168-- Monad (default and with ByteString input)
169
170#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING)
171data AlexState = AlexState {
172        alex_pos :: !AlexPosn,  -- position at current input location
173#ifndef ALEX_MONAD_BYTESTRING
174        alex_inp :: String,     -- the current input
175        alex_chr :: !Char,      -- the character before the input
176        alex_bytes :: [Byte],
177#else /* ALEX_MONAD_BYTESTRING */
178        alex_bpos:: !Int64,     -- bytes consumed so far
179        alex_inp :: ByteString.ByteString,      -- the current input
180        alex_chr :: !Char,      -- the character before the input
181#endif /* ALEX_MONAD_BYTESTRING */
182        alex_scd :: !Int        -- the current startcode
183#ifdef ALEX_MONAD_USER_STATE
184      , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
185#endif
186    }
187
188-- Compile with -funbox-strict-fields for best results!
189
190#ifndef ALEX_MONAD_BYTESTRING
191runAlex :: String -> Alex a -> Either String a
192runAlex input__ (Alex f)
193   = case f (AlexState {alex_bytes = [],
194#else /* ALEX_MONAD_BYTESTRING */
195runAlex :: ByteString.ByteString -> Alex a -> Either String a
196runAlex input__ (Alex f)
197   = case f (AlexState {alex_bpos = 0,
198#endif /* ALEX_MONAD_BYTESTRING */
199                        alex_pos = alexStartPos,
200                        alex_inp = input__,
201                        alex_chr = '\n',
202#ifdef ALEX_MONAD_USER_STATE
203                        alex_ust = alexInitUserState,
204#endif
205                        alex_scd = 0}) of Left msg -> Left msg
206                                          Right ( _, a ) -> Right a
207
208newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) }
209
210instance Functor Alex where
211  fmap f a = Alex $ \s -> case unAlex a s of
212                            Left msg -> Left msg
213                            Right (s', a') -> Right (s', f a')
214
215instance Applicative Alex where
216  pure a   = Alex $ \s -> Right (s, a)
217  fa <*> a = Alex $ \s -> case unAlex fa s of
218                            Left msg -> Left msg
219                            Right (s', f) -> case unAlex a s' of
220                                               Left msg -> Left msg
221                                               Right (s'', b) -> Right (s'', f b)
222
223instance Monad Alex where
224  m >>= k  = Alex $ \s -> case unAlex m s of
225                                Left msg -> Left msg
226                                Right (s',a) -> unAlex (k a) s'
227  return = App.pure
228
229alexGetInput :: Alex AlexInput
230alexGetInput
231#ifndef ALEX_MONAD_BYTESTRING
232 = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} ->
233        Right (s, (pos,c,bs,inp__))
234#else /* ALEX_MONAD_BYTESTRING */
235 = Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} ->
236        Right (s, (pos,c,inp__,bpos))
237#endif /* ALEX_MONAD_BYTESTRING */
238
239alexSetInput :: AlexInput -> Alex ()
240#ifndef ALEX_MONAD_BYTESTRING
241alexSetInput (pos,c,bs,inp__)
242 = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of
243#else /* ALEX_MONAD_BYTESTRING */
244alexSetInput (pos,c,inp__,bpos)
245 = Alex $ \s -> case s{alex_pos=pos,
246                       alex_bpos=bpos,
247                       alex_chr=c,
248                       alex_inp=inp__} of
249#endif /* ALEX_MONAD_BYTESTRING */
250                  state__@(AlexState{}) -> Right (state__, ())
251
252alexError :: String -> Alex a
253alexError message = Alex $ const $ Left message
254
255alexGetStartCode :: Alex Int
256alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
257
258alexSetStartCode :: Int -> Alex ()
259alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
260
261#if !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE)
262alexGetUserState :: Alex AlexUserState
263alexGetUserState = Alex $ \s@AlexState{alex_ust=ust} -> Right (s,ust)
264
265alexSetUserState :: AlexUserState -> Alex ()
266alexSetUserState ss = Alex $ \s -> Right (s{alex_ust=ss}, ())
267#endif /* !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE) */
268
269alexMonadScan = do
270#ifndef ALEX_MONAD_BYTESTRING
271  inp__ <- alexGetInput
272#else /* ALEX_MONAD_BYTESTRING */
273  inp__@(_,_,_,n) <- alexGetInput
274#endif /* ALEX_MONAD_BYTESTRING */
275  sc <- alexGetStartCode
276  case alexScan inp__ sc of
277    AlexEOF -> alexEOF
278    AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
279    AlexSkip  inp__' _len -> do
280        alexSetInput inp__'
281        alexMonadScan
282#ifndef ALEX_MONAD_BYTESTRING
283    AlexToken inp__' len action -> do
284#else /* ALEX_MONAD_BYTESTRING */
285    AlexToken inp__'@(_,_,_,n') _ action -> let len = n'-n in do
286#endif /* ALEX_MONAD_BYTESTRING */
287        alexSetInput inp__'
288        action (ignorePendingBytes inp__) len
289
290-- -----------------------------------------------------------------------------
291-- Useful token actions
292
293#ifndef ALEX_MONAD_BYTESTRING
294type AlexAction result = AlexInput -> Int -> Alex result
295#else /* ALEX_MONAD_BYTESTRING */
296type AlexAction result = AlexInput -> Int64 -> Alex result
297#endif /* ALEX_MONAD_BYTESTRING */
298
299-- just ignore this token and scan another one
300-- skip :: AlexAction result
301skip _input _len = alexMonadScan
302
303-- ignore this token, but set the start code to a new value
304-- begin :: Int -> AlexAction result
305begin code _input _len = do alexSetStartCode code; alexMonadScan
306
307-- perform an action for this token, and set the start code to a new value
308andBegin :: AlexAction result -> Int -> AlexAction result
309(action `andBegin` code) input__ len = do
310  alexSetStartCode code
311  action input__ len
312
313#ifndef ALEX_MONAD_BYTESTRING
314token :: (AlexInput -> Int -> token) -> AlexAction token
315#else /* ALEX_MONAD_BYTESTRING */
316token :: (AlexInput -> Int64 -> token) -> AlexAction token
317#endif /* ALEX_MONAD_BYTESTRING */
318token t input__ len = return (t input__ len)
319#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) */
320
321
322-- -----------------------------------------------------------------------------
323-- Basic wrapper
324
325#ifdef ALEX_BASIC
326type AlexInput = (Char,[Byte],String)
327
328alexInputPrevChar :: AlexInput -> Char
329alexInputPrevChar (c,_,_) = c
330
331-- alexScanTokens :: String -> [token]
332alexScanTokens str = go ('\n',[],str)
333  where go inp__@(_,_bs,s) =
334          case alexScan inp__ 0 of
335                AlexEOF -> []
336                AlexError _ -> error "lexical error"
337                AlexSkip  inp__' _ln     -> go inp__'
338                AlexToken inp__' len act -> act (take len s) : go inp__'
339
340alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
341alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s))
342alexGetByte (_,[],[])    = Nothing
343alexGetByte (_,[],(c:s)) = case utf8Encode' c of
344                             (b, bs) -> Just (b, (c, bs, s))
345#endif
346
347
348-- -----------------------------------------------------------------------------
349-- Basic wrapper, ByteString version
350
351#ifdef ALEX_BASIC_BYTESTRING
352
353-- alexScanTokens :: ByteString.ByteString -> [token]
354alexScanTokens str = go (AlexInput '\n' str 0)
355  where go inp__ =
356          case alexScan inp__ 0 of
357                AlexEOF -> []
358                AlexError _ -> error "lexical error"
359                AlexSkip  inp__' _len  -> go inp__'
360                AlexToken inp__' _ act ->
361                  let len = alexBytePos inp__' - alexBytePos inp__ in
362                  act (ByteString.take len (alexStr inp__)) : go inp__'
363
364#endif
365
366#ifdef ALEX_STRICT_BYTESTRING
367
368-- alexScanTokens :: ByteString.ByteString -> [token]
369alexScanTokens str = go (AlexInput '\n' str 0)
370  where go inp__ =
371          case alexScan inp__ 0 of
372                AlexEOF -> []
373                AlexError _ -> error "lexical error"
374                AlexSkip  inp__' _len  -> go inp__'
375                AlexToken inp__' _ act ->
376                  let len = alexBytePos inp__' - alexBytePos inp__ in
377                  act (ByteString.take len (alexStr inp__)) : go inp__'
378
379#endif
380
381
382-- -----------------------------------------------------------------------------
383-- Posn wrapper
384
385-- Adds text positions to the basic model.
386
387#ifdef ALEX_POSN
388--alexScanTokens :: String -> [token]
389alexScanTokens str0 = go (alexStartPos,'\n',[],str0)
390  where go inp__@(pos,_,_,str) =
391          case alexScan inp__ 0 of
392                AlexEOF -> []
393                AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
394                AlexSkip  inp__' _ln     -> go inp__'
395                AlexToken inp__' len act -> act pos (take len str) : go inp__'
396#endif
397
398
399-- -----------------------------------------------------------------------------
400-- Posn wrapper, ByteString version
401
402#ifdef ALEX_POSN_BYTESTRING
403--alexScanTokens :: ByteString.ByteString -> [token]
404alexScanTokens str0 = go (alexStartPos,'\n',str0,0)
405  where go inp__@(pos,_,str,n) =
406          case alexScan inp__ 0 of
407                AlexEOF -> []
408                AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
409                AlexSkip  inp__' _len       -> go inp__'
410                AlexToken inp__'@(_,_,_,n') _ act ->
411                  act pos (ByteString.take (n'-n) str) : go inp__'
412#endif
413
414
415-- -----------------------------------------------------------------------------
416-- GScan wrapper
417
418-- For compatibility with previous versions of Alex, and because we can.
419
420#ifdef ALEX_GSCAN
421alexGScan stop__ state__ inp__ =
422  alex_gscan stop__ alexStartPos '\n' [] inp__ (0,state__)
423
424alex_gscan stop__ p c bs inp__ (sc,state__) =
425  case alexScan (p,c,bs,inp__) sc of
426        AlexEOF     -> stop__ p c inp__ (sc,state__)
427        AlexError _ -> stop__ p c inp__ (sc,state__)
428        AlexSkip (p',c',bs',inp__') _len ->
429          alex_gscan stop__ p' c' bs' inp__' (sc,state__)
430        AlexToken (p',c',bs',inp__') len k ->
431           k p c inp__ len (\scs -> alex_gscan stop__ p' c' bs' inp__' scs)                  (sc,state__)
432#endif
433