1 --
2 -- Lexical syntax for Haskell 98.
3 --
4 -- (c) Simon Marlow 2003, with the caveat that much of this is
5 -- translated directly from the syntax in the Haskell 98 report.
6 --
7 -- This isn't a complete Haskell 98 lexer - it doesn't handle layout
8 -- for one thing.  However, it could be adapted with a small
9 -- amount of effort.
10 --
11 
12 {
13 module Main (main) where
14 import Data.Char (chr)
15 }
16 
17 %wrapper "monad"
18 
19 $whitechar = [ \t\n\r\f\v]
20 $special   = [\(\)\,\;\[\]\`\{\}]
21 
22 $ascdigit  = 0-9
23 $unidigit  = [] -- TODO
24 $digit     = [$ascdigit $unidigit]
25 
26 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
27 $unisymbol = [] -- TODO
28 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
29 
30 $large     = [A-Z \xc0-\xd6 \xd8-\xde]
31 $small     = [a-z \xdf-\xf6 \xf8-\xff \_]
32 $alpha     = [$small $large]
33 
34 $graphic   = [$small $large $symbol $digit $special \:\"\']
35 
36 $octit	   = 0-7
37 $hexit     = [0-9 A-F a-f]
38 $idchar    = [$alpha $digit \']
39 $symchar   = [$symbol \:]
40 $nl        = [\n\r]
41 
42 @reservedid =
43 	as|case|class|data|default|deriving|do|else|hiding|if|
44 	import|in|infix|infixl|infixr|instance|let|module|newtype|
45 	of|qualified|then|type|where
46 
47 @reservedop =
48 	".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>"
49 
50 @varid  = $small $idchar*
51 @conid  = $large $idchar*
52 @varsym = $symbol $symchar*
53 @consym = \: $symchar*
54 
55 @decimal     = $digit+
56 @octal       = $octit+
57 @hexadecimal = $hexit+
58 @exponent    = [eE] [\-\+] @decimal
59 
60 $cntrl   = [$large \@\[\\\]\^\_]
61 @ascii   = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK
62 	 | BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE
63 	 | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM
64 	 | SUB | ESC | FS | GS | RS | US | SP | DEL
65 $charesc = [abfnrtv\\\"\'\&]
66 @escape  = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
67 @gap     = \\ $whitechar+ \\
68 @string  = $graphic # [\"\\] | " " | @escape | @gap
69 
70 haskell :-
71 
72 <0> $white+			{ skip }
73 <0> "--"\-*[^$symbol].*		{ skip }
74 
75 "{-"				{ nested_comment }
76 
77 <0> $special			{ mkL LSpecial }
78 
79 <0> @reservedid			{ mkL LReservedId }
80 <0> @conid \. @varid		{ mkL LQVarId }
81 <0> @conid \. @conid		{ mkL LQConId }
82 <0> @varid			{ mkL LVarId }
83 <0> @conid			{ mkL LConId }
84 
85 <0> @reservedop			{ mkL LReservedOp }
86 <0> @conid \. @varsym		{ mkL LVarSym }
87 <0> @conid \. @consym		{ mkL LConSym }
88 <0> @varsym			{ mkL LVarSym }
89 <0> @consym			{ mkL LConSym }
90 
91 <0> @decimal
92   | 0[oO] @octal
93   | 0[xX] @hexadecimal		{ mkL LInteger }
94 
95 <0> @decimal \. @decimal @exponent?
96   | @decimal @exponent		{ mkL LFloat }
97 
98 <0> \' ($graphic # [\'\\] | " " | @escape) \'
99 				{ mkL LChar }
100 
101 <0> \" @string* \"		{ mkL LString }
102 
103 {
104 data Lexeme = L AlexPosn LexemeClass String
105 
106 data LexemeClass
107   = LInteger
108   | LFloat
109   | LChar
110   | LString
111   | LSpecial
112   | LReservedId
113   | LReservedOp
114   | LVarId
115   | LQVarId
116   | LConId
117   | LQConId
118   | LVarSym
119   | LQVarSym
120   | LConSym
121   | LQConSym
122   | LEOF
123   deriving Eq
124 
125 mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme
126 mkL c (p,_,_,str) len = return (L p c (take len str))
127 
128 nested_comment :: AlexInput -> Int -> Alex Lexeme
129 nested_comment _ _ = do
130   input <- alexGetInput
131   go 1 input
132   where go 0 input = do alexSetInput input; alexMonadScan
133 	go n input = do
134           case alexGetByte input of
135 	    Nothing  -> err input
136 	    Just (c,input) -> do
137               case chr (fromIntegral c) of
138 	    	'-' -> do
139                   let temp = input
140                   case alexGetByte input of
141 		    Nothing  -> err input
142                     Just (125,input) -> go (n-1) input
143                     Just (45, input) -> go n temp
144                     Just (c,input)   -> go n input
145 	     	'\123' -> do
146                   case alexGetByte input of
147 		    Nothing  -> err input
148                     Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input
149 		    Just (c,input)   -> go n input
150 	    	c -> go n input
151 
152         err input = do alexSetInput input; lexError "error in nested comment"
153 
154 lexError s = do
155   (p,c,_,input) <- alexGetInput
156   alexError (showPosn p ++ ": " ++ s ++
157 		   (if (not (null input))
158 		     then " before " ++ show (head input)
159 		     else " at end of file"))
160 
161 scanner str = runAlex str $ do
162   let loop i = do tok@(L _ cl _) <- alexMonadScan;
163 		  if cl == LEOF
164 			then return i
165 			else do loop $! (i+1)
166   loop 0
167 
168 alexEOF = return (L undefined LEOF "")
169 
170 showPosn (AlexPn _ line col) = show line ++ ':': show col
171 
172 main = do
173   s <- getContents
174   print (scanner s)
175 }
176