1 -- 2 -- Lexical syntax for Haskell 98. 3 -- 4 -- (c) Simon Marlow 2003, with the caveat that much of this is 5 -- translated directly from the syntax in the Haskell 98 report. 6 -- 7 -- This isn't a complete Haskell 98 lexer - it doesn't handle layout 8 -- for one thing. However, it could be adapted with a small 9 -- amount of effort. 10 -- 11 12 { 13 module Main (main) where 14 import Data.Char (chr) 15 } 16 17 %wrapper "monad" 18 19 $whitechar = [ \t\n\r\f\v] 20 $special = [\(\)\,\;\[\]\`\{\}] 21 22 $ascdigit = 0-9 23 $unidigit = [] -- TODO 24 $digit = [$ascdigit $unidigit] 25 26 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] 27 $unisymbol = [] -- TODO 28 $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] 29 30 $large = [A-Z \xc0-\xd6 \xd8-\xde] 31 $small = [a-z \xdf-\xf6 \xf8-\xff \_] 32 $alpha = [$small $large] 33 34 $graphic = [$small $large $symbol $digit $special \:\"\'] 35 36 $octit = 0-7 37 $hexit = [0-9 A-F a-f] 38 $idchar = [$alpha $digit \'] 39 $symchar = [$symbol \:] 40 $nl = [\n\r] 41 42 @reservedid = 43 as|case|class|data|default|deriving|do|else|hiding|if| 44 import|in|infix|infixl|infixr|instance|let|module|newtype| 45 of|qualified|then|type|where 46 47 @reservedop = 48 ".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>" 49 50 @varid = $small $idchar* 51 @conid = $large $idchar* 52 @varsym = $symbol $symchar* 53 @consym = \: $symchar* 54 55 @decimal = $digit+ 56 @octal = $octit+ 57 @hexadecimal = $hexit+ 58 @exponent = [eE] [\-\+] @decimal 59 60 $cntrl = [$large \@\[\\\]\^\_] 61 @ascii = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK 62 | BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE 63 | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM 64 | SUB | ESC | FS | GS | RS | US | SP | DEL 65 $charesc = [abfnrtv\\\"\'\&] 66 @escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal) 67 @gap = \\ $whitechar+ \\ 68 @string = $graphic # [\"\\] | " " | @escape | @gap 69 70 haskell :- 71 72 <0> $white+ { skip } 73 <0> "--"\-*[^$symbol].* { skip } 74 75 "{-" { nested_comment } 76 77 <0> $special { mkL LSpecial } 78 79 <0> @reservedid { mkL LReservedId } 80 <0> @conid \. @varid { mkL LQVarId } 81 <0> @conid \. @conid { mkL LQConId } 82 <0> @varid { mkL LVarId } 83 <0> @conid { mkL LConId } 84 85 <0> @reservedop { mkL LReservedOp } 86 <0> @conid \. @varsym { mkL LVarSym } 87 <0> @conid \. @consym { mkL LConSym } 88 <0> @varsym { mkL LVarSym } 89 <0> @consym { mkL LConSym } 90 91 <0> @decimal 92 | 0[oO] @octal 93 | 0[xX] @hexadecimal { mkL LInteger } 94 95 <0> @decimal \. @decimal @exponent? 96 | @decimal @exponent { mkL LFloat } 97 98 <0> \' ($graphic # [\'\\] | " " | @escape) \' 99 { mkL LChar } 100 101 <0> \" @string* \" { mkL LString } 102 103 { 104 data Lexeme = L AlexPosn LexemeClass String 105 106 data LexemeClass 107 = LInteger 108 | LFloat 109 | LChar 110 | LString 111 | LSpecial 112 | LReservedId 113 | LReservedOp 114 | LVarId 115 | LQVarId 116 | LConId 117 | LQConId 118 | LVarSym 119 | LQVarSym 120 | LConSym 121 | LQConSym 122 | LEOF 123 deriving Eq 124 125 mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme 126 mkL c (p,_,_,str) len = return (L p c (take len str)) 127 128 nested_comment :: AlexInput -> Int -> Alex Lexeme 129 nested_comment _ _ = do 130 input <- alexGetInput 131 go 1 input 132 where go 0 input = do alexSetInput input; alexMonadScan 133 go n input = do 134 case alexGetByte input of 135 Nothing -> err input 136 Just (c,input) -> do 137 case chr (fromIntegral c) of 138 '-' -> do 139 let temp = input 140 case alexGetByte input of 141 Nothing -> err input 142 Just (125,input) -> go (n-1) input 143 Just (45, input) -> go n temp 144 Just (c,input) -> go n input 145 '\123' -> do 146 case alexGetByte input of 147 Nothing -> err input 148 Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input 149 Just (c,input) -> go n input 150 c -> go n input 151 152 err input = do alexSetInput input; lexError "error in nested comment" 153 154 lexError s = do 155 (p,c,_,input) <- alexGetInput 156 alexError (showPosn p ++ ": " ++ s ++ 157 (if (not (null input)) 158 then " before " ++ show (head input) 159 else " at end of file")) 160 161 scanner str = runAlex str $ do 162 let loop i = do tok@(L _ cl _) <- alexMonadScan; 163 if cl == LEOF 164 then return i 165 else do loop $! (i+1) 166 loop 0 167 168 alexEOF = return (L undefined LEOF "") 169 170 showPosn (AlexPn _ line col) = show line ++ ':': show col 171 172 main = do 173 s <- getContents 174 print (scanner s) 175 } 176