1module HSCParser where 2import Control.Applicative hiding ( many ) 3import Control.Monad ( MonadPlus(..), liftM, liftM2, ap ) 4import Data.Char ( isAlpha, isAlphaNum, isSpace, isDigit ) 5 6------------------------------------------------------------------------ 7-- A deterministic parser which remembers the text which has been parsed. 8 9newtype Parser a = Parser (SourcePos -> String -> ParseResult a) 10 11runParser :: Parser a -> String -> String -> ParseResult a 12runParser (Parser p) file_name = p (SourcePos file_name 1 1) 13 14data ParseResult a = Success !SourcePos String String a 15 | Failure !SourcePos String 16 17data SourcePos = SourcePos String !Int !Int 18 19updatePos :: SourcePos -> Char -> SourcePos 20updatePos (SourcePos name line col) ch = case ch of 21 '\n' -> SourcePos name (line + 1) 1 22 _ -> SourcePos name line (col + 1) 23 24instance Functor Parser where 25 fmap = liftM 26 27instance Applicative Parser where 28 pure a = Parser $ \pos s -> Success pos [] s a 29 (<*>) = ap 30 31instance Monad Parser where 32 return = pure 33 Parser m >>= k = 34 Parser $ \pos s -> case m pos s of 35 Success pos' out1 s' a -> case k a of 36 Parser k' -> case k' pos' s' of 37 Success pos'' out2 imp'' b -> 38 Success pos'' (out1++out2) imp'' b 39 Failure pos'' msg -> Failure pos'' msg 40 Failure pos' msg -> Failure pos' msg 41 42failp :: String -> Parser a 43failp msg = Parser $ \pos _ -> Failure pos msg 44 45instance Alternative Parser where 46 empty = mzero 47 (<|>) = mplus 48 49instance MonadPlus Parser where 50 mzero = failp "mzero" 51 Parser m `mplus` Parser n = 52 Parser $ \pos s -> case m pos s of 53 success@(Success _ _ _ _) -> success 54 Failure _ _ -> n pos s 55 56getPos :: Parser SourcePos 57getPos = Parser $ \pos s -> Success pos [] s pos 58 59setPos :: SourcePos -> Parser () 60setPos pos = Parser $ \_ s -> Success pos [] s () 61 62message :: Parser a -> String -> Parser a 63Parser m `message` msg = 64 Parser $ \pos s -> case m pos s of 65 success@(Success _ _ _ _) -> success 66 Failure pos' _ -> Failure pos' msg 67 68catchOutput_ :: Parser a -> Parser String 69catchOutput_ (Parser m) = 70 Parser $ \pos s -> case m pos s of 71 Success pos' out s' _ -> Success pos' [] s' out 72 Failure pos' msg -> Failure pos' msg 73 74fakeOutput :: Parser a -> String -> Parser a 75Parser m `fakeOutput` out = 76 Parser $ \pos s -> case m pos s of 77 Success pos' _ s' a -> Success pos' out s' a 78 Failure pos' msg -> Failure pos' msg 79 80lookAhead :: Parser String 81lookAhead = Parser $ \pos s -> Success pos [] s s 82 83satisfy :: (Char -> Bool) -> Parser Char 84satisfy p = 85 Parser $ \pos s -> case s of 86 c:cs | p c -> Success (updatePos pos c) [c] cs c 87 _ -> Failure pos "Bad character" 88 89satisfy_ :: (Char -> Bool) -> Parser () 90satisfy_ p = satisfy p >> return () 91 92char_ :: Char -> Parser () 93char_ c = do 94 satisfy_ (== c) `message` (show c++" expected") 95 96anyChar_ :: Parser () 97anyChar_ = do 98 satisfy_ (const True) `message` "Unexpected end of file" 99 100any2Chars_ :: Parser () 101any2Chars_ = anyChar_ >> anyChar_ 102 103any3Chars_ :: Parser () 104any3Chars_ = anyChar_ >> anyChar_ >> anyChar_ 105 106many :: Parser a -> Parser [a] 107many p = many1 p `mplus` return [] 108 109many1 :: Parser a -> Parser [a] 110many1 p = liftM2 (:) p (many p) 111 112many_ :: Parser a -> Parser () 113many_ p = many1_ p `mplus` return () 114 115many1_ :: Parser a -> Parser () 116many1_ p = p >> many_ p 117 118manySatisfy, manySatisfy1 :: (Char -> Bool) -> Parser String 119manySatisfy = many . satisfy 120manySatisfy1 = many1 . satisfy 121 122manySatisfy_, manySatisfy1_ :: (Char -> Bool) -> Parser () 123manySatisfy_ = many_ . satisfy 124manySatisfy1_ = many1_ . satisfy 125 126------------------------------------------------------------------------ 127-- Parser of hsc syntax. 128 129data Token 130 = Text SourcePos String 131 | Special SourcePos String String 132 133tokenIsSpecial :: Token -> Bool 134tokenIsSpecial (Text {}) = False 135tokenIsSpecial (Special {}) = True 136 137parser :: Parser [Token] 138parser = do 139 pos <- getPos 140 t <- catchOutput_ text 141 s <- lookAhead 142 rest <- case s of 143 [] -> return [] 144 _:_ -> liftM2 (:) (special `fakeOutput` []) parser 145 return (if null t then rest else Text pos t : rest) 146 147text :: Parser () 148text = do 149 s <- lookAhead 150 case s of 151 [] -> return () 152 c:_ | isAlpha c || c == '_' -> do 153 anyChar_ 154 manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'') 155 text 156 c:_ | isHsSymbol c -> do 157 symb <- catchOutput_ (manySatisfy_ isHsSymbol) 158 case symb of 159 "#" -> return () 160 '-':'-':symb' | all (== '-') symb' -> do 161 return () `fakeOutput` symb 162 manySatisfy_ (/= '\n') 163 text 164 _ -> do 165 return () `fakeOutput` unescapeHashes symb 166 text 167 '\"':_ -> do anyChar_; hsString '\"'; text 168 -- See Note [Single Quotes] 169 '\'':'\\':_ -> do anyChar_; hsString '\''; text -- Case 1 170 '\'':_:'\'':_ -> do any3Chars_; text -- Case 2 171 '\'':d:_ | isSpace d -> do -- Case 3 172 any2Chars_ 173 manySatisfy_ (\c' -> isSpace c') 174 manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'') 175 text 176 '\'':_ -> do -- Case 4 177 anyChar_ 178 manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'') 179 text 180 '{':'-':_ -> do 181 any2Chars_ 182 linePragma `mplus` columnPragma `mplus` hsComment 183 text 184 _:_ -> do anyChar_; text 185 186{- Note [Single Quotes] 187~~~~~~~~~~~~~~~~~~~~~~~ 188hsc2hs performs some tricks to figure out if we are looking at character 189literal or a promoted data constructor. In order, the cases considered are: 190 1911. quote-backslash: An escape sequence character literal. Since these 192 escape sequences have several different possible lengths, hsc2hs relies 193 on hsString to consume everything after this until another single quote 194 is encountered. See Note [Handling escaped characters]. 1952. quote-any-quote: A character literal. Consumes the triplet. 1963. quote-space: Here, the order of the patterns becomes important. This 197 case and the case below handle promoted data constructors. This one 198 is to handle data constructor that end in a quote. They have special 199 syntax for promotion that requires adding a leading space. After an 200 arbitrary number of initial space characters, consume 201 all alphanumeric characters and quotes, considering them part of the 202 identifier. 2034. quote: If nothing else matched, we assume we are dealing with a normal 204 promoted data constructor. Consume all alphanumeric characters and 205 quotes, considering them part of the identifier. 206 207Here are some lines of code for which at one of the described cases 208would be matched at some point: 209 210 data Foo = Foo' | Bar 211 212 main :: IO () 213 main = do 2141> putChar '\NUL' 2152> putChar 'x' 2163> let y = Proxy :: Proxy ' Foo' 2174> let x = Proxy :: Proxy 'Bar 218 pure () 219-} 220 221hsString :: Char -> Parser () 222hsString quote = do 223 s <- lookAhead 224 case s of 225 [] -> return () 226 c:_ | c == quote -> anyChar_ 227 -- See Note [Handling escaped characters] 228 '\\':c:_ 229 | isSpace c -> do 230 anyChar_ 231 manySatisfy_ isSpace 232 char_ '\\' `mplus` return () 233 hsString quote 234 | otherwise -> do any2Chars_; hsString quote 235 _:_ -> do anyChar_; hsString quote 236 237{- Note [Handling escaped characters] 238~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 239There are several accepted escape codes for string and character literals. 240The function hsString handles all escape sequences that start with space 241in its first guard and all others in the otherwise guard. It only needs 242to consume two characters to handle these non-space-prefixed escape 243sequences correctly. Consider these examples: 244 245* Single Character: \t -> 246* Multiple Characters: \DEL -> EL 247* Decimal: \1789 -> 789 248* Hexadecimal: \xbeef -> beef 249* Octal: \o3576 -> 3576 250 251Crucially, none of these suffixes left after dropping the leading two 252characters ever contain single quote, double quote, or backslash. 253Consequently, these leftover characters will be matched by the 254final pattern match (_:_) in hsString since the call to any2Chars_ 255is followed by recursing. 256-} 257 258hsComment :: Parser () 259hsComment = do 260 s <- lookAhead 261 case s of 262 [] -> return () 263 '-':'}':_ -> any2Chars_ 264 '{':'-':_ -> do any2Chars_; hsComment; hsComment 265 _:_ -> do anyChar_; hsComment 266 267linePragma :: Parser () 268linePragma = do 269 char_ '#' 270 manySatisfy_ isSpace 271 satisfy_ (\c -> c == 'L' || c == 'l') 272 satisfy_ (\c -> c == 'I' || c == 'i') 273 satisfy_ (\c -> c == 'N' || c == 'n') 274 satisfy_ (\c -> c == 'E' || c == 'e') 275 manySatisfy1_ isSpace 276 line <- liftM read $ manySatisfy1 isDigit 277 manySatisfy1_ isSpace 278 char_ '\"' 279 name <- manySatisfy (/= '\"') 280 char_ '\"' 281 manySatisfy_ isSpace 282 char_ '#' 283 char_ '-' 284 char_ '}' 285 setPos (SourcePos name (line - 1) 1) 286 287columnPragma :: Parser () 288columnPragma = do 289 char_ '#' 290 manySatisfy_ isSpace 291 satisfy_ (\c -> c == 'C' || c == 'c') 292 satisfy_ (\c -> c == 'O' || c == 'o') 293 satisfy_ (\c -> c == 'L' || c == 'l') 294 satisfy_ (\c -> c == 'U' || c == 'u') 295 satisfy_ (\c -> c == 'M' || c == 'm') 296 satisfy_ (\c -> c == 'N' || c == 'n') 297 manySatisfy1_ isSpace 298 column <- liftM read $ manySatisfy1 isDigit 299 manySatisfy_ isSpace 300 char_ '#' 301 char_ '-' 302 char_ '}' 303 SourcePos name line _ <- getPos 304 setPos (SourcePos name line column) 305 306isHsSymbol :: Char -> Bool 307isHsSymbol '!' = True; isHsSymbol '#' = True; isHsSymbol '$' = True 308isHsSymbol '%' = True; isHsSymbol '&' = True; isHsSymbol '*' = True 309isHsSymbol '+' = True; isHsSymbol '.' = True; isHsSymbol '/' = True 310isHsSymbol '<' = True; isHsSymbol '=' = True; isHsSymbol '>' = True 311isHsSymbol '?' = True; isHsSymbol '@' = True; isHsSymbol '\\' = True 312isHsSymbol '^' = True; isHsSymbol '|' = True; isHsSymbol '-' = True 313isHsSymbol '~' = True 314isHsSymbol _ = False 315 316unescapeHashes :: String -> String 317unescapeHashes [] = [] 318unescapeHashes ('#':'#':s) = '#' : unescapeHashes s 319unescapeHashes (c:s) = c : unescapeHashes s 320 321lookAheadC :: Parser String 322lookAheadC = liftM joinLines lookAhead 323 where 324 joinLines [] = [] 325 joinLines ('\\':'\n':s) = joinLines s 326 joinLines (c:s) = c : joinLines s 327 328satisfyC :: (Char -> Bool) -> Parser Char 329satisfyC p = do 330 s <- lookAhead 331 case s of 332 '\\':'\n':_ -> do any2Chars_ `fakeOutput` []; satisfyC p 333 _ -> satisfy p 334 335satisfyC_ :: (Char -> Bool) -> Parser () 336satisfyC_ p = satisfyC p >> return () 337 338charC_ :: Char -> Parser () 339charC_ c = satisfyC_ (== c) `message` (show c++" expected") 340 341anyCharC_ :: Parser () 342anyCharC_ = satisfyC_ (const True) `message` "Unexpected end of file" 343 344any2CharsC_ :: Parser () 345any2CharsC_ = anyCharC_ >> anyCharC_ 346 347manySatisfyC :: (Char -> Bool) -> Parser String 348manySatisfyC = many . satisfyC 349 350manySatisfyC_ :: (Char -> Bool) -> Parser () 351manySatisfyC_ = many_ . satisfyC 352 353special :: Parser Token 354special = do 355 manySatisfyC_ (\c -> isSpace c && c /= '\n') 356 s <- lookAheadC 357 case s of 358 '{':_ -> do 359 anyCharC_ 360 manySatisfyC_ isSpace 361 sp <- keyArg (== '\n') 362 charC_ '}' 363 return sp 364 _ -> keyArg (const False) 365 366keyArg :: (Char -> Bool) -> Parser Token 367keyArg eol = do 368 pos <- getPos 369 key <- keyword `message` "hsc keyword or '{' expected" 370 manySatisfyC_ (\c' -> isSpace c' && c' /= '\n' || eol c') 371 arg <- catchOutput_ (argument eol) 372 return (Special pos key arg) 373 374keyword :: Parser String 375keyword = do 376 c <- satisfyC (\c' -> isAlpha c' || c' == '_') 377 cs <- manySatisfyC (\c' -> isAlphaNum c' || c' == '_') 378 return (c:cs) 379 380argument :: (Char -> Bool) -> Parser () 381argument eol = do 382 s <- lookAheadC 383 case s of 384 [] -> return () 385 c:_ | eol c -> do anyCharC_; argument eol 386 '\n':_ -> return () 387 '\"':_ -> do anyCharC_; cString '\"'; argument eol 388 '\'':_ -> do anyCharC_; cString '\''; argument eol 389 '(':_ -> do anyCharC_; nested ')'; argument eol 390 ')':_ -> return () 391 '/':'*':_ -> do any2CharsC_; cComment; argument eol 392 '/':'/':_ -> do 393 any2CharsC_; manySatisfyC_ (/= '\n'); argument eol 394 '[':_ -> do anyCharC_; nested ']'; argument eol 395 ']':_ -> return () 396 '{':_ -> do anyCharC_; nested '}'; argument eol 397 '}':_ -> return () 398 _:_ -> do anyCharC_; argument eol 399 400nested :: Char -> Parser () 401nested c = do argument (== '\n'); charC_ c 402 403cComment :: Parser () 404cComment = do 405 s <- lookAheadC 406 case s of 407 [] -> return () 408 '*':'/':_ -> do any2CharsC_ 409 _:_ -> do anyCharC_; cComment 410 411cString :: Char -> Parser () 412cString quote = do 413 s <- lookAheadC 414 case s of 415 [] -> return () 416 c:_ | c == quote -> anyCharC_ 417 '\\':_:_ -> do any2CharsC_; cString quote 418 _:_ -> do anyCharC_; cString quote 419 420