1module HSCParser where
2import Control.Applicative      hiding ( many )
3import Control.Monad            ( MonadPlus(..), liftM, liftM2, ap )
4import Data.Char                ( isAlpha, isAlphaNum, isSpace, isDigit )
5
6------------------------------------------------------------------------
7-- A deterministic parser which remembers the text which has been parsed.
8
9newtype Parser a = Parser (SourcePos -> String -> ParseResult a)
10
11runParser :: Parser a -> String -> String -> ParseResult a
12runParser (Parser p) file_name = p (SourcePos file_name 1 1)
13
14data ParseResult a = Success !SourcePos String String a
15                   | Failure !SourcePos String
16
17data SourcePos = SourcePos String !Int !Int
18
19updatePos :: SourcePos -> Char -> SourcePos
20updatePos (SourcePos name line col) ch = case ch of
21    '\n' -> SourcePos name (line + 1) 1
22    _    -> SourcePos name line (col + 1)
23
24instance Functor Parser where
25    fmap = liftM
26
27instance Applicative Parser where
28    pure a = Parser $ \pos s -> Success pos [] s a
29    (<*>) = ap
30
31instance Monad Parser where
32    return = pure
33    Parser m >>= k =
34        Parser $ \pos s -> case m pos s of
35            Success pos' out1 s' a -> case k a of
36                Parser k' -> case k' pos' s' of
37                    Success pos'' out2 imp'' b ->
38                        Success pos'' (out1++out2) imp'' b
39                    Failure pos'' msg -> Failure pos'' msg
40            Failure pos' msg -> Failure pos' msg
41
42failp :: String -> Parser a
43failp msg = Parser $ \pos _ -> Failure pos msg
44
45instance Alternative Parser where
46    empty = mzero
47    (<|>) = mplus
48
49instance MonadPlus Parser where
50    mzero                     = failp "mzero"
51    Parser m `mplus` Parser n =
52        Parser $ \pos s -> case m pos s of
53            success@(Success _ _ _ _) -> success
54            Failure _ _               -> n pos s
55
56getPos :: Parser SourcePos
57getPos = Parser $ \pos s -> Success pos [] s pos
58
59setPos :: SourcePos -> Parser ()
60setPos pos = Parser $ \_ s -> Success pos [] s ()
61
62message :: Parser a -> String -> Parser a
63Parser m `message` msg =
64    Parser $ \pos s -> case m pos s of
65        success@(Success _ _ _ _) -> success
66        Failure pos' _            -> Failure pos' msg
67
68catchOutput_ :: Parser a -> Parser String
69catchOutput_ (Parser m) =
70    Parser $ \pos s -> case m pos s of
71        Success pos' out s' _ -> Success pos' [] s' out
72        Failure pos' msg      -> Failure pos' msg
73
74fakeOutput :: Parser a -> String -> Parser a
75Parser m `fakeOutput` out =
76    Parser $ \pos s -> case m pos s of
77        Success pos' _ s' a -> Success pos' out s' a
78        Failure pos' msg    -> Failure pos' msg
79
80lookAhead :: Parser String
81lookAhead = Parser $ \pos s -> Success pos [] s s
82
83satisfy :: (Char -> Bool) -> Parser Char
84satisfy p =
85    Parser $ \pos s -> case s of
86        c:cs | p c -> Success (updatePos pos c) [c] cs c
87        _          -> Failure pos "Bad character"
88
89satisfy_ :: (Char -> Bool) -> Parser ()
90satisfy_ p = satisfy p >> return ()
91
92char_ :: Char -> Parser ()
93char_ c = do
94    satisfy_ (== c) `message` (show c++" expected")
95
96anyChar_ :: Parser ()
97anyChar_ = do
98    satisfy_ (const True) `message` "Unexpected end of file"
99
100any2Chars_ :: Parser ()
101any2Chars_ = anyChar_ >> anyChar_
102
103any3Chars_ :: Parser ()
104any3Chars_ = anyChar_ >> anyChar_ >> anyChar_
105
106many :: Parser a -> Parser [a]
107many p = many1 p `mplus` return []
108
109many1 :: Parser a -> Parser [a]
110many1 p = liftM2 (:) p (many p)
111
112many_ :: Parser a -> Parser ()
113many_ p = many1_ p `mplus` return ()
114
115many1_ :: Parser a -> Parser ()
116many1_ p = p >> many_ p
117
118manySatisfy, manySatisfy1 :: (Char -> Bool) -> Parser String
119manySatisfy  = many  . satisfy
120manySatisfy1 = many1 . satisfy
121
122manySatisfy_, manySatisfy1_ :: (Char -> Bool) -> Parser ()
123manySatisfy_  = many_  . satisfy
124manySatisfy1_ = many1_ . satisfy
125
126------------------------------------------------------------------------
127-- Parser of hsc syntax.
128
129data Token
130    = Text    SourcePos String
131    | Special SourcePos String String
132
133tokenIsSpecial :: Token -> Bool
134tokenIsSpecial (Text    {}) = False
135tokenIsSpecial (Special {}) = True
136
137parser :: Parser [Token]
138parser = do
139    pos <- getPos
140    t <- catchOutput_ text
141    s <- lookAhead
142    rest <- case s of
143        []  -> return []
144        _:_ -> liftM2 (:) (special `fakeOutput` []) parser
145    return (if null t then rest else Text pos t : rest)
146
147text :: Parser ()
148text = do
149    s <- lookAhead
150    case s of
151        []        -> return ()
152        c:_ | isAlpha c || c == '_' -> do
153            anyChar_
154            manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
155            text
156        c:_ | isHsSymbol c -> do
157            symb <- catchOutput_ (manySatisfy_ isHsSymbol)
158            case symb of
159                "#" -> return ()
160                '-':'-':symb' | all (== '-') symb' -> do
161                    return () `fakeOutput` symb
162                    manySatisfy_ (/= '\n')
163                    text
164                _ -> do
165                    return () `fakeOutput` unescapeHashes symb
166                    text
167        '\"':_        -> do anyChar_; hsString '\"'; text
168        -- See Note [Single Quotes]
169        '\'':'\\':_ -> do anyChar_; hsString '\''; text -- Case 1
170        '\'':_:'\'':_ -> do any3Chars_; text -- Case 2
171        '\'':d:_ | isSpace d -> do -- Case 3
172          any2Chars_
173          manySatisfy_ (\c' -> isSpace c')
174          manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
175          text
176        '\'':_ -> do -- Case 4
177          anyChar_
178          manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
179          text
180        '{':'-':_ -> do
181          any2Chars_
182          linePragma `mplus` columnPragma `mplus` hsComment
183          text
184        _:_           -> do anyChar_; text
185
186{- Note [Single Quotes]
187~~~~~~~~~~~~~~~~~~~~~~~
188hsc2hs performs some tricks to figure out if we are looking at character
189literal or a promoted data constructor. In order, the cases considered are:
190
1911. quote-backslash: An escape sequence character literal. Since these
192   escape sequences have several different possible lengths, hsc2hs relies
193   on hsString to consume everything after this until another single quote
194   is encountered. See Note [Handling escaped characters].
1952. quote-any-quote: A character literal. Consumes the triplet.
1963. quote-space: Here, the order of the patterns becomes important. This
197   case and the case below handle promoted data constructors. This one
198   is to handle data constructor that end in a quote. They have special
199   syntax for promotion that requires adding a leading space. After an
200   arbitrary number of initial space characters, consume
201   all alphanumeric characters and quotes, considering them part of the
202   identifier.
2034. quote: If nothing else matched, we assume we are dealing with a normal
204   promoted data constructor. Consume all alphanumeric characters and
205   quotes, considering them part of the identifier.
206
207Here are some lines of code for which at one of the described cases
208would be matched at some point:
209
210    data Foo = Foo' | Bar
211
212    main :: IO ()
213    main = do
2141>    putChar '\NUL'
2152>    putChar 'x'
2163>    let y = Proxy :: Proxy ' Foo'
2174>    let x = Proxy :: Proxy 'Bar
218      pure ()
219-}
220
221hsString :: Char -> Parser ()
222hsString quote = do
223    s <- lookAhead
224    case s of
225        []               -> return ()
226        c:_ | c == quote -> anyChar_
227        -- See Note [Handling escaped characters]
228        '\\':c:_
229            | isSpace c  -> do
230                anyChar_
231                manySatisfy_ isSpace
232                char_ '\\' `mplus` return ()
233                hsString quote
234            | otherwise  -> do any2Chars_; hsString quote
235        _:_              -> do anyChar_; hsString quote
236
237{- Note [Handling escaped characters]
238~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
239There are several accepted escape codes for string and character literals.
240The function hsString handles all escape sequences that start with space
241in its first guard and all others in the otherwise guard. It only needs
242to consume two characters to handle these non-space-prefixed escape
243sequences correctly. Consider these examples:
244
245* Single Character: \t ->
246* Multiple Characters: \DEL -> EL
247* Decimal: \1789 -> 789
248* Hexadecimal: \xbeef -> beef
249* Octal: \o3576 -> 3576
250
251Crucially, none of these suffixes left after dropping the leading two
252characters ever contain single quote, double quote, or backslash.
253Consequently, these leftover characters will be matched by the
254final pattern match (_:_) in hsString since the call to any2Chars_
255is followed by recursing.
256-}
257
258hsComment :: Parser ()
259hsComment = do
260    s <- lookAhead
261    case s of
262        []        -> return ()
263        '-':'}':_ -> any2Chars_
264        '{':'-':_ -> do any2Chars_; hsComment; hsComment
265        _:_       -> do anyChar_; hsComment
266
267linePragma :: Parser ()
268linePragma = do
269    char_ '#'
270    manySatisfy_ isSpace
271    satisfy_ (\c -> c == 'L' || c == 'l')
272    satisfy_ (\c -> c == 'I' || c == 'i')
273    satisfy_ (\c -> c == 'N' || c == 'n')
274    satisfy_ (\c -> c == 'E' || c == 'e')
275    manySatisfy1_ isSpace
276    line <- liftM read $ manySatisfy1 isDigit
277    manySatisfy1_ isSpace
278    char_ '\"'
279    name <- manySatisfy (/= '\"')
280    char_ '\"'
281    manySatisfy_ isSpace
282    char_ '#'
283    char_ '-'
284    char_ '}'
285    setPos (SourcePos name (line - 1) 1)
286
287columnPragma :: Parser ()
288columnPragma = do
289    char_ '#'
290    manySatisfy_ isSpace
291    satisfy_ (\c -> c == 'C' || c == 'c')
292    satisfy_ (\c -> c == 'O' || c == 'o')
293    satisfy_ (\c -> c == 'L' || c == 'l')
294    satisfy_ (\c -> c == 'U' || c == 'u')
295    satisfy_ (\c -> c == 'M' || c == 'm')
296    satisfy_ (\c -> c == 'N' || c == 'n')
297    manySatisfy1_ isSpace
298    column <- liftM read $ manySatisfy1 isDigit
299    manySatisfy_ isSpace
300    char_ '#'
301    char_ '-'
302    char_ '}'
303    SourcePos name line _ <- getPos
304    setPos (SourcePos name line column)
305
306isHsSymbol :: Char -> Bool
307isHsSymbol '!' = True; isHsSymbol '#' = True; isHsSymbol '$'  = True
308isHsSymbol '%' = True; isHsSymbol '&' = True; isHsSymbol '*'  = True
309isHsSymbol '+' = True; isHsSymbol '.' = True; isHsSymbol '/'  = True
310isHsSymbol '<' = True; isHsSymbol '=' = True; isHsSymbol '>'  = True
311isHsSymbol '?' = True; isHsSymbol '@' = True; isHsSymbol '\\' = True
312isHsSymbol '^' = True; isHsSymbol '|' = True; isHsSymbol '-'  = True
313isHsSymbol '~' = True
314isHsSymbol _   = False
315
316unescapeHashes :: String -> String
317unescapeHashes []          = []
318unescapeHashes ('#':'#':s) = '#' : unescapeHashes s
319unescapeHashes (c:s)       = c   : unescapeHashes s
320
321lookAheadC :: Parser String
322lookAheadC = liftM joinLines lookAhead
323    where
324    joinLines []            = []
325    joinLines ('\\':'\n':s) = joinLines s
326    joinLines (c:s)         = c : joinLines s
327
328satisfyC :: (Char -> Bool) -> Parser Char
329satisfyC p = do
330    s <- lookAhead
331    case s of
332        '\\':'\n':_ -> do any2Chars_ `fakeOutput` []; satisfyC p
333        _           -> satisfy p
334
335satisfyC_ :: (Char -> Bool) -> Parser ()
336satisfyC_ p = satisfyC p >> return ()
337
338charC_ :: Char -> Parser ()
339charC_ c = satisfyC_ (== c) `message` (show c++" expected")
340
341anyCharC_ :: Parser ()
342anyCharC_ = satisfyC_ (const True) `message` "Unexpected end of file"
343
344any2CharsC_ :: Parser ()
345any2CharsC_ = anyCharC_ >> anyCharC_
346
347manySatisfyC :: (Char -> Bool) -> Parser String
348manySatisfyC = many . satisfyC
349
350manySatisfyC_ :: (Char -> Bool) -> Parser ()
351manySatisfyC_ = many_ . satisfyC
352
353special :: Parser Token
354special = do
355    manySatisfyC_ (\c -> isSpace c && c /= '\n')
356    s <- lookAheadC
357    case s of
358        '{':_ -> do
359            anyCharC_
360            manySatisfyC_ isSpace
361            sp <- keyArg (== '\n')
362            charC_ '}'
363            return sp
364        _ -> keyArg (const False)
365
366keyArg :: (Char -> Bool) -> Parser Token
367keyArg eol = do
368    pos <- getPos
369    key <- keyword `message` "hsc keyword or '{' expected"
370    manySatisfyC_ (\c' -> isSpace c' && c' /= '\n' || eol c')
371    arg <- catchOutput_ (argument eol)
372    return (Special pos key arg)
373
374keyword :: Parser String
375keyword = do
376    c  <- satisfyC (\c' -> isAlpha c' || c' == '_')
377    cs <- manySatisfyC (\c' -> isAlphaNum c' || c' == '_')
378    return (c:cs)
379
380argument :: (Char -> Bool) -> Parser ()
381argument eol = do
382    s <- lookAheadC
383    case s of
384        []          -> return ()
385        c:_ | eol c -> do anyCharC_;               argument eol
386        '\n':_      -> return ()
387        '\"':_      -> do anyCharC_; cString '\"'; argument eol
388        '\'':_      -> do anyCharC_; cString '\''; argument eol
389        '(':_       -> do anyCharC_; nested ')';   argument eol
390        ')':_       -> return ()
391        '/':'*':_   -> do any2CharsC_; cComment;   argument eol
392        '/':'/':_   -> do
393            any2CharsC_; manySatisfyC_ (/= '\n');  argument eol
394        '[':_       -> do anyCharC_; nested ']';   argument eol
395        ']':_       -> return ()
396        '{':_       -> do anyCharC_; nested '}';   argument eol
397        '}':_       -> return ()
398        _:_         -> do anyCharC_;               argument eol
399
400nested :: Char -> Parser ()
401nested c = do argument (== '\n'); charC_ c
402
403cComment :: Parser ()
404cComment = do
405    s <- lookAheadC
406    case s of
407        []        -> return ()
408        '*':'/':_ -> do any2CharsC_
409        _:_       -> do anyCharC_; cComment
410
411cString :: Char -> Parser ()
412cString quote = do
413    s <- lookAheadC
414    case s of
415        []               -> return ()
416        c:_ | c == quote -> anyCharC_
417        '\\':_:_         -> do any2CharsC_; cString quote
418        _:_              -> do anyCharC_; cString quote
419
420