1module PascalParser (
2    pascalUnit,
3    mainResultInit
4    )
5    where
6
7import Text.Parsec
8import Text.Parsec.Token
9import Text.Parsec.Expr
10import Control.Monad
11import Data.Maybe
12import Data.Char
13
14import PascalBasics
15import PascalUnitSyntaxTree
16
17
18mainResultInit :: Phrase
19mainResultInit = (\(Right a) -> a) $ parse phrase "<built-in>" "main:= 0;"
20
21knownTypes :: [String]
22knownTypes = ["shortstring", "ansistring", "char", "byte"]
23
24pascalUnit :: Parsec String u PascalUnit
25pascalUnit = do
26    comments
27    u <- choice [program, unit, systemUnit, redoUnit]
28    comments
29    return u
30
31iD :: Parsec String u Identifier
32iD = do
33    i <- identifier pas
34    comments
35    when (i == "not") $ unexpected "'not' used as an identifier"
36    return $ Identifier i BTUnknown
37
38unit :: Parsec String u PascalUnit
39unit = do
40    string' "unit" >> comments
41    name <- iD
42    void $ semi pas
43    comments
44    int <- interface
45    impl <- implementation
46    comments
47    return $ Unit name int impl Nothing Nothing
48
49
50reference :: Parsec String u Reference
51reference = term <?> "reference"
52    where
53    term = comments >> choice [
54        parens pas (liftM RefExpression expression >>= postfixes) >>= postfixes
55        , try $ typeCast >>= postfixes
56        , char' '@' >> liftM Address reference >>= postfixes
57        , liftM SimpleReference iD >>= postfixes
58        ] <?> "simple reference"
59
60    postfixes r = many postfix >>= return . foldl (flip ($)) r
61    postfix = choice [
62            parens pas (option [] parameters) >>= return . FunCall
63          , char' '^' >> return Dereference
64          , (brackets pas) (commaSep1 pas $ expression) >>= return . ArrayElement
65          , (char' '.' >> notFollowedBy (char' '.')) >> liftM (flip RecordField) reference
66        ]
67
68    typeCast = do
69        t <- choice $ map (\s -> try $ caseInsensitiveString s >>= \i -> notFollowedBy alphaNum >> return i) knownTypes
70        e <- parens pas expression
71        comments
72        return $ TypeCast (Identifier t BTUnknown) e
73
74varsDecl1, varsDecl :: Bool -> Parsec String u [TypeVarDeclaration]
75varsDecl1 = varsParser sepEndBy1
76varsDecl = varsParser sepEndBy
77
78varsParser ::
79    (Parsec String u TypeVarDeclaration
80        -> Parsec String u String
81        -> Parsec
82            String u [TypeVarDeclaration])
83    -> Bool
84    -> Parsec
85            String u [TypeVarDeclaration]
86varsParser m endsWithSemi = do
87    vs <- m (aVarDecl endsWithSemi) (semi pas)
88    return vs
89
90aVarDecl :: Bool -> Parsec String u TypeVarDeclaration
91aVarDecl endsWithSemi = do
92    isVar <- liftM (\i -> i == Just "var" || i == Just "out") $
93        if not endsWithSemi then
94            optionMaybe $ choice [
95                try $ string "var"
96                , try $ string "const"
97                , try $ string "out"
98                ]
99            else
100                return Nothing
101    comments
102    ids <- do
103        i <- (commaSep1 pas) $ (try iD <?> "variable declaration")
104        char' ':'
105        return i
106    comments
107    t <- typeDecl <?> "variable type declaration"
108    comments
109    initialization <- option Nothing $ do
110        char' '='
111        comments
112        e <- initExpression
113        comments
114        return (Just e)
115    return $ VarDeclaration isVar False (ids, t) initialization
116
117constsDecl :: Parsec String u [TypeVarDeclaration]
118constsDecl = do
119    vs <- many1 (try (aConstDecl >>= \i -> semi pas >> return i) >>= \i -> comments >> return i)
120    comments
121    return vs
122    where
123    aConstDecl = do
124        comments
125        i <- iD
126        t <- optionMaybe $ do
127            char' ':'
128            comments
129            t <- typeDecl
130            comments
131            return t
132        char' '='
133        comments
134        e <- initExpression
135        comments
136        return $ VarDeclaration False (isNothing t) ([i], fromMaybe (DeriveType e) t) (Just e)
137
138typeDecl :: Parsec String u TypeDecl
139typeDecl = choice [
140    char' '^' >> typeDecl >>= return . PointerTo
141    , try (string' "shortstring") >> return String
142    , try (string' "string") >> optionMaybe (brackets pas $ integer pas) >> return String
143    , try (string' "ansistring") >> optionMaybe (brackets pas $ integer pas) >> return AString
144    , arrayDecl
145    , recordDecl
146    , setDecl
147    , functionType
148    , sequenceDecl >>= return . Sequence
149    , try iD >>= return . SimpleType
150    , rangeDecl >>= return . RangeType
151    ] <?> "type declaration"
152    where
153    arrayDecl = do
154        try $ do
155            optional $ (try $ string' "packed") >> comments
156            string' "array"
157        comments
158        r <- option [] $ do
159            char' '['
160            r <- commaSep pas rangeDecl
161            char' ']'
162            comments
163            return r
164        string' "of"
165        comments
166        t <- typeDecl
167        if null r then
168            return $ ArrayDecl Nothing t
169            else
170            return $ foldr (\a b -> ArrayDecl (Just a) b) (ArrayDecl (Just $ last r) t) (init r)
171    recordDecl = do
172        try $ do
173            optional $ (try $ string' "packed") >> comments
174            string' "record"
175        comments
176        vs <- varsDecl True
177        union <- optionMaybe $ do
178            string' "case"
179            comments
180            void $ iD
181            comments
182            string' "of"
183            comments
184            many unionCase
185        string' "end"
186        return $ RecordType vs union
187    setDecl = do
188        try $ string' "set" >> void space
189        comments
190        string' "of"
191        comments
192        liftM Set typeDecl
193    unionCase = do
194        void $ try $ commaSep pas $ (void $ iD) <|> (void $ integer pas)
195        char' ':'
196        comments
197        u <- parens pas $ varsDecl True
198        char' ';'
199        comments
200        return u
201    sequenceDecl = (parens pas) $ (commaSep pas) (iD >>= \i -> optional (spaces >> char' '=' >> spaces >> integer pas) >> return i)
202    functionType = do
203        fp <- try (string "function") <|> try (string "procedure")
204        comments
205        vs <- option [] $ parens pas $ varsDecl False
206        comments
207        ret <- if (fp == "function") then do
208            char' ':'
209            comments
210            ret <- typeDecl
211            comments
212            return ret
213            else
214            return VoidType
215        optional $ try $ char' ';' >> comments >> string' "cdecl"
216        comments
217        return $ FunctionType ret vs
218
219typesDecl :: Parsec String u [TypeVarDeclaration]
220typesDecl = many (aTypeDecl >>= \t -> comments >> return t)
221    where
222    aTypeDecl = do
223        i <- try $ do
224            i <- iD <?> "type declaration"
225            comments
226            char' '='
227            return i
228        comments
229        t <- typeDecl
230        comments
231        void $ semi pas
232        comments
233        return $ TypeDeclaration i t
234
235rangeDecl :: Parsec String u Range
236rangeDecl = choice [
237    try $ rangeft
238    , iD >>= return . Range
239    ] <?> "range declaration"
240    where
241    rangeft = do
242        e1 <- initExpression
243        string' ".."
244        e2 <- initExpression
245        return $ RangeFromTo e1 e2
246
247typeVarDeclaration :: Bool -> Parsec String u [TypeVarDeclaration]
248typeVarDeclaration isImpl = (liftM concat . many . choice) [
249    varSection,
250    constSection,
251    typeSection,
252    funcDecl,
253    operatorDecl
254    ]
255    where
256
257    fixInit v = concat $ map (\x -> case x of
258                    VarDeclaration a b (ids, t) c ->
259                        let typeId = (Identifier ((\(Identifier i _) -> i) (head ids) ++ "_tt") BTUnknown) in
260                        let res =  [TypeDeclaration typeId t, VarDeclaration a b (ids, (SimpleType typeId)) c] in
261                        case t of
262                            RecordType _ _ -> res -- create a separated type declaration
263                            ArrayDecl _ _ -> res
264                            _ -> [x]
265                    _ -> error ("checkInit:\n" ++ (show v))) v
266
267    varSection = do
268        try $ string' "var"
269        comments
270        v <- varsDecl1 True <?> "variable declaration"
271        comments
272        return $ fixInit v
273
274    constSection = do
275        try $ string' "const"
276        comments
277        c <- constsDecl <?> "const declaration"
278        comments
279        return $ fixInit c
280
281    typeSection = do
282        try $ string' "type"
283        comments
284        t <- typesDecl <?> "type declaration"
285        comments
286        return t
287
288    operatorDecl = do
289        try $ string' "operator"
290        comments
291        i <- manyTill anyChar space
292        comments
293        vs <- parens pas $ varsDecl False
294        comments
295        rid <- iD
296        comments
297        char' ':'
298        comments
299        ret <- typeDecl
300        comments
301        -- return ret
302        -- ^^^^^^^^^^ wth was this???
303        char' ';'
304        comments
305        forward <- liftM isJust $ optionMaybe (try (string' "forward;") >> comments)
306        inline <- liftM (any (== "inline;")) $ many functionDecorator
307        b <- if isImpl && (not forward) then
308                liftM Just functionBody
309                else
310                return Nothing
311        return $ [OperatorDeclaration i rid inline ret vs b]
312
313
314    funcDecl = do
315        fp <- try (string "function") <|> try (string "procedure")
316        comments
317        i <- iD
318        vs <- option [] $ parens pas $ varsDecl False
319        comments
320        ret <- if (fp == "function") then do
321            char' ':'
322            comments
323            ret <- typeDecl
324            comments
325            return ret
326            else
327            return VoidType
328        char' ';'
329        comments
330        forward <- liftM isJust $ optionMaybe (try (string "forward;") >> comments)
331        decorators <- many functionDecorator
332        let inline = any (== "inline;") decorators
333            overload = any (== "overload;") decorators
334            external = any (== "external;") decorators
335        -- TODO: don't mangle external functions names (and remove fpcrtl.h defines hacks)
336        b <- if isImpl && (not forward) && (not external) then
337                liftM Just functionBody
338                else
339                return Nothing
340        return $ [FunctionDeclaration i inline overload external ret vs b]
341
342    functionDecorator = do
343        d <- choice [
344            try $ string "inline;"
345            , try $ caseInsensitiveString "cdecl;"
346            , try $ string "overload;"
347            , try $ string "export;"
348            , try $ string "varargs;"
349            , try (string' "external") >> comments >> iD >> comments >>
350                optional (string' "name" >> comments >> stringLiteral pas) >> string' ";" >> return "external;"
351            ]
352        comments
353        return d
354
355
356program :: Parsec String u PascalUnit
357program = do
358    string' "program"
359    comments
360    name <- iD
361    (char' ';')
362    comments
363    comments
364    u <- uses
365    comments
366    tv <- typeVarDeclaration True
367    comments
368    p <- phrase
369    comments
370    char' '.'
371    comments
372    return $ Program name (Implementation u (TypesAndVars tv)) p
373
374interface :: Parsec String u Interface
375interface = do
376    string' "interface"
377    comments
378    u <- uses
379    comments
380    tv <- typeVarDeclaration False
381    comments
382    return $ Interface u (TypesAndVars tv)
383
384implementation :: Parsec String u Implementation
385implementation = do
386    string' "implementation"
387    comments
388    u <- uses
389    comments
390    tv <- typeVarDeclaration True
391    string' "end."
392    comments
393    return $ Implementation u (TypesAndVars tv)
394
395expression :: Parsec String u Expression
396expression = do
397    buildExpressionParser table term <?> "expression"
398    where
399    term = comments >> choice [
400        builtInFunction expression >>= \(n, e) -> return $ BuiltInFunCall e (SimpleReference (Identifier n BTUnknown))
401        , try (parens pas expression >>= \e -> notFollowedBy (comments >> char' '.') >> return e)
402        , brackets pas (commaSep pas iD) >>= return . SetExpression
403        , try $ integer pas >>= \i -> notFollowedBy (char' '.') >> (return . NumberLiteral . show) i
404        , float pas >>= return . FloatLiteral . show
405        , try $ integer pas >>= return . NumberLiteral . show
406        , try (string' "_S" >> stringLiteral pas) >>= return . StringLiteral
407        , try (string' "_P" >> stringLiteral pas) >>= return . PCharLiteral
408        , stringLiteral pas >>= return . strOrChar
409        , try (string' "#$") >> many hexDigit >>= \c -> comments >> return (HexCharCode c)
410        , char' '#' >> many digit >>= \c -> comments >> return (CharCode c)
411        , char' '$' >> many hexDigit >>=  \h -> comments >> return (HexNumber h)
412        --, char' '-' >> expression >>= return . PrefixOp "-"
413        , char' '-' >> reference >>= return . PrefixOp "-" . Reference
414        , (try $ string' "not" >> notFollowedBy comments) >> unexpected "'not'"
415        , try $ string' "nil" >> return Null
416        , reference >>= return . Reference
417        ] <?> "simple expression"
418
419    table = [
420          [  Prefix (reservedOp pas "not">> return (PrefixOp "not"))
421           , Prefix (try (char' '-') >> return (PrefixOp "-"))]
422           ,
423          [  Infix (char' '*' >> return (BinOp "*")) AssocLeft
424           , Infix (char' '/' >> return (BinOp "/")) AssocLeft
425           , Infix (try (string' "div") >> return (BinOp "div")) AssocLeft
426           , Infix (try (string' "mod") >> return (BinOp "mod")) AssocLeft
427           , Infix (try (string' "in") >> return (BinOp "in")) AssocNone
428           , Infix (try $ string' "and" >> return (BinOp "and")) AssocLeft
429           , Infix (try $ string' "shl" >> return (BinOp "shl")) AssocLeft
430           , Infix (try $ string' "shr" >> return (BinOp "shr")) AssocLeft
431          ]
432        , [  Infix (char' '+' >> return (BinOp "+")) AssocLeft
433           , Infix (char' '-' >> return (BinOp "-")) AssocLeft
434           , Infix (try $ string' "or" >> return (BinOp "or")) AssocLeft
435           , Infix (try $ string' "xor" >> return (BinOp "xor")) AssocLeft
436          ]
437        , [  Infix (try (string' "<>") >> return (BinOp "<>")) AssocNone
438           , Infix (try (string' "<=") >> return (BinOp "<=")) AssocNone
439           , Infix (try (string' ">=") >> return (BinOp ">=")) AssocNone
440           , Infix (char' '<' >> return (BinOp "<")) AssocNone
441           , Infix (char' '>' >> return (BinOp ">")) AssocNone
442          ]
443        {-, [  Infix (try $ string' "shl" >> return (BinOp "shl")) AssocNone
444             , Infix (try $ string' "shr" >> return (BinOp "shr")) AssocNone
445          ]
446        , [
447             Infix (try $ string' "or" >> return (BinOp "or")) AssocLeft
448           , Infix (try $ string' "xor" >> return (BinOp "xor")) AssocLeft
449          ]-}
450        , [
451             Infix (char' '=' >> return (BinOp "=")) AssocNone
452          ]
453        ]
454    strOrChar [a] = CharCode . show . ord $ a
455    strOrChar a = StringLiteral a
456
457phrasesBlock :: Parsec String u Phrase
458phrasesBlock = do
459    try $ string' "begin"
460    comments
461    p <- manyTill phrase (try $ string' "end" >> notFollowedBy alphaNum)
462    comments
463    return $ Phrases p
464
465phrase :: Parsec String u Phrase
466phrase = do
467    o <- choice [
468        phrasesBlock
469        , ifBlock
470        , whileCycle
471        , repeatCycle
472        , switchCase
473        , withBlock
474        , forCycle
475        , (try $ reference >>= \r -> string' ":=" >> return r) >>= \r -> comments >> expression >>= return . Assignment r
476        , builtInFunction expression >>= \(n, e) -> return $ BuiltInFunctionCall e (SimpleReference (Identifier n BTUnknown))
477        , procCall
478        , char' ';' >> comments >> return NOP
479        ]
480    optional $ char' ';'
481    comments
482    return o
483
484ifBlock :: Parsec String u Phrase
485ifBlock = do
486    try $ string "if" >> notFollowedBy (alphaNum <|> char '_')
487    comments
488    e <- expression
489    comments
490    string' "then"
491    comments
492    o1 <- phrase
493    comments
494    o2 <- optionMaybe $ do
495        try $ string' "else" >> void space
496        comments
497        o <- option NOP phrase
498        comments
499        return o
500    return $ IfThenElse e o1 o2
501
502whileCycle :: Parsec String u Phrase
503whileCycle = do
504    try $ string' "while"
505    comments
506    e <- expression
507    comments
508    string' "do"
509    comments
510    o <- phrase
511    return $ WhileCycle e o
512
513withBlock :: Parsec String u Phrase
514withBlock = do
515    try $ string' "with" >> void space
516    comments
517    rs <- (commaSep1 pas) reference
518    comments
519    string' "do"
520    comments
521    o <- phrase
522    return $ foldr WithBlock o rs
523
524repeatCycle :: Parsec String u Phrase
525repeatCycle = do
526    try $ string' "repeat" >> void space
527    comments
528    o <- many phrase
529    string' "until"
530    comments
531    e <- expression
532    comments
533    return $ RepeatCycle e o
534
535forCycle :: Parsec String u Phrase
536forCycle = do
537    try $ string' "for" >> void space
538    comments
539    i <- iD
540    comments
541    string' ":="
542    comments
543    e1 <- expression
544    comments
545    up <- liftM (== Just "to") $
546            optionMaybe $ choice [
547                try $ string "to"
548                , try $ string "downto"
549                ]
550    --choice [string' "to", string' "downto"]
551    comments
552    e2 <- expression
553    comments
554    string' "do"
555    comments
556    p <- phrase
557    comments
558    return $ ForCycle i e1 e2 p up
559
560switchCase :: Parsec String u Phrase
561switchCase = do
562    try $ string' "case"
563    comments
564    e <- expression
565    comments
566    string' "of"
567    comments
568    cs <- many1 aCase
569    o2 <- optionMaybe $ do
570        try $ string' "else" >> notFollowedBy alphaNum
571        comments
572        o <- many phrase
573        comments
574        return o
575    string' "end"
576    comments
577    return $ SwitchCase e cs o2
578    where
579    aCase = do
580        e <- (commaSep pas) $ (liftM InitRange rangeDecl <|> initExpression)
581        comments
582        char' ':'
583        comments
584        p <- phrase
585        comments
586        return (e, p)
587
588procCall :: Parsec String u Phrase
589procCall = do
590    r <- reference
591    p <- option [] $ (parens pas) parameters
592    return $ ProcCall r p
593
594parameters :: Parsec String u [Expression]
595parameters = (commaSep pas) expression <?> "parameters"
596
597functionBody :: Parsec String u (TypesAndVars, Phrase)
598functionBody = do
599    tv <- typeVarDeclaration True
600    comments
601    p <- phrasesBlock
602    char' ';'
603    comments
604    return (TypesAndVars tv, p)
605
606uses :: Parsec String u Uses
607uses = liftM Uses (option [] u)
608    where
609        u = do
610            string' "uses"
611            comments
612            ulist <- (iD >>= \i -> comments >> return i) `sepBy1` (char' ',' >> comments)
613            char' ';'
614            comments
615            return ulist
616
617initExpression :: Parsec String u InitExpression
618initExpression = buildExpressionParser table term <?> "initialization expression"
619    where
620    term = comments >> choice [
621        liftM (uncurry BuiltInFunction) $ builtInFunction initExpression
622        , try $ brackets pas (commaSep pas $ initExpression) >>= return . InitSet
623        , try $ parens pas (commaSep pas $ initExpression) >>= \ia -> when ((notRecord $ head ia) && (null $ tail ia)) mzero >> return (InitArray ia)
624        , try $ parens pas (sepEndBy recField (char' ';' >> comments)) >>= return . InitRecord
625        , parens pas initExpression
626        , try $ integer pas >>= \i -> notFollowedBy (char' '.') >> (return . InitNumber . show) i
627        , try $ float pas >>= return . InitFloat . show
628        , try $ integer pas >>= return . InitNumber . show
629        , try (string' "_S" >> stringLiteral pas) >>= return . InitString
630        , try (string' "_P" >> stringLiteral pas) >>= return . InitPChar
631        , stringLiteral pas >>= return . InitString
632        , char' '#' >> many digit >>= \c -> comments >> return (InitChar c)
633        , char' '$' >> many hexDigit >>= \h -> comments >> return (InitHexNumber h)
634        , char' '@' >> initExpression >>= \c -> comments >> return (InitAddress c)
635        , try $ string' "nil" >> return InitNull
636        , try itypeCast
637        , iD >>= return . InitReference
638        ]
639
640    notRecord (InitRecord _) = False
641    notRecord _ = True
642
643    recField = do
644        i <- iD
645        spaces
646        char' ':'
647        spaces
648        e <- initExpression
649        spaces
650        return (i ,e)
651
652    table = [
653          [
654             Prefix (char' '-' >> return (InitPrefixOp "-"))
655            ,Prefix (try (string' "not") >> return (InitPrefixOp "not"))
656          ]
657        , [  Infix (char' '*' >> return (InitBinOp "*")) AssocLeft
658           , Infix (char' '/' >> return (InitBinOp "/")) AssocLeft
659           , Infix (try (string' "div") >> return (InitBinOp "div")) AssocLeft
660           , Infix (try (string' "mod") >> return (InitBinOp "mod")) AssocLeft
661           , Infix (try $ string' "and" >> return (InitBinOp "and")) AssocLeft
662           , Infix (try $ string' "shl" >> return (InitBinOp "shl")) AssocNone
663           , Infix (try $ string' "shr" >> return (InitBinOp "shr")) AssocNone
664          ]
665        , [  Infix (char' '+' >> return (InitBinOp "+")) AssocLeft
666           , Infix (char' '-' >> return (InitBinOp "-")) AssocLeft
667           , Infix (try $ string' "or" >> return (InitBinOp "or")) AssocLeft
668           , Infix (try $ string' "xor" >> return (InitBinOp "xor")) AssocLeft
669          ]
670        , [  Infix (try (string' "<>") >> return (InitBinOp "<>")) AssocNone
671           , Infix (try (string' "<=") >> return (InitBinOp "<=")) AssocNone
672           , Infix (try (string' ">=") >> return (InitBinOp ">=")) AssocNone
673           , Infix (char' '<' >> return (InitBinOp "<")) AssocNone
674           , Infix (char' '>' >> return (InitBinOp ">")) AssocNone
675           , Infix (char' '=' >> return (InitBinOp "=")) AssocNone
676          ]
677        {--, [  Infix (try $ string' "and" >> return (InitBinOp "and")) AssocLeft
678           , Infix (try $ string' "or" >> return (InitBinOp "or")) AssocLeft
679           , Infix (try $ string' "xor" >> return (InitBinOp "xor")) AssocLeft
680          ]
681        , [  Infix (try $ string' "shl" >> return (InitBinOp "shl")) AssocNone
682           , Infix (try $ string' "shr" >> return (InitBinOp "shr")) AssocNone
683          ]--}
684        --, [Prefix (try (string' "not") >> return (InitPrefixOp "not"))]
685        ]
686
687    itypeCast = do
688        --t <- choice $ map (\s -> try $ caseInsensitiveString s >>= \i -> notFollowedBy alphaNum >> return i) knownTypes
689        t <- iD
690        i <- parens pas initExpression
691        comments
692        return $ InitTypeCast t i
693
694builtInFunction :: Parsec String u a -> Parsec String u (String, [a])
695builtInFunction e = do
696    name <- choice $ map (\s -> try $ caseInsensitiveString s >>= \i -> notFollowedBy alphaNum >> return i) builtin
697    spaces
698    exprs <- option [] $ parens pas $ option [] $ commaSep1 pas $ e
699    spaces
700    return (name, exprs)
701
702systemUnit :: Parsec String u PascalUnit
703systemUnit = do
704    string' "system;"
705    comments
706    string' "type"
707    comments
708    t <- typesDecl
709    string' "var"
710    v <- varsDecl True
711    return $ System (t ++ v)
712
713redoUnit :: Parsec String u PascalUnit
714redoUnit = do
715    string' "redo;"
716    comments
717    string' "type"
718    comments
719    t <- typesDecl
720    string' "var"
721    v <- varsDecl True
722    return $ Redo (t ++ v)
723
724