1#
2# lua_parser.py
3#
4# A simple parser for the Lua language.
5#
6# Copyright 2020, Paul McGuire
7#
8
9"""
10from https://www.lua.org/manual/5.1/manual.html#8
11
12    chunk ::= {stat [';']} [laststat [';']]
13
14    block ::= chunk
15
16    stat ::=  varlist '=' explist |
17         functioncall |
18         do block end |
19         while exp do block end |
20         repeat block until exp |
21         if exp then block {elseif exp then block} [else block] end |
22         for Name '=' exp ',' exp [',' exp] do block end |
23         for namelist in explist do block end |
24         function funcname funcbody |
25         local function Name funcbody |
26         local namelist ['=' explist]
27
28    laststat ::= return [explist] | break
29
30    funcname ::= Name {'.' Name} [':' Name]
31
32    varlist ::= var {',' var}
33
34    var ::=  Name | prefixexp '[' exp ']' | prefixexp '.' Name
35
36    namelist ::= Name {',' Name}
37
38    explist ::= {exp ','} exp
39
40    exp ::=  nil | false | true | Number | String | '...' | function |
41         prefixexp | tableconstructor | exp binop exp | unop exp
42
43    prefixexp ::= var | functioncall | '(' exp ')'
44
45    functioncall ::=  prefixexp args | prefixexp ':' Name args
46
47    args ::=  '(' [explist] ')' | tableconstructor | String
48
49    function ::= function funcbody
50
51    funcbody ::= '(' [parlist] ')' block end
52
53    parlist ::= namelist [',' '...'] | '...'
54
55    tableconstructor ::= '{' [fieldlist] '}'
56
57    fieldlist ::= field {fieldsep field} [fieldsep]
58
59    field ::= '[' exp ']' '=' exp | Name '=' exp | exp
60
61    fieldsep ::= ',' | ';'
62
63    binop ::= '+' | '-' | '*' | '/' | '^' | '%' | '..' |
64         '<' | '<=' | '>' | '>=' | '==' | '~=' |
65         and | or
66
67    unop ::= '-' | not | '#'
68
operator precedence, listed from lowest to highest (per the Lua 5.3 manual, section 3.4.8):
70
71     or
72     and
73     <     >     <=    >=    ~=    ==
74     |
75     ~
76     &
77     <<    >>
78     ..
79     +     -
80     *     /     //    %
81     unary operators (not   #     -     ~)
82     ^
83
84"""
85import pyparsing as pp
86
# shorthand for pyparsing's collection of ready-made common expressions
ppc = pp.pyparsing_common
# packrat memoization is switched on before any grammar expression is built
pp.ParserElement.enablePackrat()

# punctuation tokens: matched, but dropped from the parse results
(
    LBRACK,
    RBRACK,
    LBRACE,
    RBRACE,
    LPAR,
    RPAR,
    EQ,
    COMMA,
    SEMI,
    COLON,
) = [pp.Suppress(punc) for punc in "[]{}()=,;:"]
OPT_SEMI = pp.Optional(SEMI).suppress()
ELLIPSIS = pp.Literal("...")

# reserved words, keyed by their upper-case spelling ("RETURN" -> Keyword("return"))
keywords = dict(
    (word.upper(), pp.Keyword(word))
    for word in (
        "return break do end while if then elseif else for in function local "
        "repeat until nil false true and or not"
    ).split()
)
# publish every keyword expression as a module-level name (RETURN, BREAK, ...)
globals().update(keywords)
any_keyword = pp.MatchFirst(list(keywords.values())).setName("<keyword>")
103
# Lua comments: "--" to end of line, or a long comment "--[[ ... ]]" that may
# span several lines.
comment_intro = pp.Literal("--")
short_comment = comment_intro + pp.restOfLine
# The long form must open with exactly "--[[" and runs to the first "]]".
# Matching a single "[" ... "]" pair instead would stop at the first "]",
# leave the second one unparsed, and would also treat a short comment such as
# "--[ note" as the start of a long comment.
# Leveled long brackets (--[==[ ... ]==]) are not handled.
long_comment = pp.Regex(r"--\[\[[\s\S]*?\]\]")
# long form is tried first so "--[[" is not consumed as a one-line comment
lua_comment = long_comment | short_comment
108
# An identifier is any word that is not a reserved word; the negative
# lookahead rejects keywords before the identifier pattern is attempted.
ident = pp.NotAny(any_keyword) + ppc.identifier

# one or more identifiers joined by "." and returned as a single string
name = pp.delimitedList(ident, ".", combine=True)

# comma-separated names
namelist = pp.delimitedList(name, delim=",")
number = ppc.number
116
# Long-bracket strings: only the level-0 form [[ ... ]] is recognized; leveled
# forms such as [==[ ... ]==] are not parsed.
multiline_string = pp.QuotedString("[[", endQuoteChar="]]", multiline=True)
# Quoted strings may contain backslash escapes (\' or \"), so escChar is set
# to keep an escaped quote from terminating the string early.
string = (
    pp.QuotedString("'", escChar="\\")
    | pp.QuotedString('"', escChar="\\")
    | multiline_string
)
120
# exp and stat are mutually recursive with the rules below, so they are
# declared as Forward placeholders here and filled in later with "<<=".
exp = pp.Forward()

#     explist1 ::= {exp ','} exp
explist1 = pp.delimitedList(exp)

stat = pp.Forward()

#    laststat ::= return [explist1]  |  break
# the expression list after RETURN is optional, so a bare "return" is accepted
laststat = pp.Group(RETURN + pp.Optional(explist1)) | BREAK

#    block ::= {stat [';']} [laststat[';']]
# zero or more statements: a block may be empty or consist of only a laststat
block = pp.Group(stat + OPT_SEMI)[...] + pp.Optional(laststat + OPT_SEMI)
133
#    field ::= '[' exp ']' '=' exp  |  Name '=' exp  |  exp
# one table entry: a computed key, a named key, or a bare (positional) value;
# for the two keyed forms the value is wrapped in its own group
field = pp.Group(
    (LBRACK + exp + RBRACK + EQ + pp.Group(exp))
    | (name + EQ + pp.Group(exp))
    | exp
)

#    fieldsep ::= ','  |  ';'
fieldsep = COMMA | SEMI

#    fieldlist ::= field {fieldsep field} [fieldsep]
# a trailing separator after the last field is permitted
field_list = pp.delimitedList(field, fieldsep) + pp.Optional(fieldsep)

#    tableconstructor ::= '{' [fieldlist] '}'
tableconstructor = pp.Group(LBRACE + pp.Optional(field_list) + RBRACE)
147
#    parlist1 ::= namelist [',' '...']  |  '...'
parlist = namelist + pp.Optional(COMMA + ELLIPSIS) | ELLIPSIS

#    funcname ::= Name {'.' Name} [':' Name]
# `name` already accepts dotted names, so only the optional ":method" suffix
# needs to be handled here
funcname = pp.Group(name + COLON + name) | name

#    function ::= function funcbody
#    funcbody ::= '(' [parlist1] ')' block end
# the parameter list is optional so that "function f() ... end" parses; the
# parameters are always returned as a (possibly empty) group
funcbody = pp.Group(LPAR + pp.Optional(parlist) + RPAR) + block + END
function = FUNCTION + funcbody
158
#    args ::=  '(' [explist1] ')'  |  tableconstructor  |  String
# call arguments: a parenthesized (possibly empty) expression list, a single
# table constructor, or a single string literal
args = LPAR + pp.Optional(explist1) + RPAR | tableconstructor | string

# this portion of the spec is left-recursive, must break LR loop
#    varlist1 ::= var {',' var}
#    var ::=  Name  |  prefixexp '[' exp ']'  |  prefixexp '.' Name
#    prefixexp ::= var  |  functioncall  |  '(' exp ')'
#    functioncall ::=  prefixexp args  |  prefixexp ':' Name args

# To break the loop, prefixexp is limited here to a (possibly dotted) name or
# a parenthesized expression, so it never refers back to var or functioncall.
prefixexp = name | LPAR + exp + RPAR
# a plain call, or a method call of the form prefix:method args
functioncall = prefixexp + args | prefixexp + COLON + name + args
var = pp.Forward()
# functioncall is tried before name so that a call is not cut short at its name
var_atom = functioncall | name | LPAR + exp + RPAR
# a bracketed subscript, grouped so it stays distinct from the indexed atom
index_ref = pp.Group(LBRACK + exp + RBRACK)
# a var is a "."-separated sequence of atoms, each followed by at most one
# bracketed subscript
var <<= pp.delimitedList(pp.Group(var_atom + index_ref) | var_atom, delim=".")

# comma-separated assignment targets
varlist1 = pp.delimitedList(var)
176
# exp ::=  nil  |  false  |  true  |  Number  |  String  |  '...'  |
#              function  |  prefixexp  |  tableconstructor
# Operands of the expression grammar.  Anonymous functions
# ("function (...) ... end") are part of the grammar's exp rule and are
# grouped so a function's tokens stay together inside an expression list.
exp_atom = (
    NIL
    | FALSE
    | TRUE
    | number
    | string
    | ELLIPSIS
    | pp.Group(function)
    | functioncall
    | var  # prefixexp
    | tableconstructor
)
190
# precedence of operations from https://www.lua.org/manual/5.3/manual.html#3.4.8
# Levels are listed from tightest-binding to loosest-binding, the order
# infixNotation expects.  Per the manual, exponentiation ("^") and
# concatenation ("..") are right associative; every other binary operator is
# left associative.
exp <<= pp.infixNotation(
    exp_atom,
    [
        ("^", 2, pp.opAssoc.RIGHT),
        (NOT | pp.oneOf("# - ~"), 1, pp.opAssoc.RIGHT),
        (pp.oneOf("* / // %"), 2, pp.opAssoc.LEFT),
        (pp.oneOf("+ -"), 2, pp.opAssoc.LEFT),
        ("..", 2, pp.opAssoc.RIGHT),
        (pp.oneOf("<< >>"), 2, pp.opAssoc.LEFT),
        ("&", 2, pp.opAssoc.LEFT),
        ("~", 2, pp.opAssoc.LEFT),
        ("|", 2, pp.opAssoc.LEFT),
        (pp.oneOf("< > <= >= ~= =="), 2, pp.opAssoc.LEFT),
        (AND, 2, pp.opAssoc.LEFT),
        (OR, 2, pp.opAssoc.LEFT),
    ],
)
209
# Statement forms.  LOCAL is accepted as an optional prefix on assignments,
# calls and function definitions instead of being modeled as separate rules.
assignment_stat = pp.Optional(LOCAL) + varlist1 + EQ + explist1
func_call_stat = pp.Optional(LOCAL) + functioncall
do_stat = DO + block + END
# the grammar requires DO between the loop condition and the loop body
while_stat = WHILE + exp + DO + block + END
repeat_stat = REPEAT + block + UNTIL + exp
# numeric for: start, stop and an optional step
for_loop_stat = (
    FOR + name + EQ + exp + COMMA + exp + pp.Optional(COMMA + exp) + DO + block + END
)
# generic for: iterate names over an expression list
for_seq_stat = FOR + namelist + IN + explist1 + DO + block + END
# each elseif/else clause is grouped so the clauses stay distinguishable
if_stat = (
    IF
    + exp
    + THEN
    + block
    + pp.Group(ELSEIF + exp + THEN + block)[...]
    + pp.Optional(pp.Group(ELSE + block))
    + END
)
function_def = pp.Optional(LOCAL) + FUNCTION + funcname + funcbody
# local declaration without an initializer, e.g. "local a, b"
local_decl_stat = LOCAL + namelist

# give each statement expression a readable name for error messages
for var_name in """
        assignment_stat
        func_call_stat
        do_stat
        while_stat
        repeat_stat
        for_loop_stat
        for_seq_stat
        if_stat
        function_def
        local_decl_stat
        """.split():
    vars()[var_name].setName(var_name)

#    stat ::=  varlist1 '=' explist1  |
#              functioncall  |
#              do block end  |
#              while exp do block end  |
#              repeat block until exp  |
#              if exp then block {elseif exp then block} [else block] end  |
#              for Name '=' exp ',' exp [',' exp] do block end  |
#              for namelist in explist1 do block end  |
#              function funcname funcbody  |
#              local function Name funcbody  |
#              local namelist ['=' explist1]
# Alternatives are tried in order.  local_decl_stat comes last so that it only
# applies when no longer form starting with LOCAL (assignment, call, function
# definition) matches.
stat <<= pp.Group(
    assignment_stat
    | do_stat
    | while_stat
    | repeat_stat
    | for_loop_stat
    | for_seq_stat
    | func_call_stat
    | if_stat
    | function_def
    | local_decl_stat
)
265
# a script is any number of statements, possibly none
lua_script = pp.ZeroOrMore(stat)

# Lua comments are skipped while parsing a script
lua_script.ignore(lua_comment)
270
if __name__ == "__main__":

    # small Lua function used as a smoke test for the grammar
    sample = r"""
    function test(x)
        local t = {foo=1, bar=2, arg=x}
        n = 0
        if t['foo'] then
            n = n + 1
        end
        if 10 > 8 then
            n = n + 2
        end
        if (10 > 8) then
            n = n + 2
        end
    end
    """

    try:
        # parseAll=True makes input the grammar cannot handle surface as a
        # ParseException instead of silently ending the parse early and
        # printing only the statements matched so far
        result = lua_script.parseString(sample, parseAll=True)
    except pp.ParseException as pe:
        print(pe.explain())
    else:
        result.pprint()
294