1local lexer = require "luacheck.lexer"
2local utils = require "luacheck.utils"
3
4local parser = {}
5
6-- A table with range info, or simply range, has `line`, `offset`, and `end_offset` fields.
7-- `line` is the line of the first character.
8-- Parser state table has range info for the current token, and all AST
-- node tables have range info for themselves, including parens around expressions
10-- that are otherwise not reflected in the AST structure.
11
parser.SyntaxError = utils.class()

-- Stores a syntax error message together with the range of the offending
-- token and, optionally, the range of a related earlier token
-- (e.g. the unclosed opening token for a missing `end` error).
function parser.SyntaxError:__init(msg, range, prev_range)
   self.msg = msg
   self.line = range.line
   self.offset = range.offset
   self.end_offset = range.end_offset

   if prev_range then
      self.prev_line = prev_range.line
      self.prev_offset = prev_range.offset
      self.prev_end_offset = prev_range.end_offset
   end
end

-- Throws a parser.SyntaxError; level 0 avoids prepending position info to the message.
function parser.syntax_error(msg, range, prev_range)
   error(parser.SyntaxError(msg, range, prev_range), 0)
end
30
-- Marks the line endings spanned by a multiline token as belonging to a
-- token of type `token_type` ("comment" or "string"). The token starts on
-- `state.line` and the lexer currently stands on the token's last line, so
-- every line ending strictly before the lexer's line is inside the token.
local function mark_line_endings(state, token_type)
   local endings = state.line_endings
   local last_marked_line = state.lexer.line - 1

   for marked_line = state.line, last_marked_line do
      endings[marked_line] = token_type
   end
end
36
-- Advances parser state to the next significant token, skipping comments.
-- Along the way records comments, marks comment/string line endings, and
-- marks lines containing code. Raises a SyntaxError on lexer errors.
local function skip_token(state)
   while true do
      local token, token_value, line, offset, error_end_offset = lexer.next_token(state.lexer)
      state.token = token
      state.token_value = token_value
      state.line = line
      state.offset = offset
      -- On a lexer error the error carries its own end offset; otherwise the
      -- token ends just before the lexer's current position.
      state.end_offset = error_end_offset or (state.lexer.offset - 1)

      if not token then
         -- `token_value` holds the lexer error message in this case.
         parser.syntax_error(token_value, state)
      end

      if token == "short_comment" then
         state.comments[#state.comments + 1] = {
            contents = token_value,
            line = line,
            offset = offset,
            end_offset = state.end_offset
         }

         state.line_endings[line] = "comment"
      elseif token == "long_comment" then
         mark_line_endings(state, "comment")
      else
         if token ~= "eof" then
            -- A multiline string wraps the line endings it spans.
            mark_line_endings(state, "string")
            state.code_lines[line] = true
            state.code_lines[state.lexer.line] = true
         end

         return
      end
   end
end
72
-- Returns a human-readable description of a token for error messages.
local function token_name(token)
   if token == "name" then
      return "identifier"
   elseif token == "eof" then
      return "<eof>"
   end

   return "'" .. token .. "'"
end
76
-- Raises a syntax error of the form "`msg` near `token`".
-- `token_prefix` is prepended to the token representation and
-- `message_suffix` appended to the whole message when given;
-- `prev_range` optionally points to a related earlier location.
local function parse_error(state, msg, prev_range, token_prefix, message_suffix)
   local token_repr

   if state.token == "eof" then
      token_repr = "<eof>"
   else
      -- Quotes the token's source text, or the whole line when the token text is unsuitable.
      token_repr = lexer.get_quoted_substring_or_line(state.lexer, state.line, state.offset, state.end_offset)
   end

   if token_prefix then
      token_repr = token_prefix .. " " .. token_repr
   end

   msg = msg .. " near " .. token_repr

   if message_suffix then
      msg = msg .. " " .. message_suffix
   end

   parser.syntax_error(msg, state, prev_range)
end
98
-- Raises a syntax error unless the current token is `token`.
local function check_token(state, token)
   if state.token ~= token then
      parse_error(state, "expected " .. token_name(token))
   end
end

-- Raises a syntax error unless the current token is `token`, then skips it.
local function check_and_skip_token(state, token)
   check_token(state, token)
   skip_token(state)
end

-- If the current token is `token`, skips it and returns true; otherwise returns nil.
local function test_and_skip_token(state, token)
   if state.token == token then
      skip_token(state)
      return true
   end
end
116
-- Snapshots the range fields of a live range (such as parser state) into a
-- detached table, so that later mutation of the source doesn't affect it.
local function copy_range(range)
   local snapshot = {}
   snapshot.line = range.line
   snapshot.offset = range.offset
   snapshot.end_offset = range.end_offset
   return snapshot
end
124
125local new_state
126local parse_block
127local missing_closing_token_error
128
129-- Attempt to guess a better location for missing `end` and `until` errors (usually they uselessly point to eof).
130-- Guessed error token should be selected in such a way that inserting previously missing closing token
131-- in front of it should fix the error or at least move its opening token forward.
132-- The idea is to track the stack of opening tokens and their indentations.
133-- For the first statement or closing token with the same or smaller indentation than the opening token
134-- on the top of the stack:
135-- * If it has the same indentation but is not the appropriate closing token for the opening one, pick it
136--   as the guessed error location.
-- * If it has a lower indentation level, pick it as the guessed error location even if it closes the opening token.
138-- Example:
139-- local function f()
140--    <code>
141--
142--    if cond then                   <- `if` is the guessed opening token
143--       <code>
144--
145--    <code not starting with `end`> <- first token on this line is the guessed error location
146-- end
147-- Another one:
148-- local function g()
149--    <code>
150--
151--    if cond then  <- `if` is the guessed opening token
152--       <code>
153--
154-- end              <- `end` is the guessed error location
155
-- Maps each block-opening token to the token that closes it.
local opening_token_to_closing = {
   ["("] = ")",
   ["["] = "]",
   ["{"] = "}",
   ["repeat"] = "until"
}

-- Every remaining statement opener is closed by "end".
for _, opening_token in ipairs({"do", "if", "else", "elseif", "while", "for", "function"}) do
   opening_token_to_closing[opening_token] = "end"
end

-- Returns the indentation width of `line`: number of leading whitespace
-- characters minus one (-1 for no indentation). Results are only ever
-- compared with each other, so the constant offset is harmless.
local function get_indentation(state, line)
   local leading_ws = state.lexer.src:match("^[ \t\v\f]*", state.lexer.line_offsets[line])
   return #leading_ws - 1
end
174
local UnpairedTokenGuesser = utils.class()

-- Prepares a second parsing pass over the same source to guess a better
-- location for a missing closing token error. `error_offset` remembers
-- where the original error was reported.
function UnpairedTokenGuesser:__init(state, error_opening_range, error_closing_token)
   self.old_state = state
   self.error_offset = state.offset
   self.error_opening_range = error_opening_range
   self.error_closing_token = error_closing_token
   self.opening_tokens_stack = utils.Stack()
end
184
-- Runs the second parsing pass; it is expected to raise an improved SyntaxError.
function UnpairedTokenGuesser:guess()
   -- Need to reinitialize lexer (e.g. to skip shebang again).
   self.state = new_state(self.old_state.lexer.src)
   self.state.unpaired_token_guesser = self
   skip_token(self.state)
   parse_block(self.state)
   -- The first pass raised an error, so the second pass must raise one too.
   error("No syntax error in second parse", 0)
end
193
-- Pushes a record for the block's opening token onto the stack, remembering
-- the token, its expected closing token, its indentation, and whether it is
-- eligible as the source of the original missing-closing-token error.
function UnpairedTokenGuesser:on_block_start(opening_token_range, opening_token)
   local closing_token = opening_token_to_closing[opening_token]

   local record = copy_range(opening_token_range)
   record.token = opening_token
   record.closing_token = closing_token
   record.eligible = closing_token == self.error_closing_token
   record.indentation = get_indentation(self.state, opening_token_range.line)

   self.opening_tokens_stack:push(record)
end
202
-- Marks the opening token on the top of the stack, together with the current
-- token and its range, as the guessed error location.
function UnpairedTokenGuesser:set_guessed()
   -- Keep the first detected location.
   if self.guessed then
      return
   end

   self.guessed = self.opening_tokens_stack.top
   self.guessed.error_token = self.state.token
   self.guessed.error_range = copy_range(self.state)
end
213
-- Called on each statement and closing token during the second pass.
-- Implements the indentation heuristic described above and, upon reaching
-- the offset of the original error, re-raises it at the guessed location.
function UnpairedTokenGuesser:check_token()
   local top = self.opening_tokens_stack.top

   if top and top.eligible and self.state.line > top.line then
      local token_indentation = get_indentation(self.state, self.state.line)

      if token_indentation < top.indentation then
         -- Dedented below the opening token: guess here even if this token closes it.
         self:set_guessed()
      elseif token_indentation == top.indentation then
         local token = self.state.token

         -- Same indentation: guess here unless this token closes the opening
         -- token or continues it (`else`/`elseif` after `if`/`elseif`).
         if token ~= top.closing_token and
               ((top.token ~= "if" and top.token ~= "elseif") or (token ~= "elseif" and token ~= "else")) then
            self:set_guessed()
         end
      end
   end

   if self.state.offset == self.error_offset then
      if self.guessed and self.guessed.error_range.offset ~= self.state.offset then
         -- Move the error report from the original location to the guessed one.
         self.state.line = self.guessed.error_range.line
         self.state.offset = self.guessed.error_range.offset
         self.state.end_offset = self.guessed.error_range.end_offset
         self.state.token = self.guessed.error_token
         missing_closing_token_error(self.state, self.guessed, self.guessed.token, self.guessed.closing_token, true)
      end
   end
end
242
-- Called when a block is closed during the second pass.
function UnpairedTokenGuesser:on_block_end()
   self:check_token()
   self.opening_tokens_stack:pop()

   if not self.opening_tokens_stack.top then
      -- Inserting an end token into a balanced sequence of tokens adds an error earlier than the original one.
      self.guessed = nil
   end
end

-- Called before each statement during the second pass.
function UnpairedTokenGuesser:on_statement()
   self:check_token()
end
256
-- Raises an "expected `closing_token`" syntax error, mentioning the line of
-- the opening token when it is on a different line, and annotating the
-- message when the location is an indentation-based guess.
function missing_closing_token_error(state, opening_range, opening_token, closing_token, is_guess)
   local msg = "expected " .. token_name(closing_token)

   if opening_range and opening_range.line ~= state.line then
      msg = msg .. " (to close " .. token_name(opening_token) .. " on line " .. tostring(opening_range.line) .. ")"
   end

   local token_prefix
   local message_suffix

   if is_guess then
      if state.token == closing_token then
         -- "expected 'end' near 'end'" seems confusing.
         token_prefix = "less indented"
      end

      message_suffix = "(indentation-based guess)"
   end

   parse_error(state, msg, opening_range, token_prefix, message_suffix)
end
278
-- Checks that the current token closes the block opened by `opening_token`.
-- For a missing `end`/`until`, first reparses the source with
-- UnpairedTokenGuesser to try to report the error at a more useful location
-- than the current one (which is usually eof).
local function check_closing_token(state, opening_range, opening_token)
   -- A nil opening token means the top-level block, closed by eof.
   local closing_token = opening_token_to_closing[opening_token] or "eof"

   if state.token == closing_token then
      return
   end

   -- `else` and `elseif` also validly continue (and close) if/elseif blocks.
   if (opening_token == "if" or opening_token == "elseif") and (state.token == "else" or state.token == "elseif") then
      return
   end

   if closing_token == "end" or closing_token == "until" then
      -- Don't start guessing recursively from within the guessing pass itself.
      if not state.unpaired_token_guesser then
         UnpairedTokenGuesser(state, opening_range, closing_token):guess()
      end
   end

   missing_closing_token_error(state, opening_range, opening_token, closing_token)
end

-- Like check_closing_token, but also skips the closing token.
local function check_and_skip_closing_token(state, opening_range, opening_token)
   check_closing_token(state, opening_range, opening_token)
   skip_token(state)
end
303
-- Raises a syntax error unless the current token is an identifier;
-- returns the identifier without skipping it.
local function check_name(state)
   check_token(state, "name")
   return state.token_value
end
308
-- Initializes `node` (or a fresh table) as an AST node with the given tag,
-- covering exactly the given range.
local function new_outer_node(range, tag, node)
   local result = node or {}
   result.tag = tag
   result.line = range.line
   result.offset = range.offset
   result.end_offset = range.end_offset
   return result
end

-- Initializes `node` (or a fresh table) as an AST node with the given tag,
-- starting where `start_range` starts and ending where `end_range` ends.
local function new_inner_node(start_range, end_range, tag, node)
   local result = node or {}
   result.tag = tag
   result.line = start_range.line
   result.offset = start_range.offset
   result.end_offset = end_range.end_offset
   return result
end
326
local parse_expression

-- Parses a comma-separated list of expressions, appending them to `list`
-- (or a fresh table), and returns it.
local function parse_expression_list(state, list)
   list = list or {}

   repeat
      list[#list + 1] = parse_expression(state)
   until not test_and_skip_token(state, ",")

   return list
end

-- Parses an identifier into a node with the given tag ("Id" by default).
local function parse_id(state, tag)
   local ast_node = new_outer_node(state, tag or "Id")
   ast_node[1] = check_name(state)
   -- Skip name.
   skip_token(state)
   return ast_node
end
346
-- Returns a parser for a single-token literal, producing a node with the
-- given tag and the token value as the only child.
local function atom(tag)
   return function(state)
      local ast_node = new_outer_node(state, tag)
      ast_node[1] = state.token_value
      skip_token(state)
      return ast_node
   end
end

-- Parsers for expressions dispatched on their first token.
local simple_expressions = {}

simple_expressions.number = atom("Number")
simple_expressions.string = atom("String")
simple_expressions["nil"] = atom("Nil")
simple_expressions["true"] = atom("True")
simple_expressions["false"] = atom("False")
simple_expressions["..."] = atom("Dots")
364
-- Parses a table constructor: `{[expr] = expr, name = expr, expr, ...}`.
-- Pair nodes and array part items are appended to the same node array.
simple_expressions["{"] = function(state)
   local start_range = copy_range(state)
   local ast_node = {}
   skip_token(state)

   repeat
      if state.token == "}" then
         break
      end

      local key_node, value_node
      local first_token_range = copy_range(state)

      if state.token == "name" then
         -- Could be `name = expr` or an expression starting with a name;
         -- only the token after the name disambiguates.
         local name = state.token_value
         skip_token(state)  -- Skip name.

         if test_and_skip_token(state, "=") then
            -- `name` = `expr`.
            key_node = new_outer_node(first_token_range, "String", {name})
            value_node = parse_expression(state)
         else
            -- `name` is beginning of an expression in array part.
            -- Backtrack lexer to before name.
            state.lexer.line = first_token_range.line
            state.lexer.offset = first_token_range.offset
            skip_token(state)  -- Load name again.
            value_node = parse_expression(state)
         end
      elseif state.token == "[" then
         -- [ `expr` ] = `expr`.
         skip_token(state)
         key_node = parse_expression(state)
         check_and_skip_closing_token(state, first_token_range, "[")
         check_and_skip_token(state, "=")
         value_node = parse_expression(state)
      else
         -- Expression in array part.
         value_node = parse_expression(state)
      end

      if key_node then
         -- Pair.
         ast_node[#ast_node + 1] = new_inner_node(first_token_range, value_node, "Pair", {key_node, value_node})
      else
         -- Array part item.
         ast_node[#ast_node + 1] = value_node
      end
   until not (test_and_skip_token(state, ",") or test_and_skip_token(state, ";"))

   new_inner_node(start_range, state, "Table", ast_node)
   check_and_skip_closing_token(state, start_range, "{")
   return ast_node
end
419
-- Parses argument list and the statements.
-- `function_range` is the range of the "function" keyword (already skipped
-- by the caller); the resulting Function node spans up to the closing "end".
local function parse_function(state, function_range)
   local paren_range = copy_range(state)
   check_and_skip_token(state, "(")
   local args = {}

   -- Are there arguments?
   if state.token ~= ")" then
      repeat
         if state.token == "name" then
            args[#args + 1] = parse_id(state)
         elseif state.token == "..." then
            -- "..." must be the last argument.
            args[#args + 1] = simple_expressions["..."](state)
            break
         else
            parse_error(state, "expected argument")
         end
      until not test_and_skip_token(state, ",")
   end

   check_and_skip_closing_token(state, paren_range, "(")
   local body = parse_block(state, function_range, "function")
   local end_range = copy_range(state)
   -- Skip "end".
   skip_token(state)
   return new_inner_node(function_range, end_range, "Function", {args, body, end_range = end_range})
end

simple_expressions["function"] = function(state)
   local function_range = copy_range(state)
   -- Skip "function".
   skip_token(state)
   return parse_function(state, function_range)
end
454
-- A call handler parses arguments of a call with given base node that determines resulting node start location,
-- given tag, and array to which the arguments should be appended.
local call_handlers = {}

-- `(expr, ...)` argument list.
call_handlers["("] = function(state, base_node, tag, node)
   local paren_range = copy_range(state)
   -- Skip "(".
   skip_token(state)

   if state.token ~= ")" then
      parse_expression_list(state, node)
   end

   new_inner_node(base_node, state, tag, node)
   check_and_skip_closing_token(state, paren_range, "(")
   return node
end

-- A single table constructor or string literal as the whole argument list.
call_handlers["{"] = function(state, base_node, tag, node)
   local arg_node = simple_expressions[state.token](state)
   node[#node + 1] = arg_node
   return new_inner_node(base_node, arg_node, tag, node)
end

call_handlers.string = call_handlers["{"]
480
-- Suffix handlers parse expression suffixes: indexing and calls.
local suffix_handlers = {}

-- `base.name` indexing.
suffix_handlers["."] = function(state, base_node)
   -- Skip ".".
   skip_token(state)
   local index_node = parse_id(state, "String")
   return new_inner_node(base_node, index_node, "Index", {base_node, index_node})
end

-- `base[expr]` indexing.
suffix_handlers["["] = function(state, base_node)
   local bracket_range = copy_range(state)
   -- Skip "[".
   skip_token(state)
   local index_node = parse_expression(state)
   local ast_node = new_inner_node(base_node, state, "Index", {base_node, index_node})
   check_and_skip_closing_token(state, bracket_range, "[")
   return ast_node
end

-- `base:method(...)` invocation.
suffix_handlers[":"] = function(state, base_node)
   -- Skip ":".
   skip_token(state)
   local method_name = parse_id(state, "String")
   local call_handler = call_handlers[state.token]

   if not call_handler then
      parse_error(state, "expected method arguments")
   end

   return call_handler(state, base_node, "Invoke", {base_node, method_name})
end

-- `base(...)`, `base{...}`, and `base"..."` calls.
suffix_handlers["("] = function(state, base_node)
   return call_handlers[state.token](state, base_node, "Call", {base_node})
end

suffix_handlers["{"] = suffix_handlers["("]
suffix_handlers.string = suffix_handlers["("]
519
-- Parses a primary expression: a parenthesized expression, an identifier,
-- or a literal, followed by any number of indexing and call suffixes.
-- `kind` customizes the error message; with `no_literals`, literals are
-- rejected (used when parsing the start of a statement).
local function parse_simple_expression(state, kind, no_literals)
   local expression

   if state.token == "(" then
      -- Parens get an explicit Paren node so their range is tracked.
      local paren_range = copy_range(state)
      skip_token(state)
      local inner_expression = parse_expression(state)
      expression = new_inner_node(paren_range, state, "Paren", {inner_expression})
      check_and_skip_closing_token(state, paren_range, "(")
   elseif state.token == "name" then
      expression = parse_id(state)
   else
      local literal_handler = simple_expressions[state.token]

      if not literal_handler or no_literals then
         parse_error(state, "expected " .. (kind or "expression"))
      end

      -- Literals take no suffixes, so return immediately.
      return literal_handler(state)
   end

   while true do
      local suffix_handler = suffix_handlers[state.token]

      if suffix_handler then
         expression = suffix_handler(state, expression)
      else
         return expression
      end
   end
end
551
-- Maps unary operator tokens to their Op node names.
local unary_operators = {
   ["not"] = "not",
   ["-"] = "unm",
   ["~"] = "bnot",
   ["#"] = "len"
}

-- Binding priority of all unary operators.
local unary_priority = 12

-- Maps binary operator tokens to their Op node names.
local binary_operators = {
   ["+"] = "add", ["-"] = "sub",
   ["*"] = "mul", ["%"] = "mod",
   ["^"] = "pow",
   ["/"] = "div", ["//"] = "idiv",
   ["&"] = "band", ["|"] = "bor", ["~"] = "bxor",
   ["<<"] = "shl", [">>"] = "shr",
   [".."] = "concat",
   ["~="] = "ne", ["=="] = "eq",
   ["<"] = "lt", ["<="] = "le",
   [">"] = "gt", [">="] = "ge",
   ["and"] = "and", ["or"] = "or"
}

-- Left and right binding priorities per binary operator, mirroring the
-- precedence table in the Lua reference manual. Right-associative operators
-- ("..", "^") have a right priority lower than their left priority.
local left_priorities = {
   add = 10, sub = 10,
   mul = 11, mod = 11,
   pow = 14,
   div = 11, idiv = 11,
   band = 6, bor = 4, bxor = 5,
   shl = 7, shr = 7,
   concat = 9,
   ne = 3, eq = 3,
   lt = 3, le = 3,
   gt = 3, ge = 3,
   ["and"] = 2, ["or"] = 1
}

local right_priorities = {
   add = 10, sub = 10,
   mul = 11, mod = 11,
   pow = 13,
   div = 11, idiv = 11,
   band = 6, bor = 4, bxor = 5,
   shl = 7, shr = 7,
   concat = 8,
   ne = 3, eq = 3,
   lt = 3, le = 3,
   gt = 3, ge = 3,
   ["and"] = 2, ["or"] = 1
}
602
-- Parses an expression using precedence climbing: consumes a unary operator
-- or a simple expression, then extends it with binary operators whose left
-- priority is above `limit`.
local function parse_subexpression(state, limit, kind)
   local expression
   local unary_operator = unary_operators[state.token]

   if unary_operator then
      local operator_range = copy_range(state)
      -- Skip operator.
      skip_token(state)
      local operand = parse_subexpression(state, unary_priority)
      expression = new_inner_node(operator_range, operand, "Op", {unary_operator, operand})
   else
      expression = parse_simple_expression(state, kind)
   end

   -- Expand while operators have priorities higher than `limit`.
   while true do
      local binary_operator = binary_operators[state.token]

      if not binary_operator or left_priorities[binary_operator] <= limit then
         break
      end

      -- Skip operator.
      skip_token(state)
      -- Read subexpression with higher priority.
      local subexpression = parse_subexpression(state, right_priorities[binary_operator])
      expression = new_inner_node(expression, subexpression, "Op", {binary_operator, expression, subexpression})
   end

   return expression
end

-- Parses a complete expression; `kind` customizes "expected expression" errors.
function parse_expression(state, kind)
   return parse_subexpression(state, 0, kind)
end
638
-- Parsers for statements dispatched on their first token.
local statements = {}

-- Parses an "if" statement. The resulting node alternates conditions and
-- blocks, with an optional trailing "else" block.
statements["if"] = function(state)
   local start_range = copy_range(state)
   -- Skip "if".
   skip_token(state)
   local ast_node = {}

   -- The loop is entered after skipping "if" or "elseif".
   -- Block start token info is set to the last skipped "if", "elseif", or "else" token.
   local block_start_token = "if"
   local block_start_range = start_range

   while true do
      ast_node[#ast_node + 1] = parse_expression(state, "condition")
      -- Add range of the "then" token to the block statement array.
      local branch_range = copy_range(state)
      check_and_skip_token(state, "then")
      ast_node[#ast_node + 1] = parse_block(state, block_start_range, block_start_token, branch_range)

      if state.token == "else" then
         branch_range = copy_range(state)
         block_start_token = "else"
         block_start_range = branch_range
         skip_token(state)
         ast_node[#ast_node + 1] = parse_block(state, block_start_range, block_start_token, branch_range)
         break
      elseif state.token == "elseif" then
         block_start_token = "elseif"
         block_start_range = copy_range(state)
         skip_token(state)
      else
         break
      end
   end

   new_inner_node(start_range, state, "If", ast_node)
   -- Skip "end".
   skip_token(state)
   return ast_node
end
680
-- Parses a `while cond do ... end` statement.
statements["while"] = function(state)
   local start_range = copy_range(state)
   -- Skip "while".
   skip_token(state)
   local condition = parse_expression(state, "condition")
   check_and_skip_token(state, "do")
   local block = parse_block(state, start_range, "while")
   local ast_node = new_inner_node(start_range, state, "While", {condition, block})
   -- Skip "end".
   skip_token(state)
   return ast_node
end

-- Parses a `do ... end` block statement.
statements["do"] = function(state)
   local start_range = copy_range(state)
   -- Skip "do".
   skip_token(state)
   local block = parse_block(state, start_range, "do")
   local ast_node = new_inner_node(start_range, state, "Do", block)
   -- Skip "end".
   skip_token(state)
   return ast_node
end
704
-- Parses a numeric ("Fornum") or generic ("Forin") "for" statement;
-- the first variable has been seen before the two can be distinguished.
statements["for"] = function(state)
   local start_range = copy_range(state)
   -- Skip "for".
   skip_token(state)

   local ast_node = {}
   local tag
   local first_var = parse_id(state)

   if state.token == "=" then
      -- Numeric "for" loop.
      tag = "Fornum"
      -- Skip "=".
      skip_token(state)
      ast_node[1] = first_var
      ast_node[2] = parse_expression(state)
      check_and_skip_token(state, ",")
      ast_node[3] = parse_expression(state)

      -- Optional step expression.
      if test_and_skip_token(state, ",") then
         ast_node[4] = parse_expression(state)
      end

      check_and_skip_token(state, "do")
      ast_node[#ast_node + 1] = parse_block(state, start_range, "for")
   elseif state.token == "," or state.token == "in" then
      -- Generic "for" loop.
      tag = "Forin"

      local iter_vars = {first_var}
      while test_and_skip_token(state, ",") do
         iter_vars[#iter_vars + 1] = parse_id(state)
      end

      ast_node[1] = iter_vars
      check_and_skip_token(state, "in")
      ast_node[2] = parse_expression_list(state)
      check_and_skip_token(state, "do")
      ast_node[3] = parse_block(state, start_range, "for")
   else
      parse_error(state, "expected '=', ',' or 'in'")
   end

   new_inner_node(start_range, state, tag, ast_node)
   -- Skip "end".
   skip_token(state)
   return ast_node
end
753
-- Parses a `repeat ... until cond` statement.
statements["repeat"] = function(state)
   local start_range = copy_range(state)
   -- Skip "repeat".
   skip_token(state)
   local block = parse_block(state, start_range, "repeat")
   -- parse_block() verified that the current token is "until"; skip it.
   skip_token(state)
   local condition = parse_expression(state, "condition")
   return new_inner_node(start_range, condition, "Repeat", {block, condition})
end
764
-- Parses a `function name.field:method() ... end` style definition,
-- desugaring it into a Set node assigning a Function node.
statements["function"] = function(state)
   local start_range = copy_range(state)
   -- Skip "function".
   skip_token(state)
   local lhs = parse_id(state)
   local implicit_self_range

   -- The loop exits after a ":", so ":" may only be the last separator.
   while (not implicit_self_range) and (state.token == "." or state.token == ":") do
      implicit_self_range = (state.token == ":") and copy_range(state)
      -- Skip "." or ":".
      skip_token(state)
      local index_node = parse_id(state, "String")
      lhs = new_inner_node(lhs, index_node, "Index", {lhs, index_node})
   end

   local function_node = parse_function(state, start_range)

   if implicit_self_range then
      -- Insert implicit "self" argument.
      local self_arg = new_outer_node(implicit_self_range, "Id", {"self", implicit = true})
      table.insert(function_node[1], 1, self_arg)
   end

   return new_inner_node(start_range, function_node, "Set", {{lhs}, {function_node}})
end
790
-- Parses `local function name() ... end` (Localrec node) and
-- `local name <attrib>, ... [= exprs]` (Local node) statements.
statements["local"] = function(state)
   local start_range = copy_range(state)
   -- Skip "local".
   skip_token(state)

   if state.token == "function" then
      -- Local function.
      local function_range = copy_range(state)
      -- Skip "function".
      skip_token(state)
      local var = parse_id(state)
      local function_node = parse_function(state, function_range)
      return new_inner_node(start_range, function_node, "Localrec", {{var}, {function_node}})
   end

   -- Local definition, potentially with assignment.
   local lhs = {}
   local rhs

   repeat
      lhs[#lhs + 1] = parse_id(state)

      -- Check if a Lua 5.4 attribute is present
      if state.token == "<" then
         -- For now, just consume and ignore it.
         skip_token(state)
         check_name(state)
         skip_token(state)
         check_and_skip_token(state, ">")
      end
   until not test_and_skip_token(state, ",")

   if test_and_skip_token(state, "=") then
      rhs = parse_expression_list(state)
   end

   -- The node ends at the last rhs expression, or the last lhs name if there is no rhs.
   return new_inner_node(start_range, rhs and rhs[#rhs] or lhs[#lhs], "Local", {lhs, rhs})
end
829
-- Parses a `::name::` label statement.
statements["::"] = function(state)
   local start_range = copy_range(state)
   -- Skip "::".
   skip_token(state)
   local name = check_name(state)
   -- Skip label name.
   skip_token(state)
   local ast_node = new_inner_node(start_range, state, "Label", {name})
   check_and_skip_token(state, "::")
   return ast_node
end
841
-- Tokens that terminate a block.
local closing_tokens = utils.array_to_set({"end", "eof", "else", "elseif", "until"})

-- Parses a `return [exprs]` statement.
statements["return"] = function(state)
   local start_range = copy_range(state)
   -- Skip "return".
   skip_token(state)

   if closing_tokens[state.token] or state.token == ";" then
      -- No return values.
      return new_outer_node(start_range, "Return")
   else
      local returns = parse_expression_list(state)
      return new_inner_node(start_range, returns[#returns], "Return", returns)
   end
end

-- Parses a `break` statement.
statements["break"] = function(state)
   local ast_node = new_outer_node(state, "Break")
   -- Skip "break".
   skip_token(state)
   return ast_node
end

-- Parses a `goto name` statement.
statements["goto"] = function(state)
   local start_range = copy_range(state)
   -- Skip "goto".
   skip_token(state)
   local name = check_name(state)
   local ast_node = new_outer_node(start_range, "Goto", {name})
   -- Skip label name.
   skip_token(state)
   return ast_node
end
875
-- Parses a statement that starts with an expression: either a call,
-- or an assignment with one or more comma-separated lvalues.
local function parse_expression_statement(state)
   local lhs
   local start_range = copy_range(state)

   -- Handle lhs of an assignment or a single expression.
   repeat
      local item_start_range = lhs and copy_range(state) or start_range
      local expected = lhs and "identifier or field" or "statement"
      local primary_expression = parse_simple_expression(state, expected, true)

      if primary_expression.tag == "Paren" then
         -- (expr) in lhs is invalid.
         parser.syntax_error("expected " .. expected .. " near '('", item_start_range)
      end

      if primary_expression.tag == "Call" or primary_expression.tag == "Invoke" then
         if lhs then
            -- This is an assignment, and a call is not valid in lhs.
            parse_error(state, "expected call or indexing")
         else
            -- This is a call.
            return primary_expression
         end
      end

      -- This is an assignment.
      lhs = lhs or {}
      lhs[#lhs + 1] = primary_expression
   until not test_and_skip_token(state, ",")

   check_and_skip_token(state, "=")
   local rhs = parse_expression_list(state)
   return new_inner_node(start_range, rhs[#rhs], "Set", {lhs, rhs})
end

-- Parses a single statement, dispatching on its first token.
local function parse_statement(state)
   return (statements[state.token] or parse_expression_statement)(state)
end
914
-- Parses statements until a closing token is reached, then checks it's the
-- right one for `opening_token` (nil means the top-level block, closed by
-- eof). Statements are appended to `block` (or a fresh table), which is
-- returned. Also notifies the unpaired token guesser during the second pass.
function parse_block(state, opening_token_range, opening_token, block)
   local unpaired_token_guesser = state.unpaired_token_guesser

   if unpaired_token_guesser and opening_token then
      unpaired_token_guesser:on_block_start(opening_token_range, opening_token)
   end

   block = block or {}
   local after_statement = false

   while not closing_tokens[state.token] do
      local first_token = state.token

      if first_token == ";" then
         if not after_statement then
            -- Track semicolons not directly following a statement.
            table.insert(state.hanging_semicolons, copy_range(state))
         end

         -- Skip ";".
         skip_token(state)
         -- Further semicolons are considered hanging.
         after_statement = false
      else
         if unpaired_token_guesser then
            unpaired_token_guesser:on_statement()
         end

         local statement = parse_statement(state)
         after_statement = true
         block[#block + 1] = statement

         if statement.tag == "Return" then
            -- "return" must be the last statement.
            -- However, one ";" after it is allowed.
            test_and_skip_token(state, ";")
            break
         end
      end
   end

   if unpaired_token_guesser and opening_token then
      unpaired_token_guesser:on_block_end()
   end

   check_closing_token(state, opening_token_range, opening_token)

   return block
end
963
-- Creates a fresh parser state for `src`. `line_offsets` and `line_lengths`
-- are optional tables passed through to the lexer to be filled.
function new_state(src, line_offsets, line_lengths)
   return {
      lexer = lexer.new_state(src, line_offsets, line_lengths),
      -- Set of line numbers containing code.
      code_lines = {},
      -- Maps line numbers to "comment", "string", or nil based on whether the line ending is within a token.
      line_endings = {},
      -- Array of {contents = string} with range info.
      comments = {},
      -- Array of ranges of semicolons not following a statement.
      hanging_semicolons = {}
   }
end
977
978-- Parses source characters.
979-- Returns AST (in almost MetaLua format), array of comments - tables {contents = string} with range info,
980-- set of line numbers containing code, map of types of tokens wrapping line endings (nil, "string", or "comment"),
981-- array of ranges of hanging semicolons (not after statements), array of line start offsets, array of line lengths.
982-- The last two tables can be passed as arguments to be filled.
983-- On error throws an instance of parser.SyntaxError: table {msg = msg, prev_range = prev_range?} with range info,
984-- prev_range may refer to some extra relevant location.
function parser.parse(src, line_offsets, line_lengths)
   local state = new_state(src, line_offsets, line_lengths)
   -- Load the first token.
   skip_token(state)
   local ast = parse_block(state)
   return ast, state.comments, state.code_lines, state.line_endings, state.hanging_semicolons,
      state.lexer.line_offsets, state.lexer.line_lengths
end
992
993return parser
994