local lexer = require "luacheck.lexer"
local utils = require "luacheck.utils"

local parser = {}

-- A table with range info, or simply range, has `line`, `offset`, and `end_offset` fields.
-- `line` is the line of the first character.
-- Parser state table has range info for the current token, and all AST
-- node tables have range info for themselves, including parens around expressions
-- that are otherwise not reflected in the AST structure.

parser.SyntaxError = utils.class()

-- Initializes a syntax error with a message and range info of the error location,
-- plus optional range info of a second relevant location (stored as `prev_*` fields).
function parser.SyntaxError:__init(msg, range, prev_range)
   self.msg = msg
   self.line = range.line
   self.offset = range.offset
   self.end_offset = range.end_offset

   if prev_range then
      self.prev_line = prev_range.line
      self.prev_offset = prev_range.offset
      self.prev_end_offset = prev_range.end_offset
   end
end

-- Throws an instance of parser.SyntaxError.
-- Level 0 so that `error` does not prepend a position string to the error object.
function parser.syntax_error(msg, range, prev_range)
   error(parser.SyntaxError(msg, range, prev_range), 0)
end

-- Marks all line endings between the start of the previous token and the current
-- lexer position as being inside a token of given type ("comment" or "string").
local function mark_line_endings(state, token_type)
   for line = state.line, state.lexer.line - 1 do
      state.line_endings[line] = token_type
   end
end

-- Fetches the next non-comment token into the parser state.
-- Collects comments into `state.comments`, records line ending types and code lines.
-- Throws a syntax error if the lexer reports an invalid token.
local function skip_token(state)
   while true do
      local token, token_value, line, offset, error_end_offset = lexer.next_token(state.lexer)
      state.token = token
      state.token_value = token_value
      state.line = line
      state.offset = offset
      state.end_offset = error_end_offset or (state.lexer.offset - 1)

      if not token then
         -- Lexer error: `token_value` holds the error message.
         parser.syntax_error(token_value, state)
      end

      if token == "short_comment" then
         state.comments[#state.comments + 1] = {
            contents = token_value,
            line = line,
            offset = offset,
            end_offset = state.end_offset
         }

         state.line_endings[line] = "comment"
      elseif token == "long_comment" then
         mark_line_endings(state, "comment")
      else
         if token ~= "eof" then
            mark_line_endings(state, "string")
            state.code_lines[line] = true
            state.code_lines[state.lexer.line] = true
         end

         return
      end
   end
end

-- Returns a human-readable representation of a token for error messages.
local function token_name(token)
   return token == "name" and "identifier" or (token == "eof" and "<eof>" or ("'" .. token .. "'"))
end

-- Throws a syntax error at the current token with message "<msg> near <token>".
-- `token_prefix`, if given, is inserted before the token representation;
-- `message_suffix`, if given, is appended after the whole message.
local function parse_error(state, msg, prev_range, token_prefix, message_suffix)
   local token_repr

   if state.token == "eof" then
      token_repr = "<eof>"
   else
      token_repr = lexer.get_quoted_substring_or_line(state.lexer, state.line, state.offset, state.end_offset)
   end

   if token_prefix then
      token_repr = token_prefix .. " " .. token_repr
   end

   msg = msg .. " near " .. token_repr

   if message_suffix then
      msg = msg .. " " .. message_suffix
   end

   parser.syntax_error(msg, state, prev_range)
end

-- Throws a syntax error unless the current token is the expected one.
local function check_token(state, token)
   if state.token ~= token then
      parse_error(state, "expected " .. token_name(token))
   end
end

local function check_and_skip_token(state, token)
   check_token(state, token)
   skip_token(state)
end

-- If the current token matches, skips it and returns true; otherwise returns nothing.
local function test_and_skip_token(state, token)
   if state.token == token then
      skip_token(state)
      return true
   end
end

local function copy_range(range)
   return {
      line = range.line,
      offset = range.offset,
      end_offset = range.end_offset
   }
end

-- Forward declarations (mutual recursion between these and the functions below).
local new_state
local parse_block
local missing_closing_token_error

-- Attempt to guess a better location for missing `end` and `until` errors (usually they uselessly point to eof).
-- Guessed error token should be selected in such a way that inserting previously missing closing token
-- in front of it should fix the error or at least move its opening token forward.
-- The idea is to track the stack of opening tokens and their indentations.
-- For the first statement or closing token with the same or smaller indentation than the opening token
-- on the top of the stack:
-- * If it has the same indentation but is not the appropriate closing token for the opening one, pick it
--   as the guessed error location.
-- * If it has a lower indentation level, pick it as the guessed error location even if it closes the opening token.
-- Example:
-- local function f()
--    <code>
--
--    if cond then <- `if` is the guessed opening token
--       <code>
--
--    <code not starting with `end`> <- first token on this line is the guessed error location
-- end
-- Another one:
-- local function g()
--    <code>
--
--    if cond then <- `if` is the guessed opening token
--       <code>
--
-- end <- `end` is the guessed error location

local opening_token_to_closing = {
   ["("] = ")",
   ["["] = "]",
   ["{"] = "}",
   ["do"] = "end",
   ["if"] = "end",
   ["else"] = "end",
   ["elseif"] = "end",
   ["while"] = "end",
   ["repeat"] = "until",
   ["for"] = "end",
   ["function"] = "end"
}

-- Returns a measure of the indentation of given line (length of leading whitespace
-- minus one). The absolute value does not matter: results are only compared to each other.
local function get_indentation(state, line)
   local ws_start, ws_end = state.lexer.src:find("^[ \t\v\f]*", state.lexer.line_offsets[line])
   return ws_end - ws_start
end

local UnpairedTokenGuesser = utils.class()

function UnpairedTokenGuesser:__init(state, error_opening_range, error_closing_token)
   self.old_state = state
   self.error_offset = state.offset
   self.error_opening_range = error_opening_range
   self.error_closing_token = error_closing_token
   self.opening_tokens_stack = utils.Stack()
end

-- Reparses the source from scratch with this guesser attached to the state;
-- the improved error is thrown from within the reparse (see `check_token` below).
function UnpairedTokenGuesser:guess()
   -- Need to reinitialize lexer (e.g. to skip shebang again).
   self.state = new_state(self.old_state.lexer.src)
   self.state.unpaired_token_guesser = self
   skip_token(self.state)
   parse_block(self.state)
   error("No syntax error in second parse", 0)
end

-- Pushes info about an opening token (range, expected closing token, indentation)
-- onto the stack when a block is entered.
function UnpairedTokenGuesser:on_block_start(opening_token_range, opening_token)
   local token_wrapper = copy_range(opening_token_range)
   token_wrapper.token = opening_token
   token_wrapper.closing_token = opening_token_to_closing[opening_token]
   token_wrapper.eligible = token_wrapper.closing_token == self.error_closing_token
   token_wrapper.indentation = get_indentation(self.state, opening_token_range.line)
   self.opening_tokens_stack:push(token_wrapper)
end

function UnpairedTokenGuesser:set_guessed()
   -- Keep the first detected location.
   if self.guessed then
      return
   end

   self.guessed = self.opening_tokens_stack.top
   self.guessed.error_token = self.state.token
   self.guessed.error_range = copy_range(self.state)
end

-- Called on each statement start and block end during the second parse.
-- Applies the indentation heuristic described above, and once the original error
-- location is reached, rethrows the error pointing at the guessed location instead.
function UnpairedTokenGuesser:check_token()
   local top = self.opening_tokens_stack.top

   if top and top.eligible and self.state.line > top.line then
      local token_indentation = get_indentation(self.state, self.state.line)

      if token_indentation < top.indentation then
         self:set_guessed()
      elseif token_indentation == top.indentation then
         local token = self.state.token

         -- `else` and `elseif` are valid continuations of `if`/`elseif` at the same
         -- indentation and should not be picked as the error location.
         if token ~= top.closing_token and
               ((top.token ~= "if" and top.token ~= "elseif") or (token ~= "elseif" and token ~= "else")) then
            self:set_guessed()
         end
      end
   end

   if self.state.offset == self.error_offset then
      if self.guessed and self.guessed.error_range.offset ~= self.state.offset then
         -- Pretend the current token is the guessed one and report there.
         self.state.line = self.guessed.error_range.line
         self.state.offset = self.guessed.error_range.offset
         self.state.end_offset = self.guessed.error_range.end_offset
         self.state.token = self.guessed.error_token
         missing_closing_token_error(self.state,
            self.guessed, self.guessed.token, self.guessed.closing_token, true)
      end
   end
end

function UnpairedTokenGuesser:on_block_end()
   self:check_token()
   self.opening_tokens_stack:pop()

   if not self.opening_tokens_stack.top then
      -- Inserting an end token into a balanced sequence of tokens adds an error earlier than original one.
      self.guessed = nil
   end
end

function UnpairedTokenGuesser:on_statement()
   self:check_token()
end

-- Throws "expected <closing token>" syntax error, mentioning the line of the opening
-- token when it differs from the current line. `is_guess` marks errors produced by
-- the indentation heuristic and adjusts the message accordingly.
function missing_closing_token_error(state, opening_range, opening_token, closing_token, is_guess)
   local msg = "expected " .. token_name(closing_token)

   if opening_range and opening_range.line ~= state.line then
      msg = msg .. " (to close " .. token_name(opening_token) .. " on line " .. tostring(opening_range.line) .. ")"
   end

   local token_prefix
   local message_suffix

   if is_guess then
      if state.token == closing_token then
         -- "expected 'end' near 'end'" seems confusing.
         token_prefix = "less indented"
      end

      message_suffix = "(indentation-based guess)"
   end

   parse_error(state, msg, opening_range, token_prefix, message_suffix)
end

-- Checks that the current token closes given opening token; on failure, runs the
-- unpaired-token guesser (for `end`/`until`) and throws a missing-closing-token error.
local function check_closing_token(state, opening_range, opening_token)
   local closing_token = opening_token_to_closing[opening_token] or "eof"

   if state.token == closing_token then
      return
   end

   if (opening_token == "if" or opening_token == "elseif") and (state.token == "else" or state.token == "elseif") then
      return
   end

   if closing_token == "end" or closing_token == "until" then
      -- Don't recurse: the guesser itself reparses with `check_closing_token` active.
      if not state.unpaired_token_guesser then
         UnpairedTokenGuesser(state, opening_range, closing_token):guess()
      end
   end

   missing_closing_token_error(state, opening_range, opening_token, closing_token)
end

local function check_and_skip_closing_token(state, opening_range, opening_token)
   check_closing_token(state, opening_range, opening_token)
   skip_token(state)
end

-- Checks that the current token is a name and returns its value (does not skip it).
local function check_name(state)
   check_token(state, "name")
   return state.token_value
end

-- Creates an AST node (or fills given one) whose range is that of a single range.
local function new_outer_node(range, tag, node)
   node = node or {}
   node.line = range.line
   node.offset = range.offset
   node.end_offset = range.end_offset
   node.tag = tag
   return node
end

-- Creates an AST node (or fills given one) spanning from the start of `start_range`
-- to the end of `end_range`.
local function new_inner_node(start_range, end_range, tag, node)
   node = node or {}
   node.line = start_range.line
   node.offset = start_range.offset
   node.end_offset = end_range.end_offset
   node.tag = tag
   return node
end

local parse_expression

-- Parses a comma-separated list of expressions into `list` (or a new table).
local function parse_expression_list(state, list)
   list = list or {}

   repeat
      list[#list + 1] = parse_expression(state)
   until not test_and_skip_token(state, ",")

   return list
end

-- Parses a name token into an "Id" node (or a node with given tag, e.g. "String").
local function parse_id(state, tag)
   local ast_node = new_outer_node(state, tag or "Id")
   ast_node[1] = check_name(state)
   -- Skip name.
   skip_token(state)
   return ast_node
end

-- Returns a parser for a single-token literal expression with given tag.
local function atom(tag)
   return function(state)
      local ast_node = new_outer_node(state, tag)
      ast_node[1] = state.token_value
      skip_token(state)
      return ast_node
   end
end

local simple_expressions = {}

simple_expressions.number = atom("Number")
simple_expressions.string = atom("String")
simple_expressions["nil"] = atom("Nil")
simple_expressions["true"] = atom("True")
simple_expressions["false"] = atom("False")
simple_expressions["..."] = atom("Dots")

-- Parses a table constructor into a "Table" node containing array items and "Pair" nodes.
simple_expressions["{"] = function(state)
   local start_range = copy_range(state)
   local ast_node = {}
   skip_token(state)

   repeat
      if state.token == "}" then
         break
      end

      local key_node, value_node
      local first_token_range = copy_range(state)

      if state.token == "name" then
         local name = state.token_value
         skip_token(state) -- Skip name.

         if test_and_skip_token(state, "=") then
            -- `name` = `expr`.
            key_node = new_outer_node(first_token_range, "String", {name})
            value_node = parse_expression(state)
         else
            -- `name` is beginning of an expression in array part.
            -- Backtrack lexer to before name.
            state.lexer.line = first_token_range.line
            state.lexer.offset = first_token_range.offset
            skip_token(state) -- Load name again.
            value_node = parse_expression(state)
         end
      elseif state.token == "[" then
         -- [ `expr` ] = `expr`.
         skip_token(state)
         key_node = parse_expression(state)
         check_and_skip_closing_token(state, first_token_range, "[")
         check_and_skip_token(state, "=")
         value_node = parse_expression(state)
      else
         -- Expression in array part.
         value_node = parse_expression(state)
      end

      if key_node then
         -- Pair.
         ast_node[#ast_node + 1] = new_inner_node(first_token_range, value_node, "Pair", {key_node, value_node})
      else
         -- Array part item.
         ast_node[#ast_node + 1] = value_node
      end
   until not (test_and_skip_token(state, ",") or test_and_skip_token(state, ";"))

   new_inner_node(start_range, state, "Table", ast_node)
   check_and_skip_closing_token(state, start_range, "{")
   return ast_node
end

-- Parses argument list and the statements.
local function parse_function(state, function_range)
   local paren_range = copy_range(state)
   check_and_skip_token(state, "(")
   local args = {}

   -- Are there arguments?
   if state.token ~= ")" then
      repeat
         if state.token == "name" then
            args[#args + 1] = parse_id(state)
         elseif state.token == "..." then
            args[#args + 1] = simple_expressions["..."](state)
            break
         else
            parse_error(state, "expected argument")
         end
      until not test_and_skip_token(state, ",")
   end

   check_and_skip_closing_token(state, paren_range, "(")
   local body = parse_block(state, function_range, "function")
   local end_range = copy_range(state)
   -- Skip "end".
   skip_token(state)
   return new_inner_node(function_range, end_range, "Function", {args, body, end_range = end_range})
end

simple_expressions["function"] = function(state)
   local function_range = copy_range(state)
   -- Skip "function".
   skip_token(state)
   return parse_function(state, function_range)
end

-- A call handler parses arguments of a call with given base node that determines resulting node start location,
-- given tag, and array to which the arguments should be appended.
local call_handlers = {}

call_handlers["("] = function(state, base_node, tag, node)
   local paren_range = copy_range(state)
   -- Skip "(".
   skip_token(state)

   if state.token ~= ")" then
      parse_expression_list(state, node)
   end

   new_inner_node(base_node, state, tag, node)
   check_and_skip_closing_token(state, paren_range, "(")
   return node
end

-- Single table constructor or string literal as the only call argument.
call_handlers["{"] = function(state, base_node, tag, node)
   local arg_node = simple_expressions[state.token](state)
   node[#node + 1] = arg_node
   return new_inner_node(base_node, arg_node, tag, node)
end

call_handlers.string = call_handlers["{"]

-- Suffix handlers parse indexing and call suffixes after a primary expression.
local suffix_handlers = {}

suffix_handlers["."] = function(state, base_node)
   -- Skip ".".
   skip_token(state)
   local index_node = parse_id(state, "String")
   return new_inner_node(base_node, index_node, "Index", {base_node, index_node})
end

suffix_handlers["["] = function(state, base_node)
   local bracket_range = copy_range(state)
   -- Skip "[".
   skip_token(state)
   local index_node = parse_expression(state)
   local ast_node = new_inner_node(base_node, state, "Index", {base_node, index_node})
   check_and_skip_closing_token(state, bracket_range, "[")
   return ast_node
end

suffix_handlers[":"] = function(state, base_node)
   -- Skip ":".
   skip_token(state)
   local method_name = parse_id(state, "String")
   local call_handler = call_handlers[state.token]

   if not call_handler then
      parse_error(state, "expected method arguments")
   end

   return call_handler(state, base_node, "Invoke", {base_node, method_name})
end

suffix_handlers["("] = function(state, base_node)
   return call_handlers[state.token](state, base_node, "Call", {base_node})
end

suffix_handlers["{"] = suffix_handlers["("]
suffix_handlers.string = suffix_handlers["("]

-- Parses a primary expression (parenthesized expression, identifier, or literal)
-- followed by any number of indexing/call suffixes.
-- `kind` is used in error messages; `no_literals` disallows literal expressions
-- (used when parsing the lhs of an assignment).
local function parse_simple_expression(state, kind, no_literals)
   local expression

   if state.token == "(" then
      local paren_range = copy_range(state)
      skip_token(state)
      local inner_expression = parse_expression(state)
      expression = new_inner_node(paren_range, state, "Paren", {inner_expression})
      check_and_skip_closing_token(state, paren_range, "(")
   elseif state.token == "name" then
      expression = parse_id(state)
   else
      local literal_handler = simple_expressions[state.token]

      if not literal_handler or no_literals then
         parse_error(state, "expected " .. (kind or "expression"))
      end

      -- Literals can't be indexed or called directly, so no suffix parsing.
      return literal_handler(state)
   end

   while true do
      local suffix_handler = suffix_handlers[state.token]

      if suffix_handler then
         expression = suffix_handler(state, expression)
      else
         return expression
      end
   end
end

local unary_operators = {
   ["not"] = "not",
   ["-"] = "unm",
   ["~"] = "bnot",
   ["#"] = "len"
}

local unary_priority = 12

local binary_operators = {
   ["+"] = "add", ["-"] = "sub",
   ["*"] = "mul", ["%"] = "mod",
   ["^"] = "pow",
   ["/"] = "div", ["//"] = "idiv",
   ["&"] = "band", ["|"] = "bor", ["~"] = "bxor",
   ["<<"] = "shl", [">>"] = "shr",
   [".."] = "concat",
   ["~="] = "ne", ["=="] = "eq",
   ["<"] = "lt", ["<="] = "le",
   [">"] = "gt", [">="] = "ge",
   ["and"] = "and", ["or"] = "or"
}

-- Left and right priorities differ for right-associative operators
-- (`..` and `^`: right priority is lower than left).
local left_priorities = {
   add = 10, sub = 10,
   mul = 11, mod = 11,
   pow = 14,
   div = 11, idiv = 11,
   band = 6, bor = 4, bxor = 5,
   shl = 7, shr = 7,
   concat = 9,
   ne = 3, eq = 3,
   lt = 3, le = 3,
   gt = 3, ge = 3,
   ["and"] = 2, ["or"] = 1
}

local right_priorities = {
   add = 10, sub = 10,
   mul = 11, mod = 11,
   pow = 13,
   div = 11, idiv = 11,
   band = 6, bor = 4, bxor = 5,
   shl = 7, shr = 7,
   concat = 8,
   ne = 3, eq = 3,
   lt = 3, le = 3,
   gt = 3, ge = 3,
   ["and"] = 2, ["or"] = 1
}

-- Precedence-climbing expression parser: parses a subexpression containing
-- only operators with priority higher than `limit`.
local function parse_subexpression(state, limit, kind)
   local expression
   local unary_operator = unary_operators[state.token]

   if unary_operator then
      local operator_range = copy_range(state)
      -- Skip operator.
      skip_token(state)
      local operand = parse_subexpression(state, unary_priority)
      expression = new_inner_node(operator_range, operand, "Op", {unary_operator, operand})
   else
      expression = parse_simple_expression(state, kind)
   end

   -- Expand while operators have priorities higher than `limit`.
   while true do
      local binary_operator = binary_operators[state.token]

      if not binary_operator or left_priorities[binary_operator] <= limit then
         break
      end

      -- Skip operator.
      skip_token(state)
      -- Read subexpression with higher priority.
      local subexpression = parse_subexpression(state, right_priorities[binary_operator])
      expression = new_inner_node(expression, subexpression, "Op", {binary_operator, expression, subexpression})
   end

   return expression
end

function parse_expression(state, kind)
   return parse_subexpression(state, 0, kind)
end

local statements = {}

statements["if"] = function(state)
   local start_range = copy_range(state)
   -- Skip "if".
   skip_token(state)
   local ast_node = {}

   -- The loop is entered after skipping "if" or "elseif".
   -- Block start token info is set to the last skipped "if", "elseif", or "else" token.
   local block_start_token = "if"
   local block_start_range = start_range

   while true do
      ast_node[#ast_node + 1] = parse_expression(state, "condition")
      -- Add range of the "then" token to the block statement array.
      local branch_range = copy_range(state)
      check_and_skip_token(state, "then")
      ast_node[#ast_node + 1] = parse_block(state, block_start_range, block_start_token, branch_range)

      if state.token == "else" then
         branch_range = copy_range(state)
         block_start_token = "else"
         block_start_range = branch_range
         skip_token(state)
         ast_node[#ast_node + 1] = parse_block(state, block_start_range, block_start_token, branch_range)
         break
      elseif state.token == "elseif" then
         block_start_token = "elseif"
         block_start_range = copy_range(state)
         skip_token(state)
      else
         break
      end
   end

   new_inner_node(start_range, state, "If", ast_node)
   -- Skip "end".
   skip_token(state)
   return ast_node
end

statements["while"] = function(state)
   local start_range = copy_range(state)
   -- Skip "while".
   skip_token(state)
   local condition = parse_expression(state, "condition")
   check_and_skip_token(state, "do")
   local block = parse_block(state, start_range, "while")
   local ast_node = new_inner_node(start_range, state, "While", {condition, block})
   -- Skip "end".
   skip_token(state)
   return ast_node
end

statements["do"] = function(state)
   local start_range = copy_range(state)
   -- Skip "do".
   skip_token(state)
   local block = parse_block(state, start_range, "do")
   local ast_node = new_inner_node(start_range, state, "Do", block)
   -- Skip "end".
   skip_token(state)
   return ast_node
end

statements["for"] = function(state)
   local start_range = copy_range(state)
   -- Skip "for".
   skip_token(state)

   local ast_node = {}
   local tag
   local first_var = parse_id(state)

   if state.token == "=" then
      -- Numeric "for" loop.
      tag = "Fornum"
      -- Skip "=".
      skip_token(state)
      ast_node[1] = first_var
      ast_node[2] = parse_expression(state)
      check_and_skip_token(state, ",")
      ast_node[3] = parse_expression(state)

      if test_and_skip_token(state, ",") then
         -- Optional step expression.
         ast_node[4] = parse_expression(state)
      end

      check_and_skip_token(state, "do")
      ast_node[#ast_node + 1] = parse_block(state, start_range, "for")
   elseif state.token == "," or state.token == "in" then
      -- Generic "for" loop.
      tag = "Forin"

      local iter_vars = {first_var}
      while test_and_skip_token(state, ",") do
         iter_vars[#iter_vars + 1] = parse_id(state)
      end

      ast_node[1] = iter_vars
      check_and_skip_token(state, "in")
      ast_node[2] = parse_expression_list(state)
      check_and_skip_token(state, "do")
      ast_node[3] = parse_block(state, start_range, "for")
   else
      parse_error(state, "expected '=', ',' or 'in'")
   end

   new_inner_node(start_range, state, tag, ast_node)
   -- Skip "end".
   skip_token(state)
   return ast_node
end

statements["repeat"] = function(state)
   local start_range = copy_range(state)
   -- Skip "repeat".
   skip_token(state)
   local block = parse_block(state, start_range, "repeat")
   -- Skip "until".
   skip_token(state)
   local condition = parse_expression(state, "condition")
   return new_inner_node(start_range, condition, "Repeat", {block, condition})
end

statements["function"] = function(state)
   local start_range = copy_range(state)
   -- Skip "function".
   skip_token(state)
   local lhs = parse_id(state)
   local implicit_self_range

   -- Parse dotted name; a ":" component must be last and makes this a method definition.
   while (not implicit_self_range) and (state.token == "." or state.token == ":") do
      implicit_self_range = (state.token == ":") and copy_range(state)
      -- Skip "." or ":".
      skip_token(state)
      local index_node = parse_id(state, "String")
      lhs = new_inner_node(lhs, index_node, "Index", {lhs, index_node})
   end

   local function_node = parse_function(state, start_range)

   if implicit_self_range then
      -- Insert implicit "self" argument.
      local self_arg = new_outer_node(implicit_self_range, "Id", {"self", implicit = true})
      table.insert(function_node[1], 1, self_arg)
   end

   return new_inner_node(start_range, function_node, "Set", {{lhs}, {function_node}})
end

statements["local"] = function(state)
   local start_range = copy_range(state)
   -- Skip "local".
   skip_token(state)

   if state.token == "function" then
      -- Local function.
      local function_range = copy_range(state)
      -- Skip "function".
      skip_token(state)
      local var = parse_id(state)
      local function_node = parse_function(state, function_range)
      return new_inner_node(start_range, function_node, "Localrec", {{var}, {function_node}})
   end

   -- Local definition, potentially with assignment.
   local lhs = {}
   local rhs

   repeat
      lhs[#lhs + 1] = parse_id(state)

      -- Check if a Lua 5.4 attribute is present
      if state.token == "<" then
         -- For now, just consume and ignore it.
         skip_token(state)
         check_name(state)
         skip_token(state)
         check_and_skip_token(state, ">")
      end
   until not test_and_skip_token(state, ",")

   if test_and_skip_token(state, "=") then
      rhs = parse_expression_list(state)
   end

   return new_inner_node(start_range, rhs and rhs[#rhs] or lhs[#lhs], "Local", {lhs, rhs})
end

statements["::"] = function(state)
   local start_range = copy_range(state)
   -- Skip "::".
   skip_token(state)
   local name = check_name(state)
   -- Skip label name.
   skip_token(state)
   local ast_node = new_inner_node(start_range, state, "Label", {name})
   check_and_skip_token(state, "::")
   return ast_node
end

-- Tokens that terminate a block.
local closing_tokens = utils.array_to_set({"end", "eof", "else", "elseif", "until"})

statements["return"] = function(state)
   local start_range = copy_range(state)
   -- Skip "return".
   skip_token(state)

   if closing_tokens[state.token] or state.token == ";" then
      -- No return values.
      return new_outer_node(start_range, "Return")
   else
      local returns = parse_expression_list(state)
      return new_inner_node(start_range, returns[#returns], "Return", returns)
   end
end

statements["break"] = function(state)
   local ast_node = new_outer_node(state, "Break")
   -- Skip "break".
   skip_token(state)
   return ast_node
end

statements["goto"] = function(state)
   local start_range = copy_range(state)
   -- Skip "goto".
   skip_token(state)
   local name = check_name(state)
   local ast_node = new_outer_node(start_range, "Goto", {name})
   -- Skip label name.
   skip_token(state)
   return ast_node
end

-- Parses a statement that starts with an expression: either a function/method call
-- or an assignment.
local function parse_expression_statement(state)
   local lhs
   local start_range = copy_range(state)

   -- Handle lhs of an assignment or a single expression.
   repeat
      local item_start_range = lhs and copy_range(state) or start_range
      local expected = lhs and "identifier or field" or "statement"
      local primary_expression = parse_simple_expression(state, expected, true)

      if primary_expression.tag == "Paren" then
         -- (expr) in lhs is invalid.
         parser.syntax_error("expected " .. expected .. " near '('", item_start_range)
      end

      if primary_expression.tag == "Call" or primary_expression.tag == "Invoke" then
         if lhs then
            -- This is an assignment, and a call is not valid in lhs.
            parse_error(state, "expected call or indexing")
         else
            -- This is a call.
            return primary_expression
         end
      end

      -- This is an assignment.
      lhs = lhs or {}
      lhs[#lhs + 1] = primary_expression
   until not test_and_skip_token(state, ",")

   check_and_skip_token(state, "=")
   local rhs = parse_expression_list(state)
   return new_inner_node(start_range, rhs[#rhs], "Set", {lhs, rhs})
end

local function parse_statement(state)
   return (statements[state.token] or parse_expression_statement)(state)
end

-- Parses statements into `block` (or a new table) until a closing token is reached,
-- then checks that the closing token matches `opening_token`.
-- Notifies the unpaired token guesser, if any, about block and statement boundaries.
function parse_block(state, opening_token_range, opening_token, block)
   local unpaired_token_guesser = state.unpaired_token_guesser

   if unpaired_token_guesser and opening_token then
      unpaired_token_guesser:on_block_start(opening_token_range, opening_token)
   end

   block = block or {}
   local after_statement = false

   while not closing_tokens[state.token] do
      local first_token = state.token

      if first_token == ";" then
         if not after_statement then
            table.insert(state.hanging_semicolons, copy_range(state))
         end

         -- Skip ";".
         skip_token(state)
         -- Further semicolons are considered hanging.
         after_statement = false
      else
         if unpaired_token_guesser then
            unpaired_token_guesser:on_statement()
         end

         local statement = parse_statement(state)
         after_statement = true
         block[#block + 1] = statement

         if statement.tag == "Return" then
            -- "return" must be the last statement.
            -- However, one ";" after it is allowed.
            test_and_skip_token(state, ";")
            break
         end
      end
   end

   if unpaired_token_guesser and opening_token then
      unpaired_token_guesser:on_block_end()
   end

   check_closing_token(state, opening_token_range, opening_token)

   return block
end

-- Creates a fresh parser state over given source.
function new_state(src, line_offsets, line_lengths)
   return {
      lexer = lexer.new_state(src, line_offsets, line_lengths),
      -- Set of line numbers containing code.
      code_lines = {},
      -- Maps line numbers to "comment", "string", or nil based on whether the line ending is within a token
      line_endings = {},
      -- Array of {contents = string} with range info.
      comments = {},
      -- Array of ranges of semicolons not following a statement.
      hanging_semicolons = {}
   }
end

-- Parses source characters.
-- Returns AST (in almost MetaLua format), array of comments - tables {contents = string} with range info,
-- set of line numbers containing code, map of types of tokens wrapping line endings (nil, "string", or "comment"),
-- array of ranges of hanging semicolons (not after statements), array of line start offsets, array of line lengths.
-- The last two tables can be passed as arguments to be filled.
-- On error throws an instance of parser.SyntaxError: table {msg = msg, prev_range = prev_range?} with range info,
-- prev_range may refer to some extra relevant location.
function parser.parse(src, line_offsets, line_lengths)
   local state = new_state(src, line_offsets, line_lengths)
   skip_token(state)
   local ast = parse_block(state)
   return ast, state.comments, state.code_lines, state.line_endings, state.hanging_semicolons,
      state.lexer.line_offsets, state.lexer.line_lengths
end

return parser