<?php
/**
 * Copyright 2011 Paul Copperman <paul.copperman@gmail.com>
 * Copyright 2018 Timo Tijhof
 * Copyright 2021 Roan Kattouw <roan.kattouw@gmail.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file
 * @license Apache-2.0
 * @license MIT
 * @license GPL-2.0-or-later
 * @license LGPL-2.1-or-later
 */

namespace Wikimedia\Minify;

/**
 * JavaScript Minifier
 *
 * This class is meant to safely minify JavaScript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 *
 * This class was written with ECMA-262 Edition 6 in mind ("ECMAScript 6"). Parsing features
 * new to later editions of ECMAScript might not be supported. It's assumed that the input is
 * syntactically correct; if it's not, this class may not detect that, and may produce incorrect
 * output.
 *
 * See <https://262.ecma-international.org/6.0/>.
 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is necessary to decide whether to parse a slash as division
	 * operator or as regexp literal, and to know where semicolon insertion is possible.
	 * States are generally named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose. The meaning of these states is documented
	 * in $model below.
	 *
	 * Negative numbers are used to indicate that the state is inside a generator function,
	 * which changes the behavior of 'yield'. All constants below are therefore positive, so
	 * that a state and its negated generator-function counterpart never collide.
	 */
	private const STATEMENT = 1;
	private const CONDITION = 2;
	private const FUNC = 3;
	private const GENFUNC = 4;
	private const PROPERTY_ASSIGNMENT = 5;
	private const EXPRESSION = 6;
	private const EXPRESSION_NO_NL = 7;
	private const EXPRESSION_OP = 8;
	private const EXPRESSION_DOT = 9;
	private const EXPRESSION_END = 10;
	private const EXPRESSION_ARROWFUNC = 11;
	private const EXPRESSION_TERNARY = 12;
	private const EXPRESSION_TERNARY_OP = 13;
	private const EXPRESSION_TERNARY_DOT = 14;
	private const EXPRESSION_TERNARY_ARROWFUNC = 15;
	private const PAREN_EXPRESSION = 16;
	private const PAREN_EXPRESSION_OP = 17;
	private const PAREN_EXPRESSION_DOT = 18;
	private const PAREN_EXPRESSION_ARROWFUNC = 19;
	private const PROPERTY_EXPRESSION = 20;
	private const PROPERTY_EXPRESSION_OP = 21;
	private const PROPERTY_EXPRESSION_DOT = 22;
	private const PROPERTY_EXPRESSION_ARROWFUNC = 23;
	private const CLASS_DEF = 24;
	private const IMPORT_EXPORT = 25;
	private const TEMPLATE_STRING_HEAD = 26;
	private const TEMPLATE_STRING_TAIL = 27;

	/* Token types.
	 * These are the values of $tokenTypes and the second-level keys of $model.
	 * They use a separate numeric range (1xx) from the parsing states above.
	 */
	private const TYPE_UN_OP = 101; // unary operators
	private const TYPE_INCR_OP = 102; // ++ and --
	private const TYPE_BIN_OP = 103; // binary operators (except .)
	private const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops
	private const TYPE_DOT = 105; // .
	private const TYPE_HOOK = 106; // ?
	private const TYPE_COLON = 107; // :
	private const TYPE_COMMA = 108; // ,
	private const TYPE_SEMICOLON = 109; // ;
	private const TYPE_BRACE_OPEN = 110; // {
	private const TYPE_BRACE_CLOSE = 111; // }
	private const TYPE_PAREN_OPEN = 112; // ( and [
	private const TYPE_PAREN_CLOSE = 113; // ) and ]
	private const TYPE_ARROW = 114; // =>
	private const TYPE_RETURN = 115; // keywords: break, continue, return, throw
	private const TYPE_IF = 116; // keywords: catch, for, with, switch, while, if
	private const TYPE_DO = 117; // keywords: case, finally, else, do, try
	private const TYPE_VAR = 118; // keywords: var, let, const
	private const TYPE_YIELD = 119; // keywords: yield
	private const TYPE_FUNC = 120; // keywords: function
	private const TYPE_CLASS = 121; // keywords: class
	private const TYPE_LITERAL = 122; // all literals, identifiers, unrecognised tokens, and other keywords
	private const TYPE_SPECIAL = 123; // For special treatment of tokens that usually mean something else

	/* Actions.
	 * These are the innermost keys of $model: for a given state/token pair they say how the
	 * state (and the state stack) changes.
	 */
	private const ACTION_GOTO = 201; // Go to another state
	private const ACTION_PUSH = 202; // Push a state to the stack
	private const ACTION_POP = 203; // Pop the state from the top of the stack, and go to that state

	// Sanity limit to avoid excessive memory usage
	// NOTE(review): presumably the maximum depth of the state stack filled by ACTION_PUSH;
	// the code that enforces it is outside this part of the file - confirm.
	private const STACK_LIMIT = 1000;

	// Length of the longest token in $tokenTypes made of punctuation characters,
	// as defined in $opChars. Update this if you add longer tokens to $tokenTypes.
	//
	// Currently the longest punctuation token is `>>>=`, which is 4 characters.
	private const LONGEST_PUNCTUATION_TOKEN = 4;

	/**
	 * @var int $maxLineLength
	 *
	 * Maximum line length
	 *
	 * This is not a strict maximum, but a guideline. Longer lines will be
	 * produced when literals (e.g. quoted strings) longer than this are
	 * encountered, or when required to guard against semicolon insertion.
	 *
	 * This is a private member (instead of constant) to allow tests to
	 * set it to 1, to verify ASI and line-breaking behaviour.
	 */
	private static $maxLineLength = 1000;

	/**
	 * @var bool $expandedStates
	 *
	 * Starts out as false.
	 *
	 * NOTE(review): presumably records whether self::ensureExpandedStates() has already
	 * augmented $model, so that the work is done only once per process. That method is
	 * outside this part of the file - confirm.
	 */
	private static $expandedStates = false;

	/**
	 * @var array $opChars
	 *
	 * Characters which can be combined without whitespace between them.
	 *
	 * This is used as a set: every character is a key, and every value is true.
	 */
	private static $opChars = [
		// ECMAScript 6.0 § 11.7 Punctuators
		// Unlike the spec, these are individual symbols, not sequences.
		'{' => true,
		'}' => true,
		'(' => true,
		')' => true,
		'[' => true,
		']' => true,
		'.' => true,
		';' => true,
		',' => true,
		'<' => true,
		'>' => true,
		'=' => true,
		'!' => true,
		'+' => true,
		'-' => true,
		'*' => true,
		'%' => true,
		'&' => true,
		'|' => true,
		'^' => true,
		'~' => true,
		'?' => true,
		':' => true,
		'/' => true,
		// ECMAScript 6.0 § 11.8.4 String Literals
		'"' => true,
		"'" => true,
		// ECMAScript 6.0 § 11.8.6 Template Literal Lexical Components
		'`' => true,
	];

	/**
	 * @var array $tokenTypes
	 *
	 * Tokens and their types.
	 *
	 * Maps the exact text of a token to one of the TYPE_* constants. Note that no token is
	 * mapped to TYPE_LITERAL or TYPE_SPECIAL here; per the descriptions of those constants,
	 * TYPE_LITERAL covers everything that is not listed, and TYPE_SPECIAL is for tokens that
	 * usually mean something else (see the TYPE_SPECIAL entries in $model).
	 */
	private static $tokenTypes = [
		// ECMAScript 6.0 § 12.5 Unary Operators
		// UnaryExpression includes PostfixExpression, which includes 'new'.
		'new' => self::TYPE_UN_OP,
		'delete' => self::TYPE_UN_OP,
		'void' => self::TYPE_UN_OP,
		'typeof' => self::TYPE_UN_OP,
		'~' => self::TYPE_UN_OP,
		'!' => self::TYPE_UN_OP,
		// ECMAScript 6.0 § 12.2 Primary Expression, among others
		'...' => self::TYPE_UN_OP,
		// ECMAScript 6.0 § 12.4 Postfix Expressions (postfix use)
		// and § 12.5 Unary Operators (prefix use)
		'++' => self::TYPE_INCR_OP,
		'--' => self::TYPE_INCR_OP,
		// ECMAScript 6.0 § 12.7 Additive Operators (binary use)
		// and § 12.5 Unary Operators (unary use)
		'+' => self::TYPE_ADD_OP,
		'-' => self::TYPE_ADD_OP,
		// ECMAScript 6.0 § 12.6 Multiplicative Operators
		'*' => self::TYPE_BIN_OP,
		'/' => self::TYPE_BIN_OP,
		'%' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.8 Bitwise Shift Operators
		'<<' => self::TYPE_BIN_OP,
		'>>' => self::TYPE_BIN_OP,
		'>>>' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.9 Relational Operators
		'<' => self::TYPE_BIN_OP,
		'>' => self::TYPE_BIN_OP,
		'<=' => self::TYPE_BIN_OP,
		'>=' => self::TYPE_BIN_OP,
		'instanceof' => self::TYPE_BIN_OP,
		'in' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.10 Equality Operators
		'==' => self::TYPE_BIN_OP,
		'!=' => self::TYPE_BIN_OP,
		'===' => self::TYPE_BIN_OP,
		'!==' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.11 Binary Bitwise Operators
		'&' => self::TYPE_BIN_OP,
		'^' => self::TYPE_BIN_OP,
		'|' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.12 Binary Logical Operators
		'&&' => self::TYPE_BIN_OP,
		'||' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.13 Conditional Operator
		// Also known as ternary.
		'?' => self::TYPE_HOOK,
		':' => self::TYPE_COLON,
		// ECMAScript 6.0 § 12.14 Assignment Operators
		'=' => self::TYPE_BIN_OP,
		'*=' => self::TYPE_BIN_OP,
		'/=' => self::TYPE_BIN_OP,
		'%=' => self::TYPE_BIN_OP,
		'+=' => self::TYPE_BIN_OP,
		'-=' => self::TYPE_BIN_OP,
		'<<=' => self::TYPE_BIN_OP,
		'>>=' => self::TYPE_BIN_OP,
		'>>>=' => self::TYPE_BIN_OP,
		'&=' => self::TYPE_BIN_OP,
		'^=' => self::TYPE_BIN_OP,
		'|=' => self::TYPE_BIN_OP,
		// ECMAScript 6.0 § 12.15 Comma Operator
		',' => self::TYPE_COMMA,

		// The keywords that disallow LineTerminator before their
		// (sometimes optional) Expression or Identifier.
		//
		//    keyword ;
		//    keyword [no LineTerminator here] Identifier ;
		//    keyword [no LineTerminator here] Expression ;
		//
		// See also ECMAScript 6.0 § 11.9.1 Rules of Automatic Semicolon Insertion
		'continue' => self::TYPE_RETURN,
		'break' => self::TYPE_RETURN,
		'return' => self::TYPE_RETURN,
		'throw' => self::TYPE_RETURN,
		// yield is only a keyword inside generator functions, otherwise it's an identifier
		// This is handled with the negative states hack: if the state is negative, TYPE_YIELD
		// is treated as TYPE_RETURN, if it's positive it's treated as TYPE_LITERAL
		'yield' => self::TYPE_YIELD,

		// The keywords require a parenthesised Expression or Identifier
		// before the next Statement.
		//
		//     keyword ( Expression ) Statement
		//     keyword ( Identifier ) Statement
		//
		// See also ECMAScript 6.0:
		// - § 13.6 The if Statement
		// - § 13.7 Iteration Statements (do, while, for)
		// - § 13.11 The with Statement
		// - § 13.12 The switch Statement
		// - § 13.15 The try Statement (catch)
		'if' => self::TYPE_IF,
		'catch' => self::TYPE_IF,
		'while' => self::TYPE_IF,
		'for' => self::TYPE_IF,
		'switch' => self::TYPE_IF,
		'with' => self::TYPE_IF,

		// The keywords followed by a Statement, Expression, or Block.
		//
		//     else Statement
		//     do Statement
		//     case Expression
		//     try Block
		//     finally Block
		//
		// See also ECMAScript 6.0:
		// - § 13.6 The if Statement (else)
		// - § 13.7 Iteration Statements (do, while, for)
		// - § 13.12 The switch Statement (case)
		// - § 13.15 The try Statement
		'else' => self::TYPE_DO,
		'do' => self::TYPE_DO,
		'case' => self::TYPE_DO,
		'try' => self::TYPE_DO,
		'finally' => self::TYPE_DO,

		// Keywords followed by a variable declaration
		// This is different from the group above, because a { begins
		// object destructuring, rather than a block
		'var' => self::TYPE_VAR,
		'let' => self::TYPE_VAR,
		'const' => self::TYPE_VAR,

		// ECMAScript 6.0 § 14.1 Function Definitions
		'function' => self::TYPE_FUNC,
		// ECMAScript 6.0 § 14.2 Arrow Function Definitions
		'=>' => self::TYPE_ARROW,

		// Class declaration or expression:
		//     class Identifier { ClassBody }
		//     class { ClassBody }
		//     class Identifier extends Expression { ClassBody }
		//     class extends Expression { ClassBody }
		'class' => self::TYPE_CLASS,

		// ECMAScript 6.0 § 12.3 Left-Hand-Side Expressions (MemberExpression)
		// A dot can also be part of a DecimalLiteral, but in that case we handle the entire
		// DecimalLiteral as one token. A separate '.' token is always part of a MemberExpression.
		'.' => self::TYPE_DOT,

		// Can be one of:
		// - Block (ECMAScript 6.0 § 13.2 Block)
		// - ObjectLiteral (ECMAScript 6.0 § 12.2 Primary Expression)
		'{' => self::TYPE_BRACE_OPEN,
		'}' => self::TYPE_BRACE_CLOSE,

		// Can be one of:
		// - Parenthesised Identifier or Expression after a
		//   TYPE_IF or TYPE_FUNC keyword.
		// - PrimaryExpression (ECMAScript 6.0 § 12.2 Primary Expression)
		// - CallExpression (ECMAScript 6.0 § 12.3 Left-Hand-Side Expressions)
		// - Beginning of an ArrowFunction (ECMAScript 6.0 § 14.2 Arrow Function Definitions)
		'(' => self::TYPE_PAREN_OPEN,
		')' => self::TYPE_PAREN_CLOSE,

		// Can be one of:
		// - ArrayLiteral (ECMAScript 6.0 § 12.2 Primary Expressions)
		// - ComputedPropertyName (ECMAScript 6.0 § 12.2.6 Object Initializer)
		'[' => self::TYPE_PAREN_OPEN,
		']' => self::TYPE_PAREN_CLOSE,

		// Can be one of:
		// - End of any statement
		// - EmptyStatement (ECMAScript 6.0 § 13.4 Empty Statement)
		';' => self::TYPE_SEMICOLON,
	];

	/**
	 * @var array $model
	 *
	 * The main table for the state machine. Defines the desired action for every state/token pair.
	 *
	 * The state pushed onto the stack by ACTION_PUSH will be returned to by ACTION_POP.
	 * A state/token pair may not specify both ACTION_POP and ACTION_GOTO. If that does happen,
	 * ACTION_POP takes precedence.
	 *
	 * This table is augmented by self::ensureExpandedStates().
	 */
	private static $model = [
		// Statement - This is the initial state.
		self::STATEMENT => [
			self::TYPE_UN_OP => [
				self::ACTION_GOTO => self::EXPRESSION,
			],
			self::TYPE_INCR_OP => [
				self::ACTION_GOTO => self::EXPRESSION,
			],
			self::TYPE_ADD_OP => [
				self::ACTION_GOTO => self::EXPRESSION,
			],
			self::TYPE_BRACE_OPEN => [
				// Use of '{' in statement context, creates a Block.
377 self::ACTION_PUSH => self::STATEMENT, 378 ], 379 self::TYPE_BRACE_CLOSE => [ 380 // Ends a Block 381 self::ACTION_POP => true, 382 ], 383 self::TYPE_PAREN_OPEN => [ 384 self::ACTION_PUSH => self::EXPRESSION_OP, 385 self::ACTION_GOTO => self::PAREN_EXPRESSION, 386 ], 387 self::TYPE_RETURN => [ 388 self::ACTION_GOTO => self::EXPRESSION_NO_NL, 389 ], 390 self::TYPE_IF => [ 391 self::ACTION_GOTO => self::CONDITION, 392 ], 393 self::TYPE_VAR => [ 394 self::ACTION_GOTO => self::EXPRESSION, 395 ], 396 self::TYPE_FUNC => [ 397 self::ACTION_PUSH => self::STATEMENT, 398 self::ACTION_GOTO => self::FUNC, 399 ], 400 self::TYPE_CLASS => [ 401 self::ACTION_PUSH => self::STATEMENT, 402 self::ACTION_GOTO => self::CLASS_DEF, 403 ], 404 self::TYPE_SPECIAL => [ 405 'import' => [ 406 self::ACTION_GOTO => self::IMPORT_EXPORT, 407 ], 408 'export' => [ 409 self::ACTION_GOTO => self::IMPORT_EXPORT, 410 ], 411 ], 412 self::TYPE_LITERAL => [ 413 self::ACTION_GOTO => self::EXPRESSION_OP, 414 ], 415 ], 416 // The state after if/catch/while/for/switch/with 417 // Waits for an expression in parentheses, then goes to STATEMENT 418 self::CONDITION => [ 419 self::TYPE_PAREN_OPEN => [ 420 self::ACTION_PUSH => self::STATEMENT, 421 self::ACTION_GOTO => self::PAREN_EXPRESSION, 422 ], 423 ], 424 // The state after the function keyword. Waits for {, then goes to STATEMENT. 425 // The function body's closing } will pop the stack, so the state to return to 426 // after the function should be pushed to the stack first 427 self::FUNC => [ 428 // Needed to prevent * in an expression in the argument list from improperly 429 // triggering GENFUNC 430 self::TYPE_PAREN_OPEN => [ 431 self::ACTION_PUSH => self::FUNC, 432 self::ACTION_GOTO => self::PAREN_EXPRESSION, 433 ], 434 self::TYPE_BRACE_OPEN => [ 435 self::ACTION_GOTO => self::STATEMENT, 436 ], 437 self::TYPE_SPECIAL => [ 438 '*' => [ 439 self::ACTION_GOTO => self::GENFUNC, 440 ], 441 ], 442 ], 443 // After function*. 
Waits for { , then goes to a generator function statement. 444 self::GENFUNC => [ 445 self::TYPE_BRACE_OPEN => [ 446 // Note negative value: generator function states are negative 447 self::ACTION_GOTO => -self::STATEMENT 448 ], 449 ], 450 // Property assignment - This is an object literal declaration. 451 // For example: `{ key: value, key2, [computedKey3]: value3, method4() { ... } }` 452 self::PROPERTY_ASSIGNMENT => [ 453 // Note that keywords like if, class, var, delete, instanceof etc. can be used as keys, 454 // and should be treated as literals here, as they are in EXPRESSION_DOT. In this state, 455 // that is implicitly true because TYPE_LITERAL has no action, so it stays in this state. 456 // If we later add a state transition for TYPE_LITERAL, that same transition should 457 // also be applied to TYPE_RETURN, TYPE_IF, TYPE_DO, TYPE_VAR, TYPE_FUNC and TYPE_CLASS. 458 self::TYPE_COLON => [ 459 self::ACTION_GOTO => self::PROPERTY_EXPRESSION, 460 ], 461 // For {, which begins a method 462 self::TYPE_BRACE_OPEN => [ 463 self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, 464 // This is not flipped, see "Special cases" below 465 self::ACTION_GOTO => self::STATEMENT, 466 ], 467 self::TYPE_BRACE_CLOSE => [ 468 self::ACTION_POP => true, 469 ], 470 // For [, which begins a computed key 471 self::TYPE_PAREN_OPEN => [ 472 self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, 473 self::ACTION_GOTO => self::PAREN_EXPRESSION, 474 ], 475 self::TYPE_SPECIAL => [ 476 '*' => [ 477 self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, 478 self::ACTION_GOTO => self::GENFUNC, 479 ], 480 ], 481 ], 482 // Place in an expression where we expect an operand or a unary operator: the start 483 // of an expression or after an operator. 
Note that unary operators (including INCR_OP 484 // and ADD_OP) cause us to stay in this state, while operands take us to EXPRESSION_OP 485 self::EXPRESSION => [ 486 self::TYPE_SEMICOLON => [ 487 self::ACTION_GOTO => self::STATEMENT, 488 ], 489 self::TYPE_BRACE_OPEN => [ 490 self::ACTION_PUSH => self::EXPRESSION_OP, 491 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 492 ], 493 self::TYPE_BRACE_CLOSE => [ 494 self::ACTION_POP => true, 495 ], 496 self::TYPE_PAREN_OPEN => [ 497 self::ACTION_PUSH => self::EXPRESSION_OP, 498 self::ACTION_GOTO => self::PAREN_EXPRESSION, 499 ], 500 self::TYPE_FUNC => [ 501 self::ACTION_PUSH => self::EXPRESSION_OP, 502 self::ACTION_GOTO => self::FUNC, 503 ], 504 self::TYPE_CLASS => [ 505 self::ACTION_PUSH => self::EXPRESSION_OP, 506 self::ACTION_GOTO => self::CLASS_DEF, 507 ], 508 self::TYPE_LITERAL => [ 509 self::ACTION_GOTO => self::EXPRESSION_OP, 510 ], 511 ], 512 // An expression immediately after return/throw/break/continue, where a newline 513 // is not allowed. This state is identical to EXPRESSION, except that semicolon 514 // insertion can happen here, and we never stay here: in cases where EXPRESSION would 515 // do nothing, we go to EXPRESSION. 
516 self::EXPRESSION_NO_NL => [ 517 self::TYPE_UN_OP => [ 518 self::ACTION_GOTO => self::EXPRESSION, 519 ], 520 self::TYPE_INCR_OP => [ 521 self::ACTION_GOTO => self::EXPRESSION, 522 ], 523 // BIN_OP seems impossible at the start of an expression, but it can happen in 524 // yield *foo 525 self::TYPE_BIN_OP => [ 526 self::ACTION_GOTO => self::EXPRESSION, 527 ], 528 self::TYPE_ADD_OP => [ 529 self::ACTION_GOTO => self::EXPRESSION, 530 ], 531 self::TYPE_SEMICOLON => [ 532 self::ACTION_GOTO => self::STATEMENT, 533 ], 534 self::TYPE_BRACE_OPEN => [ 535 self::ACTION_PUSH => self::EXPRESSION_OP, 536 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 537 ], 538 self::TYPE_BRACE_CLOSE => [ 539 self::ACTION_POP => true, 540 ], 541 self::TYPE_PAREN_OPEN => [ 542 self::ACTION_PUSH => self::EXPRESSION_OP, 543 self::ACTION_GOTO => self::PAREN_EXPRESSION, 544 ], 545 self::TYPE_FUNC => [ 546 self::ACTION_PUSH => self::EXPRESSION_OP, 547 self::ACTION_GOTO => self::FUNC, 548 ], 549 self::TYPE_CLASS => [ 550 self::ACTION_PUSH => self::EXPRESSION_OP, 551 self::ACTION_GOTO => self::CLASS_DEF, 552 ], 553 self::TYPE_LITERAL => [ 554 self::ACTION_GOTO => self::EXPRESSION_OP, 555 ], 556 ], 557 // Place in an expression after an operand, where we expect an operator 558 self::EXPRESSION_OP => [ 559 self::TYPE_BIN_OP => [ 560 self::ACTION_GOTO => self::EXPRESSION, 561 ], 562 self::TYPE_ADD_OP => [ 563 self::ACTION_GOTO => self::EXPRESSION, 564 ], 565 self::TYPE_DOT => [ 566 self::ACTION_GOTO => self::EXPRESSION_DOT, 567 ], 568 self::TYPE_HOOK => [ 569 self::ACTION_PUSH => self::EXPRESSION, 570 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 571 ], 572 self::TYPE_COLON => [ 573 self::ACTION_GOTO => self::STATEMENT, 574 ], 575 self::TYPE_COMMA => [ 576 self::ACTION_GOTO => self::EXPRESSION, 577 ], 578 self::TYPE_SEMICOLON => [ 579 self::ACTION_GOTO => self::STATEMENT, 580 ], 581 self::TYPE_ARROW => [ 582 self::ACTION_GOTO => self::EXPRESSION_ARROWFUNC, 583 ], 584 self::TYPE_PAREN_OPEN => [ 585 
self::ACTION_PUSH => self::EXPRESSION_OP, 586 self::ACTION_GOTO => self::PAREN_EXPRESSION, 587 ], 588 self::TYPE_BRACE_CLOSE => [ 589 self::ACTION_POP => true, 590 ], 591 ], 592 // State after a dot (.). Like EXPRESSION, except that many keywords behave like literals 593 // (e.g. class, if, else, var, function) because they're not valid as identifiers but are 594 // valid as property names. 595 self::EXPRESSION_DOT => [ 596 self::TYPE_LITERAL => [ 597 self::ACTION_GOTO => self::EXPRESSION_OP, 598 ], 599 // The following are keywords behaving as literals 600 self::TYPE_RETURN => [ 601 self::ACTION_GOTO => self::EXPRESSION_OP, 602 ], 603 self::TYPE_IF => [ 604 self::ACTION_GOTO => self::EXPRESSION_OP, 605 ], 606 self::TYPE_DO => [ 607 self::ACTION_GOTO => self::EXPRESSION_OP, 608 ], 609 self::TYPE_VAR => [ 610 self::ACTION_GOTO => self::EXPRESSION_OP, 611 ], 612 self::TYPE_FUNC => [ 613 self::ACTION_GOTO => self::EXPRESSION_OP, 614 ], 615 self::TYPE_CLASS => [ 616 self::ACTION_GOTO => self::EXPRESSION_OP, 617 ], 618 // We don't expect real unary/binary operators here, but some keywords 619 // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be 620 // used as property names 621 self::TYPE_UN_OP => [ 622 self::ACTION_GOTO => self::EXPRESSION_OP, 623 ], 624 self::TYPE_BIN_OP => [ 625 self::ACTION_GOTO => self::EXPRESSION_OP, 626 ], 627 ], 628 // State after the } closing an arrow function body: like STATEMENT except 629 // that it has semicolon insertion, COMMA can continue the expression, and after 630 // a function we go to STATEMENT instead of EXPRESSION_OP 631 self::EXPRESSION_END => [ 632 self::TYPE_UN_OP => [ 633 self::ACTION_GOTO => self::EXPRESSION, 634 ], 635 self::TYPE_INCR_OP => [ 636 self::ACTION_GOTO => self::EXPRESSION, 637 ], 638 self::TYPE_ADD_OP => [ 639 self::ACTION_GOTO => self::EXPRESSION, 640 ], 641 self::TYPE_COMMA => [ 642 self::ACTION_GOTO => self::EXPRESSION, 643 ], 644 self::TYPE_SEMICOLON => [ 645 
self::ACTION_GOTO => self::STATEMENT, 646 ], 647 self::TYPE_BRACE_OPEN => [ 648 self::ACTION_PUSH => self::STATEMENT, 649 self::ACTION_GOTO => self::STATEMENT, 650 ], 651 self::TYPE_BRACE_CLOSE => [ 652 self::ACTION_POP => true, 653 ], 654 self::TYPE_PAREN_OPEN => [ 655 self::ACTION_PUSH => self::EXPRESSION_OP, 656 self::ACTION_GOTO => self::PAREN_EXPRESSION, 657 ], 658 self::TYPE_RETURN => [ 659 self::ACTION_GOTO => self::EXPRESSION_NO_NL, 660 ], 661 self::TYPE_IF => [ 662 self::ACTION_GOTO => self::CONDITION, 663 ], 664 self::TYPE_VAR => [ 665 self::ACTION_GOTO => self::EXPRESSION, 666 ], 667 self::TYPE_FUNC => [ 668 self::ACTION_PUSH => self::STATEMENT, 669 self::ACTION_GOTO => self::FUNC, 670 ], 671 self::TYPE_CLASS => [ 672 self::ACTION_PUSH => self::STATEMENT, 673 self::ACTION_GOTO => self::CLASS_DEF, 674 ], 675 self::TYPE_LITERAL => [ 676 self::ACTION_GOTO => self::EXPRESSION_OP, 677 ], 678 ], 679 // State after =>. Like EXPRESSION, except that { begins an arrow function body 680 // rather than an object literal. 681 self::EXPRESSION_ARROWFUNC => [ 682 self::TYPE_UN_OP => [ 683 self::ACTION_GOTO => self::EXPRESSION, 684 ], 685 self::TYPE_INCR_OP => [ 686 self::ACTION_GOTO => self::EXPRESSION, 687 ], 688 self::TYPE_ADD_OP => [ 689 self::ACTION_GOTO => self::EXPRESSION, 690 ], 691 self::TYPE_BRACE_OPEN => [ 692 self::ACTION_PUSH => self::EXPRESSION_END, 693 self::ACTION_GOTO => self::STATEMENT, 694 ], 695 self::TYPE_PAREN_OPEN => [ 696 self::ACTION_PUSH => self::EXPRESSION_OP, 697 self::ACTION_GOTO => self::PAREN_EXPRESSION, 698 ], 699 self::TYPE_FUNC => [ 700 self::ACTION_PUSH => self::EXPRESSION_OP, 701 self::ACTION_GOTO => self::FUNC, 702 ], 703 self::TYPE_CLASS => [ 704 self::ACTION_PUSH => self::EXPRESSION_OP, 705 self::ACTION_GOTO => self::CLASS_DEF, 706 ], 707 self::TYPE_LITERAL => [ 708 self::ACTION_GOTO => self::EXPRESSION_OP, 709 ], 710 ], 711 // Expression after a ? . 
This differs from EXPRESSION because a : ends the ternary 712 // rather than starting STATEMENT (outside a ternary, : comes after a goto label) 713 // The actual rule for : ending the ternary is in EXPRESSION_TERNARY_OP. 714 self::EXPRESSION_TERNARY => [ 715 self::TYPE_BRACE_OPEN => [ 716 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 717 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 718 ], 719 self::TYPE_PAREN_OPEN => [ 720 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 721 self::ACTION_GOTO => self::PAREN_EXPRESSION, 722 ], 723 self::TYPE_FUNC => [ 724 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 725 self::ACTION_GOTO => self::FUNC, 726 ], 727 self::TYPE_CLASS => [ 728 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 729 self::ACTION_GOTO => self::CLASS_DEF, 730 ], 731 self::TYPE_LITERAL => [ 732 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 733 ], 734 ], 735 // Like EXPRESSION_OP, but for ternaries, see EXPRESSION_TERNARY 736 self::EXPRESSION_TERNARY_OP => [ 737 self::TYPE_BIN_OP => [ 738 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 739 ], 740 self::TYPE_ADD_OP => [ 741 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 742 ], 743 self::TYPE_DOT => [ 744 self::ACTION_GOTO => self::EXPRESSION_TERNARY_DOT, 745 ], 746 self::TYPE_HOOK => [ 747 self::ACTION_PUSH => self::EXPRESSION_TERNARY, 748 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 749 ], 750 self::TYPE_COMMA => [ 751 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 752 ], 753 self::TYPE_ARROW => [ 754 self::ACTION_GOTO => self::EXPRESSION_TERNARY_ARROWFUNC, 755 ], 756 self::TYPE_PAREN_OPEN => [ 757 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 758 self::ACTION_GOTO => self::PAREN_EXPRESSION, 759 ], 760 self::TYPE_COLON => [ 761 self::ACTION_POP => true, 762 ], 763 ], 764 // Like EXPRESSION_DOT, but for ternaries, see EXPRESSION_TERNARY 765 self::EXPRESSION_TERNARY_DOT => [ 766 self::TYPE_LITERAL => [ 767 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 768 ], 769 // The following are 
keywords behaving as literals 770 self::TYPE_RETURN => [ 771 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 772 ], 773 self::TYPE_IF => [ 774 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 775 ], 776 self::TYPE_DO => [ 777 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 778 ], 779 self::TYPE_VAR => [ 780 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 781 ], 782 self::TYPE_FUNC => [ 783 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 784 ], 785 self::TYPE_CLASS => [ 786 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 787 ], 788 // We don't expect real unary/binary operators here, but some keywords 789 // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be 790 // used as property names 791 self::TYPE_UN_OP => [ 792 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 793 ], 794 self::TYPE_BIN_OP => [ 795 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 796 ], 797 ], 798 // Like EXPRESSION_ARROWFUNC, but for ternaries, see EXPRESSION_TERNARY 799 self::EXPRESSION_TERNARY_ARROWFUNC => [ 800 self::TYPE_UN_OP => [ 801 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 802 ], 803 self::TYPE_INCR_OP => [ 804 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 805 ], 806 self::TYPE_ADD_OP => [ 807 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 808 ], 809 self::TYPE_BRACE_OPEN => [ 810 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 811 self::ACTION_GOTO => self::STATEMENT, 812 ], 813 self::TYPE_PAREN_OPEN => [ 814 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 815 self::ACTION_GOTO => self::PAREN_EXPRESSION, 816 ], 817 self::TYPE_FUNC => [ 818 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 819 self::ACTION_GOTO => self::FUNC, 820 ], 821 self::TYPE_CLASS => [ 822 self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, 823 self::ACTION_GOTO => self::CLASS_DEF, 824 ], 825 self::TYPE_LITERAL => [ 826 self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, 827 ], 828 ], 829 // Expression inside parentheses. 
Like EXPRESSION, except that ) ends this state 830 // This differs from EXPRESSION because semicolon insertion can't happen here 831 self::PAREN_EXPRESSION => [ 832 self::TYPE_BRACE_OPEN => [ 833 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 834 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 835 ], 836 self::TYPE_PAREN_OPEN => [ 837 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 838 self::ACTION_GOTO => self::PAREN_EXPRESSION, 839 ], 840 self::TYPE_PAREN_CLOSE => [ 841 self::ACTION_POP => true, 842 ], 843 self::TYPE_FUNC => [ 844 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 845 self::ACTION_GOTO => self::FUNC, 846 ], 847 self::TYPE_CLASS => [ 848 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 849 self::ACTION_GOTO => self::CLASS_DEF, 850 ], 851 self::TYPE_LITERAL => [ 852 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 853 ], 854 ], 855 // Like EXPRESSION_OP, but in parentheses, see PAREN_EXPRESSION 856 self::PAREN_EXPRESSION_OP => [ 857 self::TYPE_BIN_OP => [ 858 self::ACTION_GOTO => self::PAREN_EXPRESSION, 859 ], 860 self::TYPE_ADD_OP => [ 861 self::ACTION_GOTO => self::PAREN_EXPRESSION, 862 ], 863 self::TYPE_DOT => [ 864 self::ACTION_GOTO => self::PAREN_EXPRESSION_DOT, 865 ], 866 self::TYPE_HOOK => [ 867 self::ACTION_GOTO => self::PAREN_EXPRESSION, 868 ], 869 self::TYPE_COLON => [ 870 self::ACTION_GOTO => self::PAREN_EXPRESSION, 871 ], 872 self::TYPE_COMMA => [ 873 self::ACTION_GOTO => self::PAREN_EXPRESSION, 874 ], 875 self::TYPE_SEMICOLON => [ 876 self::ACTION_GOTO => self::PAREN_EXPRESSION, 877 ], 878 self::TYPE_ARROW => [ 879 self::ACTION_GOTO => self::PAREN_EXPRESSION_ARROWFUNC, 880 ], 881 self::TYPE_PAREN_OPEN => [ 882 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 883 self::ACTION_GOTO => self::PAREN_EXPRESSION, 884 ], 885 self::TYPE_PAREN_CLOSE => [ 886 self::ACTION_POP => true, 887 ], 888 ], 889 // Like EXPRESSION_DOT, but in parentheses, see PAREN_EXPRESSION 890 self::PAREN_EXPRESSION_DOT => [ 891 self::TYPE_LITERAL => [ 892 self::ACTION_GOTO 
=> self::PAREN_EXPRESSION_OP, 893 ], 894 // The following are keywords behaving as literals 895 self::TYPE_RETURN => [ 896 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 897 ], 898 self::TYPE_IF => [ 899 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 900 ], 901 self::TYPE_DO => [ 902 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 903 ], 904 self::TYPE_VAR => [ 905 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 906 ], 907 self::TYPE_FUNC => [ 908 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 909 ], 910 self::TYPE_CLASS => [ 911 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 912 ], 913 // We don't expect real unary/binary operators here, but some keywords 914 // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be 915 // used as property names 916 self::TYPE_UN_OP => [ 917 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 918 ], 919 self::TYPE_BIN_OP => [ 920 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 921 ], 922 ], 923 // Like EXPRESSION_ARROWFUNC, but in parentheses, see PAREN_EXPRESSION 924 self::PAREN_EXPRESSION_ARROWFUNC => [ 925 self::TYPE_UN_OP => [ 926 self::ACTION_GOTO => self::PAREN_EXPRESSION, 927 ], 928 self::TYPE_INCR_OP => [ 929 self::ACTION_GOTO => self::PAREN_EXPRESSION, 930 ], 931 self::TYPE_ADD_OP => [ 932 self::ACTION_GOTO => self::PAREN_EXPRESSION, 933 ], 934 self::TYPE_BRACE_OPEN => [ 935 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 936 self::ACTION_GOTO => self::STATEMENT, 937 ], 938 self::TYPE_PAREN_OPEN => [ 939 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 940 self::ACTION_GOTO => self::PAREN_EXPRESSION, 941 ], 942 self::TYPE_FUNC => [ 943 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 944 self::ACTION_GOTO => self::FUNC, 945 ], 946 self::TYPE_CLASS => [ 947 self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, 948 self::ACTION_GOTO => self::CLASS_DEF, 949 ], 950 self::TYPE_LITERAL => [ 951 self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, 952 ], 953 ], 954 // Expression as the value of a key in an object 
literal. Like EXPRESSION, except that 955 // a comma (in PROPERTY_EXPRESSION_OP) goes to PROPERTY_ASSIGNMENT instead 956 self::PROPERTY_EXPRESSION => [ 957 self::TYPE_BRACE_OPEN => [ 958 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 959 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 960 ], 961 self::TYPE_BRACE_CLOSE => [ 962 self::ACTION_POP => true, 963 ], 964 self::TYPE_PAREN_OPEN => [ 965 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 966 self::ACTION_GOTO => self::PAREN_EXPRESSION, 967 ], 968 self::TYPE_FUNC => [ 969 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 970 self::ACTION_GOTO => self::FUNC, 971 ], 972 self::TYPE_CLASS => [ 973 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 974 self::ACTION_GOTO => self::CLASS_DEF, 975 ], 976 self::TYPE_LITERAL => [ 977 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 978 ], 979 ], 980 // Like EXPRESSION_OP, but in a property expression, see PROPERTY_EXPRESSION 981 self::PROPERTY_EXPRESSION_OP => [ 982 self::TYPE_BIN_OP => [ 983 self::ACTION_GOTO => self::PROPERTY_EXPRESSION, 984 ], 985 self::TYPE_ADD_OP => [ 986 self::ACTION_GOTO => self::PROPERTY_EXPRESSION, 987 ], 988 self::TYPE_DOT => [ 989 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_DOT, 990 ], 991 self::TYPE_HOOK => [ 992 self::ACTION_PUSH => self::PROPERTY_EXPRESSION, 993 self::ACTION_GOTO => self::EXPRESSION_TERNARY, 994 ], 995 self::TYPE_COMMA => [ 996 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 997 ], 998 self::TYPE_ARROW => [ 999 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_ARROWFUNC, 1000 ], 1001 self::TYPE_BRACE_OPEN => [ 1002 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 1003 ], 1004 self::TYPE_BRACE_CLOSE => [ 1005 self::ACTION_POP => true, 1006 ], 1007 self::TYPE_PAREN_OPEN => [ 1008 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 1009 self::ACTION_GOTO => self::PAREN_EXPRESSION, 1010 ], 1011 ], 1012 // Like EXPRESSION_DOT, but in a property expression, see PROPERTY_EXPRESSION 1013 self::PROPERTY_EXPRESSION_DOT => [ 
1014 self::TYPE_LITERAL => [ 1015 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1016 ], 1017 // The following are keywords behaving as literals 1018 self::TYPE_RETURN => [ 1019 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1020 ], 1021 self::TYPE_IF => [ 1022 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1023 ], 1024 self::TYPE_DO => [ 1025 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1026 ], 1027 self::TYPE_VAR => [ 1028 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1029 ], 1030 self::TYPE_FUNC => [ 1031 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1032 ], 1033 self::TYPE_CLASS => [ 1034 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1035 ], 1036 // We don't expect real unary/binary operators here, but some keywords 1037 // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be 1038 // used as property names 1039 self::TYPE_UN_OP => [ 1040 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1041 ], 1042 self::TYPE_BIN_OP => [ 1043 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1044 ], 1045 ], 1046 // Like EXPRESSION_ARROWFUNC, but in a property expression, see PROPERTY_EXPRESSION 1047 self::PROPERTY_EXPRESSION_ARROWFUNC => [ 1048 self::TYPE_UN_OP => [ 1049 self::ACTION_GOTO => self::PROPERTY_EXPRESSION, 1050 ], 1051 self::TYPE_INCR_OP => [ 1052 self::ACTION_GOTO => self::PROPERTY_EXPRESSION, 1053 ], 1054 self::TYPE_ADD_OP => [ 1055 self::ACTION_GOTO => self::PROPERTY_EXPRESSION, 1056 ], 1057 self::TYPE_BRACE_OPEN => [ 1058 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 1059 self::ACTION_GOTO => self::STATEMENT, 1060 ], 1061 self::TYPE_PAREN_OPEN => [ 1062 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 1063 self::ACTION_GOTO => self::PAREN_EXPRESSION, 1064 ], 1065 self::TYPE_FUNC => [ 1066 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 1067 self::ACTION_GOTO => self::FUNC, 1068 ], 1069 self::TYPE_CLASS => [ 1070 self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, 1071 self::ACTION_GOTO 
=> self::CLASS_DEF, 1072 ], 1073 self::TYPE_LITERAL => [ 1074 self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, 1075 ], 1076 ], 1077 // Class definition (after the class keyword). Expects an identifier, or the extends 1078 // keyword followed by an expression (or both), followed by {, which starts an object 1079 // literal. The object literal's closing } will pop the stack, so the state to return 1080 // to after the class definition should be pushed to the stack first. 1081 self::CLASS_DEF => [ 1082 self::TYPE_BRACE_OPEN => [ 1083 self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, 1084 ], 1085 self::TYPE_PAREN_OPEN => [ 1086 self::ACTION_PUSH => self::CLASS_DEF, 1087 self::ACTION_GOTO => self::PAREN_EXPRESSION, 1088 ], 1089 self::TYPE_FUNC => [ 1090 self::ACTION_PUSH => self::CLASS_DEF, 1091 self::ACTION_GOTO => self::FUNC, 1092 ], 1093 ], 1094 // Import or export declaration 1095 self::IMPORT_EXPORT => [ 1096 self::TYPE_SEMICOLON => [ 1097 self::ACTION_GOTO => self::STATEMENT, 1098 ], 1099 self::TYPE_VAR => [ 1100 self::ACTION_GOTO => self::EXPRESSION, 1101 ], 1102 self::TYPE_FUNC => [ 1103 self::ACTION_PUSH => self::EXPRESSION_OP, 1104 self::ACTION_GOTO => self::FUNC, 1105 ], 1106 self::TYPE_CLASS => [ 1107 self::ACTION_PUSH => self::EXPRESSION_OP, 1108 self::ACTION_GOTO => self::CLASS_DEF, 1109 ], 1110 self::TYPE_SPECIAL => [ 1111 'default' => [ 1112 self::ACTION_GOTO => self::EXPRESSION, 1113 ], 1114 // Stay in this state for *, as, from 1115 '*' => [], 1116 'as' => [], 1117 'from' => [], 1118 ], 1119 ], 1120 // Used in template string-specific code below 1121 self::TEMPLATE_STRING_HEAD => [ 1122 self::TYPE_LITERAL => [ 1123 self::ACTION_PUSH => self::TEMPLATE_STRING_TAIL, 1124 self::ACTION_GOTO => self::EXPRESSION, 1125 ], 1126 ], 1127 ]; 1128 1129 /** 1130 * @var array $semicolon 1131 * 1132 * Rules for when semicolon insertion is appropriate. 
Semicolon insertion takes place when the
	 * parser is in one of the states listed here and the next token, being of one of the listed
	 * types, is preceded by a newline.
	 *
	 * This array is augmented by ensureExpandedStates().
	 */
	private static $semicolon = [
		self::EXPRESSION_NO_NL => [
			self::TYPE_UN_OP => true,
			// A binary operator looks impossible at the start of an expression,
			// but it does occur, as in: yield *foo
			self::TYPE_BIN_OP => true,
			self::TYPE_INCR_OP => true,
			self::TYPE_ADD_OP => true,
			self::TYPE_BRACE_OPEN => true,
			self::TYPE_PAREN_OPEN => true,
			self::TYPE_RETURN => true,
			self::TYPE_IF => true,
			self::TYPE_DO => true,
			self::TYPE_VAR => true,
			self::TYPE_FUNC => true,
			self::TYPE_CLASS => true,
			self::TYPE_LITERAL => true,
		],
		self::EXPRESSION_OP => [
			self::TYPE_UN_OP => true,
			self::TYPE_INCR_OP => true,
			self::TYPE_BRACE_OPEN => true,
			self::TYPE_RETURN => true,
			self::TYPE_IF => true,
			self::TYPE_DO => true,
			self::TYPE_VAR => true,
			self::TYPE_FUNC => true,
			self::TYPE_CLASS => true,
			self::TYPE_LITERAL => true,
		],
		self::EXPRESSION_END => [
			self::TYPE_UN_OP => true,
			self::TYPE_INCR_OP => true,
			self::TYPE_ADD_OP => true,
			self::TYPE_BRACE_OPEN => true,
			self::TYPE_PAREN_OPEN => true,
			self::TYPE_RETURN => true,
			self::TYPE_IF => true,
			self::TYPE_DO => true,
			self::TYPE_VAR => true,
			self::TYPE_FUNC => true,
			self::TYPE_CLASS => true,
			self::TYPE_LITERAL => true,
		],
	];

	/**
	 * @var array $divStates
	 *
	 * States in which a / is a division operator. In all other states, it's the start of a regex.
	 *
	 * This array is augmented by self::ensureExpandedStates().
*/
	private static $divStates = [
		self::EXPRESSION_OP => true,
		self::EXPRESSION_TERNARY_OP => true,
		self::PAREN_EXPRESSION_OP => true,
		self::PROPERTY_EXPRESSION_OP => true
	];

	/**
	 * @var array $expressionStates
	 *
	 * States that are like EXPRESSION, where true and false can be minified to !0 and !1.
	 *
	 * This array is augmented by self::ensureExpandedStates().
	 */
	private static $expressionStates = [
		self::EXPRESSION => true,
		self::EXPRESSION_NO_NL => true,
		self::EXPRESSION_ARROWFUNC => true,
		self::EXPRESSION_TERNARY => true,
		self::PAREN_EXPRESSION => true,
		self::PROPERTY_EXPRESSION => true,
	];

	/**
	 * Add copies of all states but with negative numbers to self::$model (if not already present),
	 * to represent generator function states.
	 *
	 * The same mirroring is applied to self::$semicolon, self::$divStates and
	 * self::$expressionStates. The work is done once per process, guarded by
	 * self::$expandedStates.
	 */
	private static function ensureExpandedStates() {
		// Already done?
		if ( self::$expandedStates ) {
			return;
		}
		self::$expandedStates = true;

		// Add copies of all states (except FUNC and GENFUNC) with negative numbers.
		// These negative states represent states inside generator functions. When in these states,
		// TYPE_YIELD is treated as TYPE_RETURN, otherwise as TYPE_LITERAL
		foreach ( self::$model as $state => $transitions ) {
			if ( $state !== self::FUNC && $state !== self::GENFUNC ) {
				foreach ( $transitions as $tokenType => $actions ) {
					foreach ( $actions as $action => $target ) {
						// Targets are negated too, so that we stay in generator states.
						// FUNC, GENFUNC and `true` (the value used for ACTION_POP) are kept as-is.
						if ( is_array( $target ) ) {
							// One level deeper: per-token actions, as used for TYPE_SPECIAL
							foreach ( $target as $subaction => $subtarget ) {
								self::$model[-$state][$tokenType][$action][$subaction] =
									$subtarget === self::FUNC || $subtarget === true || $subtarget === self::GENFUNC
									? $subtarget : -$subtarget;
							}
						} else {
							self::$model[-$state][$tokenType][$action] =
								$target === self::FUNC || $target === true || $target === self::GENFUNC
								? $target : -$target;
						}
					}
				}
			}
		}
		// Special cases:
		// '{' in a property assignment starts a method, so it shouldn't be flipped
		self::$model[-self::PROPERTY_ASSIGNMENT][self::TYPE_BRACE_OPEN][self::ACTION_GOTO] = self::STATEMENT;

		// Also add negative versions of states to the other arrays
		foreach ( self::$semicolon as $state => $value ) {
			self::$semicolon[-$state] = $value;
		}
		foreach ( self::$divStates as $state => $value ) {
			self::$divStates[-$state] = $value;
		}
		foreach ( self::$expressionStates as $state => $value ) {
			self::$expressionStates[-$state] = $value;
		}
	}

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is tokenized in a single pass. Whitespace and comments are dropped, and each
	 * remaining token is appended to the output, separated from the previous one only where
	 * needed. A newline is kept where it triggers semicolon insertion, and one is added where
	 * a line would otherwise exceed self::$maxLineLength.
	 *
	 * False is returned (via self::parseError()) for a malformed numeric literal: a hex prefix
	 * without digits, more than two consecutive decimal points, a repeated exponent marker, or
	 * an exponent without digits.
	 *
	 * @param string $s JavaScript code to minify
	 * @return string|bool Minified code or false on failure
	 */
	public static function minify( $s ) {
		self::ensureExpandedStates();

		// Digits allowed after the ES6 radix prefixes 0b/0B (binary) and 0o/0O (octal)
		$radixDigits = [
			'b' => '01',
			'B' => '01',
			'o' => '01234567',
			'O' => '01234567',
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$topOfStack = null; // Optimization: calling end( $stack ) repeatedly is expensive
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					$end += strcspn( $s, $search, $end ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash and we haven't reached the end, keep searching..
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}

			// Handle template strings: beginning (`) or continuation after a ${ expression (} + tail state)
			} elseif ( $ch === '`' || ( $ch === '}' && $topOfStack === self::TEMPLATE_STRING_TAIL ) ) {
				if ( $ch === '}' ) {
					// Pop the TEMPLATE_STRING_TAIL state off the stack
					// We don't let it get popped off the stack the normal way, to avoid the newline
					// and comment stripping code above running on the continuation of the literal
					array_pop( $stack );
					// Also pop the previous state off the stack
					$state = array_pop( $stack );
					$topOfStack = end( $stack );
				}
				// Search until we reach either a closing ` or a ${, skipping over backslash escapes
				// and $ characters followed by something other than { or `
				do {
					$end += strcspn( $s, '`$\\', $end ) + 1;
					if ( $end - 1 < $length && $s[$end - 1] === '\\' ) {
						// Backslash escape. Skip the next character, and keep going
						$end++;
						continue;
					}
					if ( $end < $length && $s[$end - 1] === '$' && $s[$end] === '{' ) {
						// Beginning of an expression in ${ ... }. Skip the {, and stop
						$end++;
						// Push the current state to the stack. We'll pop this off later when hitting
						// the end of this template string
						$stack[] = $state;
						$topOfStack = $state;
						// Change the state to TEMPLATE_STRING_HEAD. The token type will be detected
						// as TYPE_LITERAL, and this will cause the state machine to expect an
						// expression, then go to the TEMPLATE_STRING_TAIL state when it hits the }
						$state = self::TEMPLATE_STRING_HEAD;
						break;
					}
				} while ( $end - 1 < $length && $s[$end - 1] !== '`' );
				if ( $end > $length ) {
					// Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}

			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( self::$divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do {
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do {
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of char class)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed "]" was seen
						// String ended without ending char class or regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length )
				&& isset( $radixDigits[$s[$pos + 1]] )
				&& strspn( $s, $radixDigits[$s[$pos + 1]], $pos + 2 )
			) {
				// Binary (0b) or octal (0o) numeric literal, as introduced by ES6.
				// Without this branch "0b101" is split into the number "0" and the identifier
				// "b101", and a separating space is emitted between them, breaking the literal.
				// This branch is only taken when at least one valid digit follows the prefix;
				// anything else falls through to the generic number handling below.
				$end++; // b, B, o or O
				$end += strspn( $s, $radixDigits[$s[$pos + 1]], $end );
			} elseif (
				// Optimisation: This check must accept only ASCII digits 0-9.
				// Avoid ctype_digit() because it is slower and also accepts locale-specific digits.
				// Using is_numeric() might seem wrong also as it accepts negative numbers, decimal
				// numbers, and exponents (e.g. strings like "+012.34e6"). But, it is fine here
				// because we know $ch is a single character, and we believe the only single
				// characters that is_numeric() accepts are ASCII digits 0-9.
				is_numeric( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && is_numeric( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					// Up to two consecutive dots are accepted here
					// (presumably to allow forms such as "1..toString()").
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( self::$opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				for ( $tokenLength = self::LONGEST_PUNCTUATION_TOKEN; $tokenLength > 1; $tokenLength-- ) {
					if (
						$pos + $tokenLength <= $length &&
						isset( self::$tokenTypes[ substr( $s, $pos, $tokenLength ) ] )
					) {
						$end = $pos + $tokenLength;
						break;
					}
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"`!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( self::$model[$state][self::TYPE_SPECIAL][$token] )
				? self::TYPE_SPECIAL
				: self::$tokenTypes[$token] ?? self::TYPE_LITERAL;
			if ( $type === self::TYPE_YIELD ) {
				// yield is treated as TYPE_RETURN inside a generator function (negative state)
				// but as TYPE_LITERAL when not in a generator function (positive state)
				$type = $state < 0 ? self::TYPE_RETURN : self::TYPE_LITERAL;
			}

			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& isset( self::$expressionStates[$state] )
				&& $last !== '.'
			) {
				// Shorten the boolean literal. $ch is updated as well, because the spacing
				// checks below look at the first character of the token being output.
				$token = ( $token === 'true' ) ? '!0' : '!1';
				$ch = '!';
			}

			if ( $newlineFound && isset( self::$semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = $state < 0 ? -self::STATEMENT : self::STATEMENT;
				$lineLength = 0;
			} elseif ( $lineLength + $end - $pos > self::$maxLineLength &&
				!isset( self::$semicolon[$state][$type] ) &&
				$type !== self::TYPE_INCR_OP &&
				$type !== self::TYPE_ARROW
			) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator or an arrow on its own line,
				// which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( self::$opChars[$last] ) && !isset( self::$opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			// Length of the token as it appears in the source. This equals strlen( $token ),
			// except when true/false was shortened above, in which case it overcounts a little.
			$lineLength += $end - $pos;
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			$actions = $type === self::TYPE_SPECIAL ?
				self::$model[$state][$type][$token] :
				self::$model[$state][$type] ?? [];
			if ( isset( $actions[self::ACTION_PUSH] ) &&
				count( $stack ) < self::STACK_LIMIT
			) {
				$topOfStack = $actions[self::ACTION_PUSH];
				$stack[] = $topOfStack;
			}
			if ( $stack && isset( $actions[self::ACTION_POP] ) ) {
				$state = array_pop( $stack );
				$topOfStack = end( $stack );
			} elseif ( isset( $actions[self::ACTION_GOTO] ) ) {
				$state = $actions[self::ACTION_GOTO];
			}
		}
		return $out;
	}

	/**
	 * Report a parse error found while minifying.
	 *
	 * Currently none of the arguments are used and false is always returned.
	 *
	 * @param string $fullJavascript
	 * @param int $position
	 * @param string $errorMsg
	 * @return bool
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}