<?php
/**
 * Defines the lexer of the library.
 *
 * This is one of the most important components, along with the parser.
 *
 * Depends on context to extract lexemes.
 */

declare(strict_types=1);

namespace PhpMyAdmin\SqlParser;

use PhpMyAdmin\SqlParser\Exceptions\LexerException;

use function define;
use function defined;
use function in_array;
use function mb_strlen;
use function sprintf;
use function strlen;
use function substr;

if (! defined('USE_UTF_STRINGS')) {
    // NOTE: In previous versions of PHP (5.5 and older) the default
    // internal encoding is "ISO-8859-1".
    // All `mb_` functions must specify the correct encoding, which is
    // 'UTF-8' in order to work properly.

    /*
     * Forces usage of `UtfString` if the string is multibyte.
     * `UtfString` may be slower, but it gives better results.
     *
     * @var bool
     */
    define('USE_UTF_STRINGS', true);
}

/**
 * Performs lexical analysis over a SQL statement and splits it in multiple
 * tokens.
 *
 * The output of the lexer is affected by the context of the SQL statement.
 *
 * Every `parse*` method follows the same contract: it starts reading at
 * `$this->last`; on success it returns a token and leaves `$this->last` on the
 * last character it consumed; on failure it returns `null` and restores
 * `$this->last` to the value it had on entry.
 *
 * @see Context
 */
class Lexer extends Core
{
    /**
     * A list of methods that are used in lexing the SQL query.
     *
     * The methods are tried in this order for every position of the input and
     * the first one that returns a token wins.
     *
     * @var string[]
     */
    public static $PARSER_METHODS = [
        // It is best to put the parsers in order of their complexity
        // (ascending) and their occurrence rate (descending).
        //
        // Conflicts:
        //
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
        // They fight over delimiter. The delimiter may be a keyword, a
        // number or almost any character which makes the delimiter one of
        // the first tokens that must be parsed.
        //
        // 2. `parseNumber` and `parseOperator`
        // They fight over `+` and `-`.
        //
        // 3. `parseComment` and `parseOperator`
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
        //
        // 4. `parseBool` and `parseKeyword`
        // They fight over `TRUE` and `FALSE`.
        //
        // 5. `parseKeyword` and `parseUnknown`
        // They fight over words. `parseUnknown` does not know about
        // keywords.

        'parseDelimiter',
        'parseWhitespace',
        'parseNumber',
        'parseComment',
        'parseOperator',
        'parseBool',
        'parseString',
        'parseSymbol',
        'parseKeyword',
        'parseLabel',
        'parseUnknown',
    ];

    /**
     * The string to be parsed.
     *
     * @var string|UtfString
     */
    public $str = '';

    /**
     * The length of `$str`.
     *
     * By storing its length, a lot of time is saved, because parsing methods
     * would otherwise call `strlen` every time.
     *
     * @var int
     */
    public $len = 0;

    /**
     * The index of the last parsed character.
     *
     * @var int
     */
    public $last = 0;

    /**
     * Tokens extracted from given strings.
     *
     * @var TokensList
     */
    public $list;

    /**
     * The default delimiter. This is used, by default, in all new instances.
     *
     * @var string
     */
    public static $DEFAULT_DELIMITER = ';';

    /**
     * Statements delimiter.
     * This may change during lexing.
     *
     * @var string
     */
    public $delimiter;

    /**
     * The length of the delimiter.
     *
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
     * calls to `strlen`, which might affect performance when the delimiter is
     * big.
     *
     * @var int
     */
    public $delimiterLen;

    /**
     * Gets the tokens list parsed by a new instance of a lexer.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string|null      $delimiter the delimiter to be used; the default
     *                                    delimiter is used when empty
     *
     * @return TokensList
     */
    public static function getTokens($str, $strict = false, $delimiter = null)
    {
        $lexer = new self($str, $strict, $delimiter);

        return $lexer->list;
    }

    /**
     * Stores the input, selects the delimiter and immediately lexes the input.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string|null      $delimiter the delimiter to be used; the default
     *                                    delimiter is used when empty
     */
    public function __construct($str, $strict = false, $delimiter = null)
    {
        // `strlen` is used instead of `mb_strlen` because the lexer needs to
        // parse each byte of the input.
        $len = $str instanceof UtfString ? $str->length() : strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is
        // initialized (only if `UtfString` usage is forced).
        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
        }

        $this->str = $str;
        $this->len = $str instanceof UtfString ? $str->length() : $len;

        $this->strict = $strict;

        // Setting the delimiter.
        $this->setDelimiter(! empty($delimiter) ? $delimiter : static::$DEFAULT_DELIMITER);

        $this->lex();
    }

    /**
     * Sets the delimiter and caches its length.
     *
     * @param string $delimiter the new delimiter
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;
        $this->delimiterLen = strlen($delimiter);
    }

    /**
     * Parses the string and extracts lexemes.
     *
     * The resulting list is stored in `$this->list` and always ends with an
     * extra delimiter token (with a `null` token string) that marks the end.
     */
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token|null
         */
        $lastToken = null;

        // `$lastIdx` remembers where the current token starts, because the
        // parsing methods move `$this->last` to the end of the token.
        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token|null
             */
            $token = null;

            foreach (static::$PARSER_METHODS as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The variable-like symbol is merged into the previous token
                // instead of being added to the list.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected delimiter.', '', $this->last + 1);
                    continue;
                }

                $pos = $this->last + 1;

                // Parsing the delimiter: every character up to the next
                // whitespace, but no more than 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (
                    ++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                if (empty($this->delimiter)) {
                    $this->error('Expected delimiter.', '', $this->last);
                    $this->delimiter = ';';
                }

                // The loop above stopped one character past the delimiter.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        $this->solveAmbiguityOnStarOperator();
    }

    /**
     * Resolves the ambiguity when dealing with the "*" operator.
     *
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
     * - "," (a comma separator like in "SELECT *, field FROM...");
     * - ")" (a closing parenthesis like in "COUNT(*)").
     * This method will change the flag of the "*" tokens when any of the conditions above is true. Otherwise, the
     * default flag (arithmetic) will be kept.
     *
     * The list cursor (`$this->list->idx`) is restored before returning.
     *
     * @return void
     */
    private function solveAmbiguityOnStarOperator()
    {
        $iBak = $this->list->idx;
        while (($starToken = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*')) !== null) {
            // getNext() already gets rid of whitespaces and comments.
            $next = $this->list->getNext();

            if ($next === null) {
                continue;
            }

            if (
                ($next->type !== Token::TYPE_KEYWORD || ! in_array($next->value, ['FROM', 'USING'], true))
                && ($next->type !== Token::TYPE_OPERATOR || ! in_array($next->value, [',', ')'], true))
            ) {
                continue;
            }

            $starToken->flags = Token::FLAG_OPERATOR_SQL;
        }

        $this->list->idx = $iBak;
    }

    /**
     * Creates a new error log.
     *
     * The message is passed through `Translator::gettext()` and wrapped in a
     * `LexerException` which is handed to the parent class.
     *
     * @param string $msg  the error message
     * @param string $str  the character that produced the error
     * @param int    $pos  the position of the character
     * @param int    $code the code of the error
     *
     * @throws LexerException throws the exception, if strict mode is enabled.
     */
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        $error = new LexerException(
            Translator::gettext($msg),
            $str,
            $pos,
            $code
        );
        parent::error($error);
    }

    /**
     * Parses a keyword.
     *
     * The longest keyword that starts at the current position and is followed
     * by a separator (or by the end of the input) is returned.
     *
     * @return Token|null
     */
    public function parseKeyword()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token|null
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;

        /**
         * Whether last parsed character is a whitespace.
         *
         * @var bool
         */
        $lastSpace = false;

        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            // Composed keywords shouldn't have more than one whitespace between
            // keywords.
            if (Context::isWhitespace($this->str[$this->last])) {
                if ($lastSpace) {
                    --$j; // The size of the keyword didn't increase.
                    continue;
                }

                $lastSpace = true;
            } else {
                $lastSpace = false;
            }

            $token .= $this->str[$this->last];
            $flags = Context::isKeyword($token);

            // A candidate only counts when it is a keyword AND it is not
            // immediately followed by a non-separator character.
            if (($this->last + 1 !== $this->len && ! Context::isSeparator($this->str[$this->last + 1])) || ! $flags) {
                continue;
            }

            $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
            $iEnd = $this->last;

            // We don't break so we find longest keyword.
            // For example, `OR` and `ORDER` have a common prefix `OR`.
            // If we stopped at `OR`, the parsing would be invalid.
        }

        $this->last = $iEnd;

        return $ret;
    }

    /**
     * Parses a label.
     *
     * A label is a word, optionally followed by whitespace, that ends with a
     * colon. The colon is part of the returned token.
     *
     * @return Token|null
     */
    public function parseLabel()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token|null
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;
        for ($j = 1; $j < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            if ($this->str[$this->last] === ':' && $j > 1) {
                // End of label
                $token .= $this->str[$this->last];
                $ret = new Token($token, Token::TYPE_LABEL);
                $iEnd = $this->last;
                break;
            }

            if (Context::isWhitespace($this->str[$this->last]) && $j > 1) {
                // Whitespace between label and :
                // The size of the keyword didn't increase.
                --$j;
            } elseif (Context::isSeparator($this->str[$this->last])) {
                // Any other separator
                break;
            }

            $token .= $this->str[$this->last];
        }

        $this->last = $iEnd;

        return $ret;
    }

    /**
     * Parses an operator.
     *
     * The longest operator that starts at the current position is returned.
     *
     * @return Token|null
     */
    public function parseOperator()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token|null
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;

        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $token .= $this->str[$this->last];
            $flags = Context::isOperator($token);

            if (! $flags) {
                continue;
            }

            // No break here, so that a longer operator can still be found.
            $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
            $iEnd = $this->last;
        }

        $this->last = $iEnd;

        return $ret;
    }

    /**
     * Parses a whitespace.
     *
     * All consecutive whitespace characters are grouped in a single token.
     *
     * @return Token|null
     */
    public function parseWhitespace()
    {
        $token = $this->str[$this->last];

        if (! Context::isWhitespace($token)) {
            return null;
        }

        while (++$this->last < $this->len && Context::isWhitespace($this->str[$this->last])) {
            $token .= $this->str[$this->last];
        }

        // The loop stopped one character past the whitespace run.
        --$this->last;

        return new Token($token, Token::TYPE_WHITESPACE);
    }

    /**
     * Parses a comment.
     *
     * Three comment styles are tried, in this order: Bash style (`#`),
     * C style (including the MySQL-specific command form) and SQL style
     * (`--`).
     *
     * @return Token|null
     */
    public function parseComment()
    {
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // Bash style comments. (#comment\n)
        if (Context::isComment($token)) {
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                $token .= $this->str[$this->last];
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occur in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                $next = $this->last + 1;
                if (($next < $this->len) && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // The digits that follow the `!` belong to the token.
                    while (
                        ++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment.
                while (
                    ++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end)) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                    $token .= $this->str[$this->last];
                }
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        $this->last = $iBak;

        return null;
    }

    /**
     * Parses a boolean.
     *
     * The four-character candidate is tested first; if it is not a boolean, a
     * fifth character is added and the test is repeated. The token built from
     * five characters gets the flag `1`.
     *
     * @return Token|null
     */
    public function parseBool()
    {
        if ($this->last + 3 >= $this->len) {
            // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
            // required.
            return null;
        }

        $iBak = $this->last;
        $token = $this->str[$this->last] . $this->str[++$this->last]
            . $this->str[++$this->last] . $this->str[++$this->last]; // _TRUE_ or _FALS_e

        if (Context::isBool($token)) {
            return new Token($token, Token::TYPE_BOOL);
        }

        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last]; // fals_E_
            if (Context::isBool($token)) {
                return new Token($token, Token::TYPE_BOOL, 1);
            }
        }

        $this->last = $iBak;

        return null;
    }

    /**
     * Parses a number.
     *
     * @return Token|null
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machines and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        // State 9 is reached only by complete bit representations.
        //
        // Valid final states are: 2, 3, 4, 6 and 9. Any parsing that finished
        // in a state other than these is invalid.
        // Also, negative states are invalid states. They are entered when a
        // number is directly followed by a letter; scanning stops at once
        // because no transition leaves a negative state.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            if ($state === 1) {
                if ($this->str[$this->last] === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $this->str[$this->last] === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // The `0` is added here, the `x` at the end of the
                    // iteration.
                    $token .= $this->str[$this->last++];
                    $state = 2;
                } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
                    $state = 3;
                } elseif ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'b') {
                    $state = 7;
                } elseif ($this->str[$this->last] !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    ! (
                        ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                        || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
                        || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    $state = -$state;
                    break;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    $state = -$state;
                    break;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if (
                    $this->str[$this->last] === '+' || $this->str[$this->last] === '-'
                    || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                ) {
                    $state = 6;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    $state = -$state;
                    break;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($this->str[$this->last] !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($this->str[$this->last] === '\'') {
                    $state = 9;
                } elseif ($this->str[$this->last] !== '0' && $this->str[$this->last] !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                break;
            }

            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }

    /**
     * Parses a string.
     *
     * A doubled quote is kept in the token as an escaped quote and, unless
     * the quote is a backtick, a backslash escapes the character after it.
     * When the closing quote is missing, an error is logged and the token
     * holds everything up to the end of the input.
     *
     * @param string $quote additional starting symbol
     *
     * @return Token|null
     *
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $token = $this->str[$this->last];
        $flags = Context::isString($token);

        if (! $flags && $token !== $quote) {
            return null;
        }

        $quote = $token;

        while (++$this->last < $this->len) {
            if (
                $this->last + 1 < $this->len
                && (
                    ($this->str[$this->last] === $quote && $this->str[$this->last + 1] === $quote)
                    || ($this->str[$this->last] === '\\' && $quote !== '`')
                )
            ) {
                // Escape sequence: both characters are consumed at once.
                $token .= $this->str[$this->last] . $this->str[++$this->last];
            } else {
                if ($this->str[$this->last] === $quote) {
                    break;
                }

                $token .= $this->str[$this->last];
            }
        }

        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        } else {
            $token .= $this->str[$this->last];
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }

    /**
     * Parses a symbol.
     *
     * The name that follows the symbol prefix is read either as a string
     * (backticks are accepted as quotes too) or as an unknown token.
     *
     * @return Token|null
     *
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        if (! $flags) {
            return null;
        }

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token !== '?' && $this->last + 1 < $this->len) {
                ++$this->last;
            }
        } else {
            // The first character belongs to the name itself and is read
            // again below.
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                if ($str === null) {
                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }

    /**
     * Parses unknown parts of the query.
     *
     * Everything up to the next separator is consumed. If the current
     * delimiter shows up at the end of the consumed text, the token stops
     * right before it.
     *
     * @return Token|null
     */
    public function parseUnknown()
    {
        $token = $this->str[$this->last];
        if (Context::isSeparator($token)) {
            return null;
        }

        while (++$this->last < $this->len && ! Context::isSeparator($this->str[$this->last])) {
            $token .= $this->str[$this->last];

            // Test if end of token equals the current delimiter. If so, remove it from the token.
            if (substr($token, -$this->delimiterLen) === $this->delimiter) {
                $token = substr($token, 0, -$this->delimiterLen);
                $this->last -= $this->delimiterLen - 1;
                break;
            }
        }

        --$this->last;

        return new Token($token);
    }

    /**
     * Parses the delimiter of the query.
     *
     * The delimiter is recognised only when it is completely present at the
     * current position. A partial match that is cut off by the end of the
     * input is rejected, so that no token is reported for text that is not in
     * the input and `$this->last` never moves past the end.
     *
     * @return Token|null
     */
    public function parseDelimiter()
    {
        // Not enough characters left to hold the whole delimiter.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
}