<?php

/**
 * Defines the lexer of the library.
 *
 * This is one of the most important components, along with the parser.
 *
 * Depends on context to extract lexemes.
 */

namespace PhpMyAdmin\SqlParser;

use PhpMyAdmin\SqlParser\Exceptions\LexerException;

if (! defined('USE_UTF_STRINGS')) {
    // NOTE: In previous versions of PHP (5.5 and older) the default
    // internal encoding is "ISO-8859-1".
    // All `mb_` functions must specify the correct encoding, which is
    // 'UTF-8' in order to work properly.

    /*
     * Forces usage of `UtfString` if the string is multibyte.
     * `UtfString` may be slower, but it gives better results.
     *
     * @var bool
     */
    define('USE_UTF_STRINGS', true);
}

/**
 * Performs lexical analysis over a SQL statement and splits it in multiple
 * tokens.
 *
 * The output of the lexer is affected by the context of the SQL statement.
 *
 * @category Lexer
 *
 * @license  https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
 *
 * @see      Context
 */
class Lexer extends Core
{
    /**
     * A list of methods that are used in lexing the SQL query.
     *
     * The methods are tried in this order at every position of the input and
     * the first one returning a token wins (see `lex()`).
     *
     * @var array
     */
    public static $PARSER_METHODS = array(
        // It is best to put the parsers in order of their complexity
        // (ascending) and their occurrence rate (descending).
        //
        // Conflicts:
        //
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
        // They fight over delimiter. The delimiter may be a keyword, a
        // number or almost any character which makes the delimiter one of
        // the first tokens that must be parsed.
        //
        // 2. `parseNumber` and `parseOperator`
        // They fight over `+` and `-`.
        //
        // 3. `parseComment` and `parseOperator`
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
        //
        // 4. `parseBool` and `parseKeyword`
        // They fight over `TRUE` and `FALSE`.
        //
        // 5. `parseKeyword` and `parseUnknown`
        // They fight over words. `parseUnknown` does not know about
        // keywords.

        'parseDelimiter',
        'parseWhitespace',
        'parseNumber',
        'parseComment',
        'parseOperator',
        'parseBool',
        'parseString',
        'parseSymbol',
        'parseKeyword',
        'parseLabel',
        'parseUnknown'
    );

    /**
     * The string to be parsed.
     *
     * @var string|UtfString
     */
    public $str = '';

    /**
     * The length of `$str`.
     *
     * By storing its length, a lot of time is saved, because parsing methods
     * would call `strlen` every time.
     *
     * @var int
     */
    public $len = 0;

    /**
     * The index of the last parsed character.
     *
     * @var int
     */
    public $last = 0;

    /**
     * Tokens extracted from given strings.
     *
     * @var TokensList
     */
    public $list;

    /**
     * The default delimiter. This is used, by default, in all new instances.
     *
     * @var string
     */
    public static $DEFAULT_DELIMITER = ';';

    /**
     * Statements delimiter.
     * This may change during lexing.
     *
     * @var string
     */
    public $delimiter;

    /**
     * The length of the delimiter.
     *
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
     * calls to `strlen`, which might affect performance when the delimiter is
     * big.
     *
     * @var int
     */
    public $delimiterLen;

    /**
     * Gets the tokens list parsed by a new instance of a lexer.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string           $delimiter the delimiter to be used
     *
     * @return TokensList
     */
    public static function getTokens($str, $strict = false, $delimiter = null)
    {
        $lexer = new self($str, $strict, $delimiter);

        return $lexer->list;
    }

    /**
     * Constructor.
     *
     * Stores the input (wrapping it in a `UtfString` when it contains
     * multi-byte characters and `USE_UTF_STRINGS` is enabled), selects the
     * delimiter and immediately runs `lex()`.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string           $delimiter the delimiter to be used; the
     *                                    default delimiter is used when this
     *                                    is not given
     */
    public function __construct($str, $strict = false, $delimiter = null)
    {
        // `strlen` is used instead of `mb_strlen` because the lexer needs to
        // parse each byte of the input.
        $len = $str instanceof UtfString ? $str->length() : strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is
        // initialized (only if `UtfString` usage is forced).
        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
        }

        $this->str = $str;
        $this->len = $str instanceof UtfString ? $str->length() : $len;

        $this->strict = $strict;

        // Setting the delimiter.
        // `empty()` on its own would also discard the legitimate delimiter
        // "0", hence the explicit exception.
        $hasDelimiter = $delimiter === '0' || ! empty($delimiter);
        $this->setDelimiter(
            $hasDelimiter ? $delimiter : static::$DEFAULT_DELIMITER
        );

        $this->lex();
    }

    /**
     * Sets the delimiter.
     *
     * The byte length of the delimiter is cached in `$delimiterLen`.
     *
     * @param string $delimiter the new delimiter
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;
        $this->delimiterLen = strlen($delimiter);
    }

    /**
     * Parses the string and extracts lexemes.
     *
     * The resulting tokens are stored in `$this->list`. A final, empty
     * delimiter token is always appended to mark the end of the input.
     */
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token
         */
        $lastToken = null;

        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token
             */
            $token = null;

            // The first parsing method that recognizes something wins.
            foreach (static::$PARSER_METHODS as $method) {
                if ($token = $this->$method()) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error(
                    'Unexpected character.',
                    $this->str[$this->last],
                    $this->last
                );
            } elseif ($lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && ($token->flags & Token::FLAG_SYMBOL_VARIABLE)
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && ($lastToken->flags & Token::FLAG_SYMBOL_BACKTICK)
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The current token is merged into the previous one, so it is
                // not added to the list and `$lastToken` is left untouched.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif ($lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error(
                        'Expected whitespace(s) before delimiter.',
                        '',
                        $this->last + 1
                    );
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                if (($token = $this->parseWhitespace()) !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error(
                        'Expected delimiter.',
                        '',
                        $this->last + 1
                    );
                    continue;
                }
                $pos = $this->last + 1;

                // Parsing the delimiter: everything up to the next whitespace,
                // limited to 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // The character count is checked instead of `empty()`, which
                // would wrongly reject the delimiter "0".
                if ($delimiterLen === 0) {
                    $this->error(
                        'Expected delimiter.',
                        '',
                        $this->last
                    );
                    $this->delimiter = ';';
                }

                // The loop above stopped one character past the delimiter.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        $this->solveAmbiguityOnStarOperator();
    }

    /**
     * Resolves the ambiguity when dealing with the "*" operator.
     *
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
     * - "," (a comma separator like in "SELECT *, field FROM...");
     * - ")" (a closing parenthesis like in "COUNT(*)").
     * This method will change the flag of the "*" tokens when any of the conditions above is true. Otherwise, the
     * default flag (arithmetic) will be kept.
     *
     * @return void
     */
    private function solveAmbiguityOnStarOperator()
    {
        // The list cursor is restored afterwards so callers see it unchanged.
        $iBak = $this->list->idx;
        while (null !== ($starToken = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*'))) {
            // ::getNext already gets rid of whitespaces and comments.
            if (($next = $this->list->getNext()) !== null) {
                if (($next->type === Token::TYPE_KEYWORD && in_array($next->value, array('FROM', 'USING'), true))
                    || ($next->type === Token::TYPE_OPERATOR && in_array($next->value, array(',', ')'), true))
                ) {
                    $starToken->flags = Token::FLAG_OPERATOR_SQL;
                }
            }
        }
        $this->list->idx = $iBak;
    }

    /**
     * Creates a new error log.
     *
     * The message is translated, wrapped in a `LexerException` and handed to
     * the parent's `error()` method.
     *
     * @param string $msg  the error message
     * @param string $str  the character that produced the error
     * @param int    $pos  the position of the character
     * @param int    $code the code of the error
     *
     * @throws LexerException throws the exception, if strict mode is enabled
     */
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        $error = new LexerException(
            Translator::gettext($msg),
            $str,
            $pos,
            $code
        );
        parent::error($error);
    }

    /**
     * Parses a keyword.
     *
     * The longest keyword starting at the current position is returned;
     * `$this->last` is left on its final character.
     *
     * @return null|Token
     */
    public function parseKeyword()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;

        /**
         * Whether last parsed character is a whitespace.
         *
         * @var bool
         */
        $lastSpace = false;

        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            // Composed keywords shouldn't have more than one whitespace between
            // keywords.
            if (Context::isWhitespace($this->str[$this->last])) {
                if ($lastSpace) {
                    --$j; // The size of the keyword didn't increase.
                    continue;
                }
                $lastSpace = true;
            } else {
                $lastSpace = false;
            }

            $token .= $this->str[$this->last];
            if (($this->last + 1 === $this->len || Context::isSeparator($this->str[$this->last + 1]))
                && ($flags = Context::isKeyword($token))
            ) {
                $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
                $iEnd = $this->last;

                // We don't break so we find longest keyword.
                // For example, `OR` and `ORDER` have a common prefix `OR`.
                // If we stopped at `OR`, the parsing would be invalid.
            }
        }

        // Rewinds to the end of the longest keyword found (or to the starting
        // position when there was none).
        $this->last = $iEnd;

        return $ret;
    }

    /**
     * Parses a label.
     *
     * A label is a run of non-separator characters, optionally followed by
     * whitespace, that ends with `:`.
     *
     * @return null|Token
     */
    public function parseLabel()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;
        for ($j = 1; $j < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            if ($this->str[$this->last] === ':' && $j > 1) {
                // End of label
                $token .= $this->str[$this->last];
                $ret = new Token($token, Token::TYPE_LABEL);
                $iEnd = $this->last;
                break;
            } elseif (Context::isWhitespace($this->str[$this->last]) && $j > 1) {
                // Whitespace between label and :
                // The size of the keyword didn't increase.
                --$j;
            } elseif (Context::isSeparator($this->str[$this->last])) {
                // Any other separator
                break;
            }
            $token .= $this->str[$this->last];
        }

        $this->last = $iEnd;

        return $ret;
    }

    /**
     * Parses an operator.
     *
     * The longest operator starting at the current position is returned.
     *
     * @return null|Token
     */
    public function parseOperator()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;

        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $token .= $this->str[$this->last];
            if ($flags = Context::isOperator($token)) {
                $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
                $iEnd = $this->last;
            }
        }

        $this->last = $iEnd;

        return $ret;
    }

    /**
     * Parses a whitespace.
     *
     * Consecutive whitespace characters are collected into a single token.
     *
     * @return null|Token
     */
    public function parseWhitespace()
    {
        $token = $this->str[$this->last];

        if (! Context::isWhitespace($token)) {
            return null;
        }

        while (++$this->last < $this->len && Context::isWhitespace($this->str[$this->last])) {
            $token .= $this->str[$this->last];
        }

        // The loop stopped one character past the whitespace run.
        --$this->last;

        return new Token($token, Token::TYPE_WHITESPACE);
    }

    /**
     * Parses a comment.
     *
     * Bash style (`# ...`), C style (`/* ... *\/`, including the beginning of
     * MySQL-specific `/*!12345` commands) and SQL style (`-- ...`) comments
     * are tried, in this order.
     *
     * @return null|Token
     */
    public function parseComment()
    {
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // Bash style comments. (#comment\n)
        if (Context::isComment($token)) {
            while (++$this->last < $this->len
                && $this->str[$this->last] !== "\n"
            ) {
                $token .= $this->str[$this->last];
            }
            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occur in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                $next = $this->last + 1;
                if (($next < $this->len) && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len
                    && $this->str[$this->last + 1] === '!'
                ) {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // Collecting the digits that follow `/*!`.
                    while (++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }
                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment.
                while (++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            --$this->last;
            $end = true;
        }
        if (Context::isComment($token, $end)) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len
                    && $this->str[$this->last] !== "\n"
                ) {
                    $token .= $this->str[$this->last];
                }
            }
            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        // Not a comment: undo every lookahead made above.
        $this->last = $iBak;

        return null;
    }

    /**
     * Parses a boolean.
     *
     * The next four characters are checked first and, if they are not a
     * boolean, the next five.
     *
     * @return null|Token
     */
    public function parseBool()
    {
        if ($this->last + 3 >= $this->len) {
            // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
            // required.
            return null;
        }

        $iBak = $this->last;
        $token = $this->str[$this->last] . $this->str[++$this->last]
            . $this->str[++$this->last] . $this->str[++$this->last]; // _TRUE_ or _FALS_e

        if (Context::isBool($token)) {
            return new Token($token, Token::TYPE_BOOL);
        } elseif (++$this->last < $this->len) {
            $token .= $this->str[$this->last]; // fals_E_
            if (Context::isBool($token)) {
                return new Token($token, Token::TYPE_BOOL, 1);
            }
        }

        $this->last = $iBak;

        return null;
    }

    /**
     * Parses a number.
     *
     * @return null|Token
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machines and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        // State 9 is reached when a bit representation has been closed.
        //
        // Valid final states are: 2, 3, 4, 6 and 9. Any parsing that finished
        // in a state other than these is invalid.
        // Also, negative states are invalid states. They are entered when a
        // letter directly follows the number (states 3, 4 and 5).
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            if ($state === 1) {
                if ($this->str[$this->last] === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif ($this->last + 1 < $this->len
                    && $this->str[$this->last] === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // The `0` is added here, the `x` at the end of this
                    // iteration.
                    $token .= $this->str[$this->last++];
                    $state = 2;
                } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
                    $state = 3;
                } elseif ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'b') {
                    $state = 7;
                } elseif ($this->str[$this->last] !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (! (
                    ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
                    || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
                )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    $state = -$state;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    $state = -$state;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($this->str[$this->last] === '+' || $this->str[$this->last] === '-'
                    || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                ) {
                    $state = 6;
                } elseif (($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    $state = -$state;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($this->str[$this->last] === '\'') {
                    $state = 8;
                } else {
                    break;
                }
            } elseif ($state === 8) {
                if ($this->str[$this->last] === '\'') {
                    $state = 9;
                } elseif ($this->str[$this->last] !== '0'
                    && $this->str[$this->last] !== '1'
                ) {
                    break;
                }
            } elseif ($state === 9) {
                break;
            } else {
                // Negative (invalid) state: no transition leads out of it, so
                // the result can only be `null`. Stop right away instead of
                // scanning the rest of the input for nothing.
                break;
            }
            $token .= $this->str[$this->last];
        }
        if ($state === 2 || $state === 3
            || ($token !== '.' && $state === 4)
            || $state === 6 || $state === 9
        ) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }
        $this->last = $iBak;

        return null;
    }

    /**
     * Parses a string.
     *
     * A doubled quote and (except inside backticks) a backslash followed by
     * any character are kept as part of the string. An error is logged when
     * the closing quote is missing; a token is still returned in that case.
     *
     * @param string $quote additional starting symbol
     *
     * @return null|Token
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $token = $this->str[$this->last];
        if (! ($flags = Context::isString($token)) && $token !== $quote) {
            return null;
        }
        $quote = $token;

        while (++$this->last < $this->len) {
            if ($this->last + 1 < $this->len
                && (
                    ($this->str[$this->last] === $quote && $this->str[$this->last + 1] === $quote)
                    || ($this->str[$this->last] === '\\' && $quote !== '`')
                )
            ) {
                // Escaped character: both characters are consumed at once.
                $token .= $this->str[$this->last] . $this->str[++$this->last];
            } else {
                if ($this->str[$this->last] === $quote) {
                    break;
                }
                $token .= $this->str[$this->last];
            }
        }

        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        } else {
            $token .= $this->str[$this->last];
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }

    /**
     * Parses a symbol.
     *
     * @return null|Token
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        if (! ($flags = Context::isSymbol($token))) {
            return null;
        }

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            // NOTE: the pre-increment below also moves past the leading `@`
            // when the next character is not a second `@`.
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token !== '?' && $this->last + 1 < $this->len) {
                ++$this->last;
            }
        } else {
            // The name is parsed below as a whole (including its quotes).
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            if (($str = $this->parseString('`')) === null) {
                if (($str = $this->parseUnknown()) === null) {
                    $this->error(
                        'Variable name was expected.',
                        $this->str[$this->last],
                        $this->last
                    );
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }

    /**
     * Parses unknown parts of the query.
     *
     * Everything up to the next separator is collected. If the collected text
     * ends with the current delimiter, the delimiter is cut off so that it
     * gets parsed on its own.
     *
     * @return null|Token
     */
    public function parseUnknown()
    {
        $token = $this->str[$this->last];
        if (Context::isSeparator($token)) {
            return null;
        }

        while (++$this->last < $this->len && ! Context::isSeparator($this->str[$this->last])) {
            $token .= $this->str[$this->last];

            // Test if end of token equals the current delimiter. If so, remove it from the token.
            if (substr($token, -$this->delimiterLen) === $this->delimiter) {
                $token = substr($token, 0, -$this->delimiterLen);
                $this->last -= $this->delimiterLen - 1;
                break;
            }
        }

        // The loop stopped one character past the token.
        --$this->last;

        return new Token($token);
    }

    /**
     * Parses the delimiter of the query.
     *
     * A token is returned only if the complete delimiter is found at the
     * current position; `$this->last` is then moved to its last character.
     *
     * @return null|Token
     */
    public function parseDelimiter()
    {
        // An empty delimiter can never match. Accepting it would move
        // `$this->last` backwards and make `lex()` loop forever.
        if ($this->delimiterLen < 1) {
            return null;
        }

        // Fewer characters are left than the delimiter has: a matching prefix
        // at the very end of the input is not a delimiter.
        // NOTE(review): `$delimiterLen` is a byte count while `$len` counts
        // characters for `UtfString` input; both agree for ASCII delimiters.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
}