1<?php
2/**
3 * Defines the lexer of the library.
4 *
5 * This is one of the most important components, along with the parser.
6 *
7 * Depends on context to extract lexemes.
8 */
9
10declare(strict_types=1);
11
12namespace PhpMyAdmin\SqlParser;
13
14use PhpMyAdmin\SqlParser\Exceptions\LexerException;
15
16use function define;
17use function defined;
18use function in_array;
19use function mb_strlen;
20use function sprintf;
21use function strlen;
22use function substr;
23
// Forces usage of `UtfString` if the string is multibyte. `UtfString` may be
// slower, but it gives better results. The constant is only defined when the
// application has not already defined it, so a value set earlier wins.
//
// NOTE: In previous versions of PHP (5.5 and older) the default internal
// encoding is "ISO-8859-1". All `mb_` functions must specify the correct
// encoding, which is 'UTF-8', in order to work properly.
if (defined('USE_UTF_STRINGS') === false) {
    define('USE_UTF_STRINGS', true);
}
38
39/**
40 * Performs lexical analysis over a SQL statement and splits it in multiple
41 * tokens.
42 *
43 * The output of the lexer is affected by the context of the SQL statement.
44 *
45 * @see      Context
46 */
47class Lexer extends Core
48{
    /**
     * A list of methods that are used in lexing the SQL query.
     *
     * `lex()` calls these methods in the listed order at every position and
     * stops at the first one that returns a token, so the order is significant.
     *
     * @var string[]
     */
    public static $PARSER_METHODS = [
        // It is best to put the parsers in order of their complexity
        // (ascending) and their occurrence rate (descending).
        //
        // Conflicts:
        //
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
        // They fight over delimiter. The delimiter may be a keyword, a
        // number or almost any character which makes the delimiter one of
        // the first tokens that must be parsed.
        //
        // 2. `parseNumber` and `parseOperator`
        // They fight over `+` and `-`.
        //
        // 3. `parseComment` and `parseOperator`
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
        //
        // 4. `parseBool` and `parseKeyword`
        // They fight over `TRUE` and `FALSE`.
        //
        // 5. `parseKeyword` and `parseUnknown`
        // They fight over words. `parseUnknown` does not know about
        // keywords.

        'parseDelimiter',
        'parseWhitespace',
        'parseNumber',
        'parseComment',
        'parseOperator',
        'parseBool',
        'parseString',
        'parseSymbol',
        'parseKeyword',
        'parseLabel',
        'parseUnknown',
    ];
90
    /**
     * The string to be parsed.
     *
     * The constructor replaces a plain string with a `UtfString` when the
     * input is multibyte and `USE_UTF_STRINGS` is enabled.
     *
     * @var string|UtfString
     */
    public $str = '';

    /**
     * The length of `$str`.
     *
     * For a `UtfString` this is the value of its `length()` method; for a
     * plain string it is the byte count reported by `strlen`.
     *
     * By storing its length, a lot of time is saved, because parsing methods
     * would otherwise call `strlen` every time.
     *
     * @var int
     */
    public $len = 0;

    /**
     * The index of the last parsed character.
     *
     * The parsing methods advance this cursor while reading and leave it on
     * the final character of the token they return.
     *
     * @var int
     */
    public $last = 0;

    /**
     * Tokens extracted from given strings.
     *
     * Assigned at the end of `lex()`.
     *
     * @var TokensList
     */
    public $list;

    /**
     * The default delimiter. This is used, by default, in all new instances.
     *
     * @var string
     */
    public static $DEFAULT_DELIMITER = ';';

    /**
     * Statements delimiter.
     * This may change during lexing (when a `DELIMITER` command is met).
     *
     * @var string
     */
    public $delimiter;

    /**
     * The length of the delimiter, as reported by `strlen`.
     *
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
     * calls to `strlen`, which might affect performance when the delimiter is
     * big.
     *
     * @var int
     */
    public $delimiterLen;
147
148    /**
149     * Gets the tokens list parsed by a new instance of a lexer.
150     *
151     * @param string|UtfString $str       the query to be lexed
152     * @param bool             $strict    whether strict mode should be
153     *                                    enabled or not
154     * @param string           $delimiter the delimiter to be used
155     *
156     * @return TokensList
157     */
158    public static function getTokens($str, $strict = false, $delimiter = null)
159    {
160        $lexer = new self($str, $strict, $delimiter);
161
162        return $lexer->list;
163    }
164
165    /**
166     * @param string|UtfString $str       the query to be lexed
167     * @param bool             $strict    whether strict mode should be
168     *                                    enabled or not
169     * @param string           $delimiter the delimiter to be used
170     */
171    public function __construct($str, $strict = false, $delimiter = null)
172    {
173        // `strlen` is used instead of `mb_strlen` because the lexer needs to
174        // parse each byte of the input.
175        $len = $str instanceof UtfString ? $str->length() : strlen($str);
176
177        // For multi-byte strings, a new instance of `UtfString` is
178        // initialized (only if `UtfString` usage is forced.
179        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
180            $str = new UtfString($str);
181        }
182
183        $this->str = $str;
184        $this->len = $str instanceof UtfString ? $str->length() : $len;
185
186        $this->strict = $strict;
187
188        // Setting the delimiter.
189        $this->setDelimiter(! empty($delimiter) ? $delimiter : static::$DEFAULT_DELIMITER);
190
191        $this->lex();
192    }
193
194    /**
195     * Sets the delimiter.
196     *
197     * @param string $delimiter the new delimiter
198     */
199    public function setDelimiter($delimiter)
200    {
201        $this->delimiter = $delimiter;
202        $this->delimiterLen = strlen($delimiter);
203    }
204
205    /**
206     * Parses the string and extracts lexemes.
207     */
208    public function lex()
209    {
210        // TODO: Sometimes, static::parse* functions make unnecessary calls to
211        // is* functions. For a better performance, some rules can be deduced
212        // from context.
213        // For example, in `parseBool` there is no need to compare the token
214        // every time with `true` and `false`. The first step would be to
215        // compare with 'true' only and just after that add another letter from
216        // context and compare again with `false`.
217        // Another example is `parseComment`.
218
219        $list = new TokensList();
220
221        /**
222         * Last processed token.
223         *
224         * @var Token
225         */
226        $lastToken = null;
227
228        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
229            /**
230             * The new token.
231             *
232             * @var Token
233             */
234            $token = null;
235
236            foreach (static::$PARSER_METHODS as $method) {
237                $token = $this->$method();
238
239                if ($token) {
240                    break;
241                }
242            }
243
244            if ($token === null) {
245                // @assert($this->last === $lastIdx);
246                $token = new Token($this->str[$this->last]);
247                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
248            } elseif (
249                $lastToken !== null
250                && $token->type === Token::TYPE_SYMBOL
251                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
252                && (
253                    $lastToken->type === Token::TYPE_STRING
254                    || (
255                        $lastToken->type === Token::TYPE_SYMBOL
256                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
257                    )
258                )
259            ) {
260                // Handles ```... FROM 'user'@'%' ...```.
261                $lastToken->token .= $token->token;
262                $lastToken->type = Token::TYPE_SYMBOL;
263                $lastToken->flags = Token::FLAG_SYMBOL_USER;
264                $lastToken->value .= '@' . $token->value;
265                continue;
266            } elseif (
267                $lastToken !== null
268                && $token->type === Token::TYPE_KEYWORD
269                && $lastToken->type === Token::TYPE_OPERATOR
270                && $lastToken->value === '.'
271            ) {
272                // Handles ```... tbl.FROM ...```. In this case, FROM is not
273                // a reserved word.
274                $token->type = Token::TYPE_NONE;
275                $token->flags = 0;
276                $token->value = $token->token;
277            }
278
279            $token->position = $lastIdx;
280
281            $list->tokens[$list->count++] = $token;
282
283            // Handling delimiters.
284            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
285                if ($this->last + 1 >= $this->len) {
286                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
287                    continue;
288                }
289
290                // Skipping last R (from `delimiteR`) and whitespaces between
291                // the keyword `DELIMITER` and the actual delimiter.
292                $pos = ++$this->last;
293                $token = $this->parseWhitespace();
294
295                if ($token !== null) {
296                    $token->position = $pos;
297                    $list->tokens[$list->count++] = $token;
298                }
299
300                // Preparing the token that holds the new delimiter.
301                if ($this->last + 1 >= $this->len) {
302                    $this->error('Expected delimiter.', '', $this->last + 1);
303                    continue;
304                }
305
306                $pos = $this->last + 1;
307
308                // Parsing the delimiter.
309                $this->delimiter = null;
310                $delimiterLen = 0;
311                while (
312                    ++$this->last < $this->len
313                    && ! Context::isWhitespace($this->str[$this->last])
314                    && $delimiterLen < 15
315                ) {
316                    $this->delimiter .= $this->str[$this->last];
317                    ++$delimiterLen;
318                }
319
320                if (empty($this->delimiter)) {
321                    $this->error('Expected delimiter.', '', $this->last);
322                    $this->delimiter = ';';
323                }
324
325                --$this->last;
326
327                // Saving the delimiter and its token.
328                $this->delimiterLen = strlen($this->delimiter);
329                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
330                $token->position = $pos;
331                $list->tokens[$list->count++] = $token;
332            }
333
334            $lastToken = $token;
335        }
336
337        // Adding a final delimiter to mark the ending.
338        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);
339
340        // Saving the tokens list.
341        $this->list = $list;
342
343        $this->solveAmbiguityOnStarOperator();
344    }
345
346    /**
347     * Resolves the ambiguity when dealing with the "*" operator.
348     *
349     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
350     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
351     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
352     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
353     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
354     * - "," (a comma separator like in "SELECT *, field FROM...");
355     * - ")" (a closing parenthesis like in "COUNT(*)").
356     * This methods will change the flag of the "*" tokens when any of those condition above is true. Otherwise, the
357     * default flag (arithmetic) will be kept.
358     *
359     * @return void
360     */
361    private function solveAmbiguityOnStarOperator()
362    {
363        $iBak = $this->list->idx;
364        while (($starToken = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*')) !== null) {
365            // getNext() already gets rid of whitespaces and comments.
366            $next = $this->list->getNext();
367
368            if ($next === null) {
369                continue;
370            }
371
372            if (
373                ($next->type !== Token::TYPE_KEYWORD || ! in_array($next->value, ['FROM', 'USING'], true))
374                && ($next->type !== Token::TYPE_OPERATOR || ! in_array($next->value, [',', ')'], true))
375            ) {
376                continue;
377            }
378
379            $starToken->flags = Token::FLAG_OPERATOR_SQL;
380        }
381
382        $this->list->idx = $iBak;
383    }
384
385    /**
386     * Creates a new error log.
387     *
388     * @param string $msg  the error message
389     * @param string $str  the character that produced the error
390     * @param int    $pos  the position of the character
391     * @param int    $code the code of the error
392     *
393     * @throws LexerException throws the exception, if strict mode is enabled.
394     */
395    public function error($msg, $str = '', $pos = 0, $code = 0)
396    {
397        $error = new LexerException(
398            Translator::gettext($msg),
399            $str,
400            $pos,
401            $code
402        );
403        parent::error($error);
404    }
405
406    /**
407     * Parses a keyword.
408     *
409     * @return Token|null
410     */
411    public function parseKeyword()
412    {
413        $token = '';
414
415        /**
416         * Value to be returned.
417         *
418         * @var Token
419         */
420        $ret = null;
421
422        /**
423         * The value of `$this->last` where `$token` ends in `$this->str`.
424         *
425         * @var int
426         */
427        $iEnd = $this->last;
428
429        /**
430         * Whether last parsed character is a whitespace.
431         *
432         * @var bool
433         */
434        $lastSpace = false;
435
436        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
437            // Composed keywords shouldn't have more than one whitespace between
438            // keywords.
439            if (Context::isWhitespace($this->str[$this->last])) {
440                if ($lastSpace) {
441                    --$j; // The size of the keyword didn't increase.
442                    continue;
443                }
444
445                $lastSpace = true;
446            } else {
447                $lastSpace = false;
448            }
449
450            $token .= $this->str[$this->last];
451            $flags = Context::isKeyword($token);
452
453            if (($this->last + 1 !== $this->len && ! Context::isSeparator($this->str[$this->last + 1])) || ! $flags) {
454                continue;
455            }
456
457            $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
458            $iEnd = $this->last;
459
460            // We don't break so we find longest keyword.
461            // For example, `OR` and `ORDER` have a common prefix `OR`.
462            // If we stopped at `OR`, the parsing would be invalid.
463        }
464
465        $this->last = $iEnd;
466
467        return $ret;
468    }
469
470    /**
471     * Parses a label.
472     *
473     * @return Token|null
474     */
475    public function parseLabel()
476    {
477        $token = '';
478
479        /**
480         * Value to be returned.
481         *
482         * @var Token
483         */
484        $ret = null;
485
486        /**
487         * The value of `$this->last` where `$token` ends in `$this->str`.
488         *
489         * @var int
490         */
491        $iEnd = $this->last;
492        for ($j = 1; $j < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
493            if ($this->str[$this->last] === ':' && $j > 1) {
494                // End of label
495                $token .= $this->str[$this->last];
496                $ret = new Token($token, Token::TYPE_LABEL);
497                $iEnd = $this->last;
498                break;
499            }
500
501            if (Context::isWhitespace($this->str[$this->last]) && $j > 1) {
502                // Whitespace between label and :
503                // The size of the keyword didn't increase.
504                --$j;
505            } elseif (Context::isSeparator($this->str[$this->last])) {
506                // Any other separator
507                break;
508            }
509
510            $token .= $this->str[$this->last];
511        }
512
513        $this->last = $iEnd;
514
515        return $ret;
516    }
517
518    /**
519     * Parses an operator.
520     *
521     * @return Token|null
522     */
523    public function parseOperator()
524    {
525        $token = '';
526
527        /**
528         * Value to be returned.
529         *
530         * @var Token
531         */
532        $ret = null;
533
534        /**
535         * The value of `$this->last` where `$token` ends in `$this->str`.
536         *
537         * @var int
538         */
539        $iEnd = $this->last;
540
541        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
542            $token .= $this->str[$this->last];
543            $flags = Context::isOperator($token);
544
545            if (! $flags) {
546                continue;
547            }
548
549            $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
550            $iEnd = $this->last;
551        }
552
553        $this->last = $iEnd;
554
555        return $ret;
556    }
557
558    /**
559     * Parses a whitespace.
560     *
561     * @return Token|null
562     */
563    public function parseWhitespace()
564    {
565        $token = $this->str[$this->last];
566
567        if (! Context::isWhitespace($token)) {
568            return null;
569        }
570
571        while (++$this->last < $this->len && Context::isWhitespace($this->str[$this->last])) {
572            $token .= $this->str[$this->last];
573        }
574
575        --$this->last;
576
577        return new Token($token, Token::TYPE_WHITESPACE);
578    }
579
580    /**
581     * Parses a comment.
582     *
583     * @return Token|null
584     */
585    public function parseComment()
586    {
587        $iBak = $this->last;
588        $token = $this->str[$this->last];
589
590        // Bash style comments. (#comment\n)
591        if (Context::isComment($token)) {
592            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
593                $token .= $this->str[$this->last];
594            }
595
596            // Include trailing \n as whitespace token
597            if ($this->last < $this->len) {
598                --$this->last;
599            }
600
601            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
602        }
603
604        // C style comments. (/*comment*\/)
605        if (++$this->last < $this->len) {
606            $token .= $this->str[$this->last];
607            if (Context::isComment($token)) {
608                // There might be a conflict with "*" operator here, when string is "*/*".
609                // This can occurs in the following statements:
610                // - "SELECT */* comment */ FROM ..."
611                // - "SELECT 2*/* comment */3 AS `six`;"
612                $next = $this->last + 1;
613                if (($next < $this->len) && $this->str[$next] === '*') {
614                    // Conflict in "*/*": first "*" was not for ending a comment.
615                    // Stop here and let other parsing method define the true behavior of that first star.
616                    $this->last = $iBak;
617
618                    return null;
619                }
620
621                $flags = Token::FLAG_COMMENT_C;
622
623                // This comment already ended. It may be a part of a
624                // previous MySQL specific command.
625                if ($token === '*/') {
626                    return new Token($token, Token::TYPE_COMMENT, $flags);
627                }
628
629                // Checking if this is a MySQL-specific command.
630                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
631                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
632                    $token .= $this->str[++$this->last];
633
634                    while (
635                        ++$this->last < $this->len
636                        && $this->str[$this->last] >= '0'
637                        && $this->str[$this->last] <= '9'
638                    ) {
639                        $token .= $this->str[$this->last];
640                    }
641
642                    --$this->last;
643
644                    // We split this comment and parse only its beginning
645                    // here.
646                    return new Token($token, Token::TYPE_COMMENT, $flags);
647                }
648
649                // Parsing the comment.
650                while (
651                    ++$this->last < $this->len
652                    && (
653                        $this->str[$this->last - 1] !== '*'
654                        || $this->str[$this->last] !== '/'
655                    )
656                ) {
657                    $token .= $this->str[$this->last];
658                }
659
660                // Adding the ending.
661                if ($this->last < $this->len) {
662                    $token .= $this->str[$this->last];
663                }
664
665                return new Token($token, Token::TYPE_COMMENT, $flags);
666            }
667        }
668
669        // SQL style comments. (-- comment\n)
670        if (++$this->last < $this->len) {
671            $token .= $this->str[$this->last];
672            $end = false;
673        } else {
674            --$this->last;
675            $end = true;
676        }
677
678        if (Context::isComment($token, $end)) {
679            // Checking if this comment did not end already (```--\n```).
680            if ($this->str[$this->last] !== "\n") {
681                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
682                    $token .= $this->str[$this->last];
683                }
684            }
685
686            // Include trailing \n as whitespace token
687            if ($this->last < $this->len) {
688                --$this->last;
689            }
690
691            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
692        }
693
694        $this->last = $iBak;
695
696        return null;
697    }
698
699    /**
700     * Parses a boolean.
701     *
702     * @return Token|null
703     */
704    public function parseBool()
705    {
706        if ($this->last + 3 >= $this->len) {
707            // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
708            // required.
709            return null;
710        }
711
712        $iBak = $this->last;
713        $token = $this->str[$this->last] . $this->str[++$this->last]
714        . $this->str[++$this->last] . $this->str[++$this->last]; // _TRUE_ or _FALS_e
715
716        if (Context::isBool($token)) {
717            return new Token($token, Token::TYPE_BOOL);
718        }
719
720        if (++$this->last < $this->len) {
721            $token .= $this->str[$this->last]; // fals_E_
722            if (Context::isBool($token)) {
723                return new Token($token, Token::TYPE_BOOL, 1);
724            }
725        }
726
727        $this->last = $iBak;
728
729        return null;
730    }
731
732    /**
733     * Parses a number.
734     *
735     * @return Token|null
736     */
737    public function parseNumber()
738    {
739        // A rudimentary state machine is being used to parse numbers due to
740        // the various forms of their notation.
741        //
742        // Below are the states of the machines and the conditions to change
743        // the state.
744        //
745        //      1 --------------------[ + or - ]-------------------> 1
746        //      1 -------------------[ 0x or 0X ]------------------> 2
747        //      1 --------------------[ 0 to 9 ]-------------------> 3
748        //      1 -----------------------[ . ]---------------------> 4
749        //      1 -----------------------[ b ]---------------------> 7
750        //
751        //      2 --------------------[ 0 to F ]-------------------> 2
752        //
753        //      3 --------------------[ 0 to 9 ]-------------------> 3
754        //      3 -----------------------[ . ]---------------------> 4
755        //      3 --------------------[ e or E ]-------------------> 5
756        //
757        //      4 --------------------[ 0 to 9 ]-------------------> 4
758        //      4 --------------------[ e or E ]-------------------> 5
759        //
760        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
761        //
762        //      7 -----------------------[ ' ]---------------------> 8
763        //
764        //      8 --------------------[ 0 or 1 ]-------------------> 8
765        //      8 -----------------------[ ' ]---------------------> 9
766        //
767        // State 1 may be reached by negative numbers.
768        // State 2 is reached only by hex numbers.
769        // State 4 is reached only by float numbers.
770        // State 5 is reached only by numbers in approximate form.
771        // State 7 is reached only by numbers in bit representation.
772        //
773        // Valid final states are: 2, 3, 4 and 6. Any parsing that finished in a
774        // state other than these is invalid.
775        // Also, negative states are invalid states.
776        $iBak = $this->last;
777        $token = '';
778        $flags = 0;
779        $state = 1;
780        for (; $this->last < $this->len; ++$this->last) {
781            if ($state === 1) {
782                if ($this->str[$this->last] === '-') {
783                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
784                } elseif (
785                    $this->last + 1 < $this->len
786                    && $this->str[$this->last] === '0'
787                    && (
788                        $this->str[$this->last + 1] === 'x'
789                        || $this->str[$this->last + 1] === 'X'
790                    )
791                ) {
792                    $token .= $this->str[$this->last++];
793                    $state = 2;
794                } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
795                    $state = 3;
796                } elseif ($this->str[$this->last] === '.') {
797                    $state = 4;
798                } elseif ($this->str[$this->last] === 'b') {
799                    $state = 7;
800                } elseif ($this->str[$this->last] !== '+') {
801                    // `+` is a valid character in a number.
802                    break;
803                }
804            } elseif ($state === 2) {
805                $flags |= Token::FLAG_NUMBER_HEX;
806                if (
807                    ! (
808                        ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
809                        || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
810                        || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
811                    )
812                ) {
813                    break;
814                }
815            } elseif ($state === 3) {
816                if ($this->str[$this->last] === '.') {
817                    $state = 4;
818                } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
819                    $state = 5;
820                } elseif (
821                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
822                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
823                ) {
824                    // A number can't be directly followed by a letter
825                    $state = -$state;
826                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
827                    // Just digits and `.`, `e` and `E` are valid characters.
828                    break;
829                }
830            } elseif ($state === 4) {
831                $flags |= Token::FLAG_NUMBER_FLOAT;
832                if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
833                    $state = 5;
834                } elseif (
835                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
836                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
837                ) {
838                    // A number can't be directly followed by a letter
839                    $state = -$state;
840                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
841                    // Just digits, `e` and `E` are valid characters.
842                    break;
843                }
844            } elseif ($state === 5) {
845                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
846                if (
847                    $this->str[$this->last] === '+' || $this->str[$this->last] === '-'
848                    || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
849                ) {
850                    $state = 6;
851                } elseif (
852                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
853                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
854                ) {
855                    // A number can't be directly followed by a letter
856                    $state = -$state;
857                } else {
858                    break;
859                }
860            } elseif ($state === 6) {
861                if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
862                    // Just digits are valid characters.
863                    break;
864                }
865            } elseif ($state === 7) {
866                $flags |= Token::FLAG_NUMBER_BINARY;
867                if ($this->str[$this->last] !== '\'') {
868                    break;
869                }
870
871                $state = 8;
872            } elseif ($state === 8) {
873                if ($this->str[$this->last] === '\'') {
874                    $state = 9;
875                } elseif ($this->str[$this->last] !== '0' && $this->str[$this->last] !== '1') {
876                    break;
877                }
878            } elseif ($state === 9) {
879                break;
880            }
881
882            $token .= $this->str[$this->last];
883        }
884
885        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
886            --$this->last;
887
888            return new Token($token, Token::TYPE_NUMBER, $flags);
889        }
890
891        $this->last = $iBak;
892
893        return null;
894    }
895
896    /**
897     * Parses a string.
898     *
899     * @param string $quote additional starting symbol
900     *
901     * @return Token|null
902     *
903     * @throws LexerException
904     */
905    public function parseString($quote = '')
906    {
907        $token = $this->str[$this->last];
908        $flags = Context::isString($token);
909
910        if (! $flags && $token !== $quote) {
911            return null;
912        }
913
914        $quote = $token;
915
916        while (++$this->last < $this->len) {
917            if (
918                $this->last + 1 < $this->len
919                && (
920                    ($this->str[$this->last] === $quote && $this->str[$this->last + 1] === $quote)
921                    || ($this->str[$this->last] === '\\' && $quote !== '`')
922                )
923            ) {
924                $token .= $this->str[$this->last] . $this->str[++$this->last];
925            } else {
926                if ($this->str[$this->last] === $quote) {
927                    break;
928                }
929
930                $token .= $this->str[$this->last];
931            }
932        }
933
934        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
935            $this->error(
936                sprintf(
937                    Translator::gettext('Ending quote %1$s was expected.'),
938                    $quote
939                ),
940                '',
941                $this->last
942            );
943        } else {
944            $token .= $this->str[$this->last];
945        }
946
947        return new Token($token, Token::TYPE_STRING, $flags);
948    }
949
950    /**
951     * Parses a symbol.
952     *
953     * @return Token|null
954     *
955     * @throws LexerException
956     */
957    public function parseSymbol()
958    {
959        $token = $this->str[$this->last];
960        $flags = Context::isSymbol($token);
961
962        if (! $flags) {
963            return null;
964        }
965
966        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
967            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
968                // This is a system variable (e.g. `@@hostname`).
969                $token .= $this->str[$this->last++];
970                $flags |= Token::FLAG_SYMBOL_SYSTEM;
971            }
972        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
973            if ($token !== '?' && $this->last + 1 < $this->len) {
974                ++$this->last;
975            }
976        } else {
977            $token = '';
978        }
979
980        $str = null;
981
982        if ($this->last < $this->len) {
983            $str = $this->parseString('`');
984
985            if ($str === null) {
986                $str = $this->parseUnknown();
987
988                if ($str === null) {
989                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
990                }
991            }
992        }
993
994        if ($str !== null) {
995            $token .= $str->token;
996        }
997
998        return new Token($token, Token::TYPE_SYMBOL, $flags);
999    }
1000
1001    /**
1002     * Parses unknown parts of the query.
1003     *
1004     * @return Token|null
1005     */
1006    public function parseUnknown()
1007    {
1008        $token = $this->str[$this->last];
1009        if (Context::isSeparator($token)) {
1010            return null;
1011        }
1012
1013        while (++$this->last < $this->len && ! Context::isSeparator($this->str[$this->last])) {
1014            $token .= $this->str[$this->last];
1015
1016            // Test if end of token equals the current delimiter. If so, remove it from the token.
1017            if (substr($token, -$this->delimiterLen) === $this->delimiter) {
1018                $token = substr($token, 0, -$this->delimiterLen);
1019                $this->last -= $this->delimiterLen - 1;
1020                break;
1021            }
1022        }
1023
1024        --$this->last;
1025
1026        return new Token($token);
1027    }
1028
1029    /**
1030     * Parses the delimiter of the query.
1031     *
1032     * @return Token|null
1033     */
1034    public function parseDelimiter()
1035    {
1036        $idx = 0;
1037
1038        while ($idx < $this->delimiterLen && $this->last + $idx < $this->len) {
1039            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
1040                return null;
1041            }
1042
1043            ++$idx;
1044        }
1045
1046        $this->last += $this->delimiterLen - 1;
1047
1048        return new Token($this->delimiter, Token::TYPE_DELIMITER);
1049    }
1050}
1051