1<?php
2
3/**
4 * Defines the lexer of the library.
5 *
6 * This is one of the most important components, along with the parser.
7 *
8 * Depends on context to extract lexemes.
9 */
10
11namespace PhpMyAdmin\SqlParser;
12
13use PhpMyAdmin\SqlParser\Exceptions\LexerException;
14
// NOTE: PHP 5.5 and older use "ISO-8859-1" as the default internal
// encoding, so every `mb_` function has to be given the correct encoding
// ('UTF-8') explicitly in order to work properly.

/*
 * Forces usage of `UtfString` if the string is multibyte.
 * `UtfString` may be slower, but it gives better results.
 *
 * A value defined before this file is loaded is left untouched.
 *
 * @var bool
 */
defined('USE_UTF_STRINGS') || define('USE_UTF_STRINGS', true);
29
30/**
31 * Performs lexical analysis over a SQL statement and splits it in multiple
32 * tokens.
33 *
34 * The output of the lexer is affected by the context of the SQL statement.
35 *
36 * @category Lexer
37 *
38 * @license  https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
39 *
40 * @see      Context
41 */
42class Lexer extends Core
43{
    /**
     * A list of methods that are used in lexing the SQL query.
     *
     * `lex()` tries these methods in the listed order at every position and
     * keeps the first token one of them produces, so the order below decides
     * which parser wins whenever two of them could match the same input.
     *
     * @var array
     */
    public static $PARSER_METHODS = array(
        // It is best to put the parsers in order of their complexity
        // (ascending) and their occurrence rate (descending).
        //
        // Conflicts:
        //
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
        // They fight over delimiter. The delimiter may be a keyword, a
        // number or almost any character which makes the delimiter one of
        // the first tokens that must be parsed.
        //
        // 2. `parseNumber` and `parseOperator`
        // They fight over `+` and `-`.
        //
        // 3. `parseComment` and `parseOperator`
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
        //
        // 4. `parseBool` and `parseKeyword`
        // They fight over `TRUE` and `FALSE`.
        //
        // 5. `parseKeyword` and `parseUnknown`
        // They fight over words. `parseUnknown` does not know about
        // keywords.

        'parseDelimiter',
        'parseWhitespace',
        'parseNumber',
        'parseComment',
        'parseOperator',
        'parseBool',
        'parseString',
        'parseSymbol',
        'parseKeyword',
        'parseLabel',
        'parseUnknown'
    );
85
    /**
     * The string to be parsed.
     *
     * The constructor replaces a plain string with a `UtfString` when
     * `USE_UTF_STRINGS` is enabled and the input contains multibyte
     * characters.
     *
     * @var string|UtfString
     */
    public $str = '';

    /**
     * The length of `$str`.
     *
     * For a plain string this is its byte length (`strlen`); for a
     * `UtfString` it is the value returned by its `length()` method.
     *
     * By storing its length, a lot of time is saved, because parsing methods
     * would otherwise call `strlen` every time.
     *
     * @var int
     */
    public $len = 0;

    /**
     * The index of the last parsed character.
     *
     * This is the cursor shared by `lex()` and all `parse*` methods.
     *
     * @var int
     */
    public $last = 0;

    /**
     * Tokens extracted from given strings.
     *
     * Assigned at the end of `lex()`.
     *
     * @var TokensList
     */
    public $list;

    /**
     * The default delimiter. This is used, by default, in all new instances.
     *
     * @var string
     */
    public static $DEFAULT_DELIMITER = ';';

    /**
     * Statements delimiter.
     * This may change during lexing (`lex()` replaces it when it meets a
     * `DELIMITER` statement).
     *
     * @var string
     */
    public $delimiter;

    /**
     * The length of the delimiter, in bytes (computed with `strlen`).
     *
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
     * calls to `strlen`, which might affect performance when the delimiter is
     * big.
     *
     * @var int
     */
    public $delimiterLen;
142
143    /**
144     * Gets the tokens list parsed by a new instance of a lexer.
145     *
146     * @param string|UtfString $str       the query to be lexed
147     * @param bool             $strict    whether strict mode should be
148     *                                    enabled or not
149     * @param string           $delimiter the delimiter to be used
150     *
151     * @return TokensList
152     */
153    public static function getTokens($str, $strict = false, $delimiter = null)
154    {
155        $lexer = new self($str, $strict, $delimiter);
156
157        return $lexer->list;
158    }
159
160    /**
161     * Constructor.
162     *
163     * @param string|UtfString $str       the query to be lexed
164     * @param bool             $strict    whether strict mode should be
165     *                                    enabled or not
166     * @param string           $delimiter the delimiter to be used
167     */
168    public function __construct($str, $strict = false, $delimiter = null)
169    {
170        // `strlen` is used instead of `mb_strlen` because the lexer needs to
171        // parse each byte of the input.
172        $len = $str instanceof UtfString ? $str->length() : strlen($str);
173
174        // For multi-byte strings, a new instance of `UtfString` is
175        // initialized (only if `UtfString` usage is forced.
176        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
177            $str = new UtfString($str);
178        }
179
180        $this->str = $str;
181        $this->len = $str instanceof UtfString ? $str->length() : $len;
182
183        $this->strict = $strict;
184
185        // Setting the delimiter.
186        $this->setDelimiter(
187            ! empty($delimiter) ? $delimiter : static::$DEFAULT_DELIMITER
188        );
189
190        $this->lex();
191    }
192
193    /**
194     * Sets the delimiter.
195     *
196     * @param string $delimiter the new delimiter
197     */
198    public function setDelimiter($delimiter)
199    {
200        $this->delimiter = $delimiter;
201        $this->delimiterLen = strlen($delimiter);
202    }
203
204    /**
205     * Parses the string and extracts lexemes.
206     */
207    public function lex()
208    {
209        // TODO: Sometimes, static::parse* functions make unnecessary calls to
210        // is* functions. For a better performance, some rules can be deduced
211        // from context.
212        // For example, in `parseBool` there is no need to compare the token
213        // every time with `true` and `false`. The first step would be to
214        // compare with 'true' only and just after that add another letter from
215        // context and compare again with `false`.
216        // Another example is `parseComment`.
217
218        $list = new TokensList();
219
220        /**
221         * Last processed token.
222         *
223         * @var Token
224         */
225        $lastToken = null;
226
227        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
228            /**
229             * The new token.
230             *
231             * @var Token
232             */
233            $token = null;
234
235            foreach (static::$PARSER_METHODS as $method) {
236                if ($token = $this->$method()) {
237                    break;
238                }
239            }
240
241            if ($token === null) {
242                // @assert($this->last === $lastIdx);
243                $token = new Token($this->str[$this->last]);
244                $this->error(
245                    'Unexpected character.',
246                    $this->str[$this->last],
247                    $this->last
248                );
249            } elseif ($lastToken !== null
250                && $token->type === Token::TYPE_SYMBOL
251                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
252                && (
253                    $lastToken->type === Token::TYPE_STRING
254                    || (
255                        $lastToken->type === Token::TYPE_SYMBOL
256                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
257                    )
258                )
259            ) {
260                // Handles ```... FROM 'user'@'%' ...```.
261                $lastToken->token .= $token->token;
262                $lastToken->type = Token::TYPE_SYMBOL;
263                $lastToken->flags = Token::FLAG_SYMBOL_USER;
264                $lastToken->value .= '@' . $token->value;
265                continue;
266            } elseif ($lastToken !== null
267                && $token->type === Token::TYPE_KEYWORD
268                && $lastToken->type === Token::TYPE_OPERATOR
269                && $lastToken->value === '.'
270            ) {
271                // Handles ```... tbl.FROM ...```. In this case, FROM is not
272                // a reserved word.
273                $token->type = Token::TYPE_NONE;
274                $token->flags = 0;
275                $token->value = $token->token;
276            }
277
278            $token->position = $lastIdx;
279
280            $list->tokens[$list->count++] = $token;
281
282            // Handling delimiters.
283            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
284                if ($this->last + 1 >= $this->len) {
285                    $this->error(
286                        'Expected whitespace(s) before delimiter.',
287                        '',
288                        $this->last + 1
289                    );
290                    continue;
291                }
292
293                // Skipping last R (from `delimiteR`) and whitespaces between
294                // the keyword `DELIMITER` and the actual delimiter.
295                $pos = ++$this->last;
296                if (($token = $this->parseWhitespace()) !== null) {
297                    $token->position = $pos;
298                    $list->tokens[$list->count++] = $token;
299                }
300
301                // Preparing the token that holds the new delimiter.
302                if ($this->last + 1 >= $this->len) {
303                    $this->error(
304                        'Expected delimiter.',
305                        '',
306                        $this->last + 1
307                    );
308                    continue;
309                }
310                $pos = $this->last + 1;
311
312                // Parsing the delimiter.
313                $this->delimiter = null;
314                $delimiterLen = 0;
315                while (++$this->last < $this->len && ! Context::isWhitespace($this->str[$this->last]) && $delimiterLen < 15) {
316                    $this->delimiter .= $this->str[$this->last];
317                    ++$delimiterLen;
318                }
319
320                if (empty($this->delimiter)) {
321                    $this->error(
322                        'Expected delimiter.',
323                        '',
324                        $this->last
325                    );
326                    $this->delimiter = ';';
327                }
328
329                --$this->last;
330
331                // Saving the delimiter and its token.
332                $this->delimiterLen = strlen($this->delimiter);
333                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
334                $token->position = $pos;
335                $list->tokens[$list->count++] = $token;
336            }
337
338            $lastToken = $token;
339        }
340
341        // Adding a final delimiter to mark the ending.
342        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);
343
344        // Saving the tokens list.
345        $this->list = $list;
346
347        $this->solveAmbiguityOnStarOperator();
348    }
349
350    /**
351     * Resolves the ambiguity when dealing with the "*" operator.
352     *
353     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
354     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
355     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
356     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
357     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
358     * - "," (a comma separator like in "SELECT *, field FROM...");
359     * - ")" (a closing parenthesis like in "COUNT(*)").
     * This method changes the flag of a "*" token when any of the conditions above is true. Otherwise, the
     * default flag (arithmetic) is kept.
362     *
363     * @return void
364     */
365    private function solveAmbiguityOnStarOperator()
366    {
367        $iBak = $this->list->idx;
368        while (null !== ($starToken = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*'))) {
369            // ::getNext already gets rid of whitespaces and comments.
370            if (($next = $this->list->getNext()) !== null) {
371                if (($next->type === Token::TYPE_KEYWORD && in_array($next->value, array('FROM', 'USING'), true))
372                    || ($next->type === Token::TYPE_OPERATOR && in_array($next->value, array(',', ')'), true))
373                ) {
374                    $starToken->flags = Token::FLAG_OPERATOR_SQL;
375                }
376            }
377        }
378        $this->list->idx = $iBak;
379    }
380
381    /**
382     * Creates a new error log.
383     *
384     * @param string $msg  the error message
385     * @param string $str  the character that produced the error
386     * @param int    $pos  the position of the character
387     * @param int    $code the code of the error
388     *
389     * @throws LexerException throws the exception, if strict mode is enabled
390     */
391    public function error($msg, $str = '', $pos = 0, $code = 0)
392    {
393        $error = new LexerException(
394            Translator::gettext($msg),
395            $str,
396            $pos,
397            $code
398        );
399        parent::error($error);
400    }
401
402    /**
403     * Parses a keyword.
404     *
405     * @return null|Token
406     */
407    public function parseKeyword()
408    {
409        $token = '';
410
411        /**
412         * Value to be returned.
413         *
414         * @var Token
415         */
416        $ret = null;
417
418        /**
419         * The value of `$this->last` where `$token` ends in `$this->str`.
420         *
421         * @var int
422         */
423        $iEnd = $this->last;
424
425        /**
426         * Whether last parsed character is a whitespace.
427         *
428         * @var bool
429         */
430        $lastSpace = false;
431
432        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
433            // Composed keywords shouldn't have more than one whitespace between
434            // keywords.
435            if (Context::isWhitespace($this->str[$this->last])) {
436                if ($lastSpace) {
437                    --$j; // The size of the keyword didn't increase.
438                    continue;
439                }
440                $lastSpace = true;
441            } else {
442                $lastSpace = false;
443            }
444
445            $token .= $this->str[$this->last];
446            if (($this->last + 1 === $this->len || Context::isSeparator($this->str[$this->last + 1]))
447                && $flags = Context::isKeyword($token)
448            ) {
449                $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
450                $iEnd = $this->last;
451
452                // We don't break so we find longest keyword.
453                // For example, `OR` and `ORDER` have a common prefix `OR`.
454                // If we stopped at `OR`, the parsing would be invalid.
455            }
456        }
457
458        $this->last = $iEnd;
459
460        return $ret;
461    }
462
463    /**
464     * Parses a label.
465     *
466     * @return null|Token
467     */
468    public function parseLabel()
469    {
470        $token = '';
471
472        /**
473         * Value to be returned.
474         *
475         * @var Token
476         */
477        $ret = null;
478
479        /**
480         * The value of `$this->last` where `$token` ends in `$this->str`.
481         *
482         * @var int
483         */
484        $iEnd = $this->last;
485        for ($j = 1; $j < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
486            if ($this->str[$this->last] === ':' && $j > 1) {
487                // End of label
488                $token .= $this->str[$this->last];
489                $ret = new Token($token, Token::TYPE_LABEL);
490                $iEnd = $this->last;
491                break;
492            } elseif (Context::isWhitespace($this->str[$this->last]) && $j > 1) {
493                // Whitespace between label and :
494                // The size of the keyword didn't increase.
495                --$j;
496            } elseif (Context::isSeparator($this->str[$this->last])) {
497                // Any other separator
498                break;
499            }
500            $token .= $this->str[$this->last];
501        }
502
503        $this->last = $iEnd;
504
505        return $ret;
506    }
507
508    /**
509     * Parses an operator.
510     *
511     * @return null|Token
512     */
513    public function parseOperator()
514    {
515        $token = '';
516
517        /**
518         * Value to be returned.
519         *
520         * @var Token
521         */
522        $ret = null;
523
524        /**
525         * The value of `$this->last` where `$token` ends in `$this->str`.
526         *
527         * @var int
528         */
529        $iEnd = $this->last;
530
531        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
532            $token .= $this->str[$this->last];
533            if ($flags = Context::isOperator($token)) {
534                $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
535                $iEnd = $this->last;
536            }
537        }
538
539        $this->last = $iEnd;
540
541        return $ret;
542    }
543
544    /**
545     * Parses a whitespace.
546     *
547     * @return null|Token
548     */
549    public function parseWhitespace()
550    {
551        $token = $this->str[$this->last];
552
553        if (! Context::isWhitespace($token)) {
554            return null;
555        }
556
557        while (++$this->last < $this->len && Context::isWhitespace($this->str[$this->last])) {
558            $token .= $this->str[$this->last];
559        }
560
561        --$this->last;
562
563        return new Token($token, Token::TYPE_WHITESPACE);
564    }
565
566    /**
567     * Parses a comment.
568     *
569     * @return null|Token
570     */
571    public function parseComment()
572    {
573        $iBak = $this->last;
574        $token = $this->str[$this->last];
575
576        // Bash style comments. (#comment\n)
577        if (Context::isComment($token)) {
578            while (++$this->last < $this->len
579                && $this->str[$this->last] !== "\n"
580            ) {
581                $token .= $this->str[$this->last];
582            }
583            // Include trailing \n as whitespace token
584            if ($this->last < $this->len) {
585                --$this->last;
586            }
587
588            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
589        }
590
591        // C style comments. (/*comment*\/)
592        if (++$this->last < $this->len) {
593            $token .= $this->str[$this->last];
594            if (Context::isComment($token)) {
595                // There might be a conflict with "*" operator here, when string is "*/*".
596                // This can occurs in the following statements:
597                // - "SELECT */* comment */ FROM ..."
598                // - "SELECT 2*/* comment */3 AS `six`;"
599                $next = $this->last+1;
600                if (($next < $this->len) && $this->str[$next] === '*') {
601                    // Conflict in "*/*": first "*" was not for ending a comment.
602                    // Stop here and let other parsing method define the true behavior of that first star.
603                    $this->last = $iBak;
604
605                    return null;
606                }
607
608                $flags = Token::FLAG_COMMENT_C;
609
610                // This comment already ended. It may be a part of a
611                // previous MySQL specific command.
612                if ($token === '*/') {
613                    return new Token($token, Token::TYPE_COMMENT, $flags);
614                }
615
616                // Checking if this is a MySQL-specific command.
617                if ($this->last + 1 < $this->len
618                    && $this->str[$this->last + 1] === '!'
619                ) {
620                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
621                    $token .= $this->str[++$this->last];
622
623                    while (++$this->last < $this->len
624                        && $this->str[$this->last] >= '0'
625                        && $this->str[$this->last] <= '9'
626                    ) {
627                        $token .= $this->str[$this->last];
628                    }
629                    --$this->last;
630
631                    // We split this comment and parse only its beginning
632                    // here.
633                    return new Token($token, Token::TYPE_COMMENT, $flags);
634                }
635
636                // Parsing the comment.
637                while (++$this->last < $this->len
638                    && (
639                        $this->str[$this->last - 1] !== '*'
640                        || $this->str[$this->last] !== '/'
641                    )
642                ) {
643                    $token .= $this->str[$this->last];
644                }
645
646                // Adding the ending.
647                if ($this->last < $this->len) {
648                    $token .= $this->str[$this->last];
649                }
650
651                return new Token($token, Token::TYPE_COMMENT, $flags);
652            }
653        }
654
655        // SQL style comments. (-- comment\n)
656        if (++$this->last < $this->len) {
657            $token .= $this->str[$this->last];
658            $end = false;
659        } else {
660            --$this->last;
661            $end = true;
662        }
663        if (Context::isComment($token, $end)) {
664            // Checking if this comment did not end already (```--\n```).
665            if ($this->str[$this->last] !== "\n") {
666                while (++$this->last < $this->len
667                    && $this->str[$this->last] !== "\n"
668                ) {
669                    $token .= $this->str[$this->last];
670                }
671            }
672            // Include trailing \n as whitespace token
673            if ($this->last < $this->len) {
674                --$this->last;
675            }
676
677            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
678        }
679
680        $this->last = $iBak;
681
682        return null;
683    }
684
685    /**
686     * Parses a boolean.
687     *
688     * @return null|Token
689     */
690    public function parseBool()
691    {
692        if ($this->last + 3 >= $this->len) {
693            // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
694            // required.
695            return null;
696        }
697
698        $iBak = $this->last;
699        $token = $this->str[$this->last] . $this->str[++$this->last]
700        . $this->str[++$this->last] . $this->str[++$this->last]; // _TRUE_ or _FALS_e
701
702        if (Context::isBool($token)) {
703            return new Token($token, Token::TYPE_BOOL);
704        } elseif (++$this->last < $this->len) {
705            $token .= $this->str[$this->last]; // fals_E_
706            if (Context::isBool($token)) {
707                return new Token($token, Token::TYPE_BOOL, 1);
708            }
709        }
710
711        $this->last = $iBak;
712
713        return null;
714    }
715
716    /**
717     * Parses a number.
718     *
719     * @return null|Token
720     */
721    public function parseNumber()
722    {
723        // A rudimentary state machine is being used to parse numbers due to
724        // the various forms of their notation.
725        //
726        // Below are the states of the machines and the conditions to change
727        // the state.
728        //
729        //      1 --------------------[ + or - ]-------------------> 1
730        //      1 -------------------[ 0x or 0X ]------------------> 2
731        //      1 --------------------[ 0 to 9 ]-------------------> 3
732        //      1 -----------------------[ . ]---------------------> 4
733        //      1 -----------------------[ b ]---------------------> 7
734        //
735        //      2 --------------------[ 0 to F ]-------------------> 2
736        //
737        //      3 --------------------[ 0 to 9 ]-------------------> 3
738        //      3 -----------------------[ . ]---------------------> 4
739        //      3 --------------------[ e or E ]-------------------> 5
740        //
741        //      4 --------------------[ 0 to 9 ]-------------------> 4
742        //      4 --------------------[ e or E ]-------------------> 5
743        //
744        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
745        //
746        //      7 -----------------------[ ' ]---------------------> 8
747        //
748        //      8 --------------------[ 0 or 1 ]-------------------> 8
749        //      8 -----------------------[ ' ]---------------------> 9
750        //
751        // State 1 may be reached by negative numbers.
752        // State 2 is reached only by hex numbers.
753        // State 4 is reached only by float numbers.
754        // State 5 is reached only by numbers in approximate form.
755        // State 7 is reached only by numbers in bit representation.
756        //
757        // Valid final states are: 2, 3, 4 and 6. Any parsing that finished in a
758        // state other than these is invalid.
759        // Also, negative states are invalid states.
760        $iBak = $this->last;
761        $token = '';
762        $flags = 0;
763        $state = 1;
764        for (; $this->last < $this->len; ++$this->last) {
765            if ($state === 1) {
766                if ($this->str[$this->last] === '-') {
767                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
768                } elseif ($this->last + 1 < $this->len
769                    && $this->str[$this->last] === '0'
770                    && (
771                        $this->str[$this->last + 1] === 'x'
772                        || $this->str[$this->last + 1] === 'X'
773                    )
774                ) {
775                    $token .= $this->str[$this->last++];
776                    $state = 2;
777                } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
778                    $state = 3;
779                } elseif ($this->str[$this->last] === '.') {
780                    $state = 4;
781                } elseif ($this->str[$this->last] === 'b') {
782                    $state = 7;
783                } elseif ($this->str[$this->last] !== '+') {
784                    // `+` is a valid character in a number.
785                    break;
786                }
787            } elseif ($state === 2) {
788                $flags |= Token::FLAG_NUMBER_HEX;
789                if (! (
790                        ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
791                        || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
792                        || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
793                    )
794                ) {
795                    break;
796                }
797            } elseif ($state === 3) {
798                if ($this->str[$this->last] === '.') {
799                    $state = 4;
800                } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
801                    $state = 5;
802                } elseif (($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
803                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')) {
804                    // A number can't be directly followed by a letter
805                    $state = -$state;
806                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
807                    // Just digits and `.`, `e` and `E` are valid characters.
808                    break;
809                }
810            } elseif ($state === 4) {
811                $flags |= Token::FLAG_NUMBER_FLOAT;
812                if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
813                    $state = 5;
814                } elseif (($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
815                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')) {
816                    // A number can't be directly followed by a letter
817                    $state = -$state;
818                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
819                    // Just digits, `e` and `E` are valid characters.
820                    break;
821                }
822            } elseif ($state === 5) {
823                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
824                if ($this->str[$this->last] === '+' || $this->str[$this->last] === '-'
825                    || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
826                ) {
827                    $state = 6;
828                } elseif (($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
829                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')) {
830                    // A number can't be directly followed by a letter
831                    $state = -$state;
832                } else {
833                    break;
834                }
835            } elseif ($state === 6) {
836                if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
837                    // Just digits are valid characters.
838                    break;
839                }
840            } elseif ($state === 7) {
841                $flags |= Token::FLAG_NUMBER_BINARY;
842                if ($this->str[$this->last] === '\'') {
843                    $state = 8;
844                } else {
845                    break;
846                }
847            } elseif ($state === 8) {
848                if ($this->str[$this->last] === '\'') {
849                    $state = 9;
850                } elseif ($this->str[$this->last] !== '0'
851                    && $this->str[$this->last] !== '1'
852                ) {
853                    break;
854                }
855            } elseif ($state === 9) {
856                break;
857            }
858            $token .= $this->str[$this->last];
859        }
860        if ($state === 2 || $state === 3
861            || ($token !== '.' && $state === 4)
862            || $state === 6 || $state === 9
863        ) {
864            --$this->last;
865
866            return new Token($token, Token::TYPE_NUMBER, $flags);
867        }
868        $this->last = $iBak;
869
870        return null;
871    }
872
873    /**
874     * Parses a string.
875     *
876     * @param string $quote additional starting symbol
877     *
878     * @return null|Token
879     * @throws LexerException
880     */
881    public function parseString($quote = '')
882    {
883        $token = $this->str[$this->last];
884        if (! ($flags = Context::isString($token)) && $token !== $quote) {
885            return null;
886        }
887        $quote = $token;
888
889        while (++$this->last < $this->len) {
890            if ($this->last + 1 < $this->len
891                && (
892                    ($this->str[$this->last] === $quote && $this->str[$this->last + 1] === $quote)
893                    || ($this->str[$this->last] === '\\' && $quote !== '`')
894                )
895            ) {
896                $token .= $this->str[$this->last] . $this->str[++$this->last];
897            } else {
898                if ($this->str[$this->last] === $quote) {
899                    break;
900                }
901                $token .= $this->str[$this->last];
902            }
903        }
904
905        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
906            $this->error(
907                sprintf(
908                    Translator::gettext('Ending quote %1$s was expected.'),
909                    $quote
910                ),
911                '',
912                $this->last
913            );
914        } else {
915            $token .= $this->str[$this->last];
916        }
917
918        return new Token($token, Token::TYPE_STRING, $flags);
919    }
920
921    /**
922     * Parses a symbol.
923     *
924     * @return null|Token
925     * @throws LexerException
926     */
927    public function parseSymbol()
928    {
929        $token = $this->str[$this->last];
930        if (! ($flags = Context::isSymbol($token))) {
931            return null;
932        }
933
934        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
935            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
936                // This is a system variable (e.g. `@@hostname`).
937                $token .= $this->str[$this->last++];
938                $flags |= Token::FLAG_SYMBOL_SYSTEM;
939            }
940        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
941            if ($token !== '?' && $this->last + 1 < $this->len) {
942                ++$this->last;
943            }
944        } else {
945            $token = '';
946        }
947
948        $str = null;
949
950        if ($this->last < $this->len) {
951            if (($str = $this->parseString('`')) === null) {
952                if (($str = $this->parseUnknown()) === null) {
953                    $this->error(
954                        'Variable name was expected.',
955                        $this->str[$this->last],
956                        $this->last
957                    );
958                }
959            }
960        }
961
962        if ($str !== null) {
963            $token .= $str->token;
964        }
965
966        return new Token($token, Token::TYPE_SYMBOL, $flags);
967    }
968
969    /**
970     * Parses unknown parts of the query.
971     *
972     * @return null|Token
973     */
974    public function parseUnknown()
975    {
976        $token = $this->str[$this->last];
977        if (Context::isSeparator($token)) {
978            return null;
979        }
980
981        while (++$this->last < $this->len && ! Context::isSeparator($this->str[$this->last])) {
982            $token .= $this->str[$this->last];
983
984            // Test if end of token equals the current delimiter. If so, remove it from the token.
985            if (substr($token, -$this->delimiterLen) === $this->delimiter) {
986                $token = substr($token, 0, -$this->delimiterLen);
987                $this->last -= $this->delimiterLen - 1;
988                break;
989            }
990        }
991
992        --$this->last;
993
994        return new Token($token);
995    }
996
997    /**
998     * Parses the delimiter of the query.
999     *
1000     * @return null|Token
1001     */
1002    public function parseDelimiter()
1003    {
1004        $idx = 0;
1005
1006        while ($idx < $this->delimiterLen && $this->last + $idx < $this->len) {
1007            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
1008                return null;
1009            }
1010            ++$idx;
1011        }
1012
1013        $this->last += $this->delimiterLen - 1;
1014
1015        return new Token($this->delimiter, Token::TYPE_DELIMITER);
1016    }
1017}
1018