1<?php
2
3declare(strict_types=1);
4
5/*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18namespace TYPO3\CMS\Core\Database\Schema\Parser;
19
20use Doctrine\Common\Lexer\AbstractLexer;
21
/**
 * Scans a MySQL CREATE TABLE statement for tokens.
 *
 * Token type constants are grouped into numeric ranges; see the comment in
 * front of each group of constants for the meaning of the range.
 */
class Lexer extends AbstractLexer
{
    // All tokens that are not valid identifiers must be < 100
    const T_NONE = 1;
    const T_STRING = 2;
    const T_INPUT_PARAMETER = 3;
    const T_CLOSE_PARENTHESIS = 4;
    const T_OPEN_PARENTHESIS = 5;
    const T_COMMA = 6;
    const T_DIVIDE = 7;
    const T_DOT = 8;
    const T_EQUALS = 9;
    const T_GREATER_THAN = 10;
    const T_LOWER_THAN = 11;
    const T_MINUS = 12;
    const T_MULTIPLY = 13;
    const T_NEGATE = 14;
    const T_PLUS = 15;
    const T_OPEN_CURLY_BRACE = 16;
    const T_CLOSE_CURLY_BRACE = 17;
    const T_SEMICOLON = 18;

    // All tokens that are identifiers or keywords that could be considered as identifiers should be >= 100
    const T_IDENTIFIER = 100;

    // All tokens that could be considered as a data type should be >= 200
    const T_BIT = 201;
    const T_TINYINT = 202;
    const T_SMALLINT = 203;
    const T_MEDIUMINT = 204;
    const T_INT = 205;
    const T_INTEGER = 206;
    const T_BIGINT = 207;
    const T_REAL = 208;
    const T_DOUBLE = 209;
    const T_FLOAT = 210;
    const T_DECIMAL = 211;
    const T_NUMERIC = 212;
    const T_DATE = 213;
    const T_TIME = 214;
    const T_TIMESTAMP = 215;
    const T_DATETIME = 216;
    const T_YEAR = 217;
    const T_CHAR = 218;
    const T_VARCHAR = 219;
    const T_BINARY = 220;
    const T_VARBINARY = 221;
    const T_TINYBLOB = 222;
    const T_BLOB = 223;
    const T_MEDIUMBLOB = 224;
    const T_LONGBLOB = 225;
    const T_TINYTEXT = 226;
    const T_TEXT = 227;
    const T_MEDIUMTEXT = 228;
    const T_LONGTEXT = 229;
    const T_ENUM = 230;
    const T_SET = 231;
    const T_JSON = 232;

    // All keyword tokens should be >= 300
    const T_CREATE = 300;
    const T_TEMPORARY = 301;
    const T_TABLE = 302;
    const T_IF = 303;
    const T_NOT = 304;
    const T_EXISTS = 305;
    const T_CONSTRAINT = 306;
    const T_INDEX = 307;
    const T_KEY = 308;
    const T_FULLTEXT = 309;
    const T_SPATIAL = 310;
    const T_PRIMARY = 311;
    const T_UNIQUE = 312;
    const T_CHECK = 313;
    const T_DEFAULT = 314;
    const T_AUTO_INCREMENT = 315;
    const T_COMMENT = 316;
    const T_COLUMN_FORMAT = 317;
    const T_STORAGE = 318;
    const T_REFERENCES = 319;
    const T_NULL = 320;
    const T_FIXED = 321;
    const T_DYNAMIC = 322;
    const T_MEMORY = 323;
    const T_DISK = 324;
    const T_UNSIGNED = 325;
    const T_ZEROFILL = 326;
    const T_CURRENT_TIMESTAMP = 327;
    const T_CHARACTER = 328;
    const T_COLLATE = 329;
    const T_ASC = 330;
    const T_DESC = 331;
    const T_MATCH = 332;
    const T_FULL = 333;
    const T_PARTIAL = 334;
    const T_SIMPLE = 335;
    const T_ON = 336;
    const T_UPDATE = 337;
    const T_DELETE = 338;
    const T_RESTRICT = 339;
    const T_CASCADE = 340;
    const T_NO = 341;
    const T_ACTION = 342;
    const T_USING = 343;
    const T_BTREE = 344;
    const T_HASH = 345;
    const T_KEY_BLOCK_SIZE = 346;
    const T_WITH = 347;
    const T_PARSER = 348;
    const T_FOREIGN = 349;
    const T_ENGINE = 350;
    const T_AVG_ROW_LENGTH = 351;
    const T_CHECKSUM = 352;
    const T_COMPRESSION = 353;
    const T_CONNECTION = 354;
    const T_DATA = 355;
    const T_DIRECTORY = 356;
    const T_DELAY_KEY_WRITE = 357;
    const T_ENCRYPTION = 358;
    const T_INSERT_METHOD = 359;
    const T_MAX_ROWS = 360;
    const T_MIN_ROWS = 361;
    const T_PACK_KEYS = 362;
    const T_PASSWORD = 363;
    const T_ROW_FORMAT = 364;
    const T_STATS_AUTO_RECALC = 365;
    const T_STATS_PERSISTENT = 366;
    const T_STATS_SAMPLE_PAGES = 367;
    const T_TABLESPACE = 368;
    const T_UNION = 369;
    const T_PRECISION = 370;

    /**
     * Creates a new statement scanner object.
     *
     * The input is handed to setInput() right away.
     *
     * @param string $input A statement string.
     */
    public function __construct($input)
    {
        $this->setInput($input);
    }

    /**
     * Lexical catchable patterns.
     *
     * Each entry is a regular expression fragment (without delimiters or
     * modifiers) describing one kind of token that should be captured.
     *
     * @return array
     */
    protected function getCatchablePatterns(): array
    {
        return [
            '(?:-?[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?', // numbers
            '`(?:[^`]|``)*`', // quoted identifiers
            "'(?:[^']|'')*'", // quoted strings
            '\)', // closing parenthesis
            '[a-z0-9$_][\w$]*', // unquoted identifiers
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * Only runs of whitespace are listed here.
     *
     * @return array
     */
    protected function getNonCatchablePatterns(): array
    {
        return ['\s+'];
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * The value is taken by reference: for quoted strings and quoted
     * identifiers the surrounding quote characters are stripped and doubled
     * quote characters are collapsed into a single one.
     *
     * Classification order:
     *  - numeric literals yield T_FLOAT (when they contain "." or "e") or T_INTEGER
     *  - single quoted values yield T_STRING
     *  - backtick quoted values yield T_IDENTIFIER
     *  - unquoted names yield the matching T_* keyword / data type constant
     *    if one with a value above T_IDENTIFIER exists, T_STRING otherwise
     *  - known single character symbols yield their symbol token
     *  - everything else yields T_NONE
     *
     * @param string $value The raw token text, possibly rewritten in place.
     * @return int One of the T_* constants of this class.
     */
    protected function getType(&$value): int
    {
        // Recognize numeric values
        if (is_numeric($value)) {
            if (strpos($value, '.') !== false || stripos($value, 'e') !== false) {
                return self::T_FLOAT;
            }

            return self::T_INTEGER;
        }

        $firstCharacter = $value[0];

        // Recognize quoted strings
        if ($firstCharacter === "'") {
            $value = str_replace("''", "'", substr($value, 1, -1));

            return self::T_STRING;
        }

        // Recognize quoted identifiers
        if ($firstCharacter === '`') {
            $value = str_replace('``', '`', substr($value, 1, -1));

            return self::T_IDENTIFIER;
        }

        // Recognize unquoted identifiers and keywords. The catchable pattern for
        // unquoted identifiers also allows a leading "_" or "$", so these have to
        // be accepted here as well instead of falling through to T_NONE.
        if (ctype_alpha($firstCharacter) || $firstCharacter === '_' || $firstCharacter === '$') {
            // Keywords and data types are resolved by looking for a class
            // constant named after the upper-cased token text.
            $name = self::class . '::T_' . strtoupper($value);

            if (defined($name)) {
                $type = constant($name);

                // Constants up to T_IDENTIFIER (symbols, T_STRING, ...) are not
                // keywords; a name that happens to match one stays a plain string.
                if ($type > self::T_IDENTIFIER) {
                    return $type;
                }
            }

            return self::T_STRING;
        }

        // Recognize symbols
        switch ($value) {
            case '.':
                return self::T_DOT;
            case ';':
                return self::T_SEMICOLON;
            case ',':
                return self::T_COMMA;
            case '(':
                return self::T_OPEN_PARENTHESIS;
            case ')':
                return self::T_CLOSE_PARENTHESIS;
            case '=':
                return self::T_EQUALS;
            case '>':
                return self::T_GREATER_THAN;
            case '<':
                return self::T_LOWER_THAN;
            case '+':
                return self::T_PLUS;
            case '-':
                return self::T_MINUS;
            case '*':
                return self::T_MULTIPLY;
            case '/':
                return self::T_DIVIDE;
            case '!':
                return self::T_NEGATE;
            case '{':
                return self::T_OPEN_CURLY_BRACE;
            case '}':
                return self::T_CLOSE_CURLY_BRACE;
        }

        // Anything that could not be classified
        return self::T_NONE;
    }
}
281