1<?php
2declare(strict_types = 1);
3
4namespace TYPO3\CMS\Core\Database\Schema\Parser;
5
6/*
7 * This file is part of the TYPO3 CMS project.
8 *
9 * It is free software; you can redistribute it and/or modify it under
10 * the terms of the GNU General Public License, either version 2
11 * of the License, or any later version.
12 *
13 * For the full copyright and license information, please read the
14 * LICENSE.txt file that was distributed with this source code.
15 *
16 * The TYPO3 project - inspiring people to share!
17 */
18
/**
 * Scans a MySQL CREATE TABLE statement for tokens.
 *
 * Token type constants are grouped into numeric ranges (see the comment above
 * each group). getType() relies on these ranges: a T_* constant looked up by
 * name is only reported as such when its value is greater than 100.
 */
class Lexer extends \Doctrine\Common\Lexer\AbstractLexer
{
    // All tokens that are not valid identifiers must be < 100
    const T_NONE = 1;
    const T_STRING = 2;
    const T_INPUT_PARAMETER = 3;
    const T_CLOSE_PARENTHESIS = 4;
    const T_OPEN_PARENTHESIS = 5;
    const T_COMMA = 6;
    const T_DIVIDE = 7;
    const T_DOT = 8;
    const T_EQUALS = 9;
    const T_GREATER_THAN = 10;
    const T_LOWER_THAN = 11;
    const T_MINUS = 12;
    const T_MULTIPLY = 13;
    const T_NEGATE = 14;
    const T_PLUS = 15;
    const T_OPEN_CURLY_BRACE = 16;
    const T_CLOSE_CURLY_BRACE = 17;
    const T_SEMICOLON = 18;

    // All tokens that are identifiers or keywords that could be considered as identifiers should be >= 100
    const T_IDENTIFIER = 100;

    // All tokens that could be considered as a data type should be >= 200
    const T_BIT = 201;
    const T_TINYINT = 202;
    const T_SMALLINT = 203;
    const T_MEDIUMINT = 204;
    const T_INT = 205;
    const T_INTEGER = 206;
    const T_BIGINT = 207;
    const T_REAL = 208;
    const T_DOUBLE = 209;
    const T_FLOAT = 210;
    const T_DECIMAL = 211;
    const T_NUMERIC = 212;
    const T_DATE = 213;
    const T_TIME = 214;
    const T_TIMESTAMP = 215;
    const T_DATETIME = 216;
    const T_YEAR = 217;
    const T_CHAR = 218;
    const T_VARCHAR = 219;
    const T_BINARY = 220;
    const T_VARBINARY = 221;
    const T_TINYBLOB = 222;
    const T_BLOB = 223;
    const T_MEDIUMBLOB = 224;
    const T_LONGBLOB = 225;
    const T_TINYTEXT = 226;
    const T_TEXT = 227;
    const T_MEDIUMTEXT = 228;
    const T_LONGTEXT = 229;
    const T_ENUM = 230;
    const T_SET = 231;
    const T_JSON = 232;

    // All keyword tokens should be >= 300
    const T_CREATE = 300;
    const T_TEMPORARY = 301;
    const T_TABLE = 302;
    const T_IF = 303;
    const T_NOT = 304;
    const T_EXISTS = 305;
    const T_CONSTRAINT = 306;
    const T_INDEX = 307;
    const T_KEY = 308;
    const T_FULLTEXT = 309;
    const T_SPATIAL = 310;
    const T_PRIMARY = 311;
    const T_UNIQUE = 312;
    const T_CHECK = 313;
    const T_DEFAULT = 314;
    const T_AUTO_INCREMENT = 315;
    const T_COMMENT = 316;
    const T_COLUMN_FORMAT = 317;
    const T_STORAGE = 318;
    const T_REFERENCES = 319;
    const T_NULL = 320;
    const T_FIXED = 321;
    const T_DYNAMIC = 322;
    const T_MEMORY = 323;
    const T_DISK = 324;
    const T_UNSIGNED = 325;
    const T_ZEROFILL = 326;
    const T_CURRENT_TIMESTAMP = 327;
    const T_CHARACTER = 328;
    const T_COLLATE = 329;
    const T_ASC = 330;
    const T_DESC = 331;
    const T_MATCH = 332;
    const T_FULL = 333;
    const T_PARTIAL = 334;
    const T_SIMPLE = 335;
    const T_ON = 336;
    const T_UPDATE = 337;
    const T_DELETE = 338;
    const T_RESTRICT = 339;
    const T_CASCADE = 340;
    const T_NO = 341;
    const T_ACTION = 342;
    const T_USING = 343;
    const T_BTREE = 344;
    const T_HASH = 345;
    const T_KEY_BLOCK_SIZE = 346;
    const T_WITH = 347;
    const T_PARSER = 348;
    const T_FOREIGN = 349;
    const T_ENGINE = 350;
    const T_AVG_ROW_LENGTH = 351;
    const T_CHECKSUM = 352;
    const T_COMPRESSION = 353;
    const T_CONNECTION = 354;
    const T_DATA = 355;
    const T_DIRECTORY = 356;
    const T_DELAY_KEY_WRITE = 357;
    const T_ENCRYPTION = 358;
    const T_INSERT_METHOD = 359;
    const T_MAX_ROWS = 360;
    const T_MIN_ROWS = 361;
    const T_PACK_KEYS = 362;
    const T_PASSWORD = 363;
    const T_ROW_FORMAT = 364;
    const T_STATS_AUTO_RECALC = 365;
    const T_STATS_PERSISTENT = 366;
    const T_STATS_SAMPLE_PAGES = 367;
    const T_TABLESPACE = 368;
    const T_UNION = 369;
    const T_PRECISION = 370;

    /**
     * Creates a new statement scanner object.
     *
     * @param string $input A statement string.
     */
    public function __construct($input)
    {
        $this->setInput($input);
    }

    /**
     * Lexical catchable patterns.
     *
     * Regular expression fragments (without delimiters) describing the
     * multi-character tokens this lexer recognizes.
     *
     * @return array
     */
    protected function getCatchablePatterns(): array
    {
        return [
            '(?:-?[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?', // numbers
            '`(?:[^`]|``)*`', // quoted identifiers
            "'(?:[^']|'')*'", // quoted strings
            '\)', // closing parenthesis
            '[a-z0-9$_][\w$]*', // unquoted identifiers
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * Only runs of whitespace are listed here.
     *
     * @return array
     */
    protected function getNonCatchablePatterns(): array
    {
        return ['\s+'];
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Quoted strings and quoted identifiers are rewritten in place: the
     * surrounding quote characters are removed and doubled quote characters
     * inside the value are collapsed into a single one.
     *
     * @param string $value Raw token text; modified by reference for quoted tokens.
     * @return int One of the T_* constants of this class.
     */
    protected function getType(&$value): int
    {
        // Nothing to classify; also avoids reading offset 0 of an empty string.
        if ($value === '') {
            return self::T_NONE;
        }

        // Recognize numeric values
        if (is_numeric($value)) {
            $isFloat = strpos($value, '.') !== false || stripos($value, 'e') !== false;

            return $isFloat ? self::T_FLOAT : self::T_INTEGER;
        }

        $firstChar = $value[0];

        // Recognize quoted strings
        if ($firstChar === "'") {
            $value = str_replace("''", "'", substr($value, 1, -1));

            return self::T_STRING;
        }

        // Recognize quoted identifiers
        if ($firstChar === '`') {
            $value = str_replace('``', '`', substr($value, 1, -1));

            return self::T_IDENTIFIER;
        }

        // Recognize keywords, data types and unquoted identifiers. The catchable
        // pattern for unquoted identifiers also yields tokens starting with "_"
        // or "$", so these are accepted here in addition to letters.
        if (ctype_alpha($firstChar) || $firstChar === '_' || $firstChar === '$') {
            $constantName = self::class . '::T_' . strtoupper($value);

            if (defined($constantName)) {
                $keywordType = constant($constantName);

                // Constants up to 100 (symbols, T_STRING, T_IDENTIFIER, ...) are
                // not keywords and must not be selectable by their name.
                if ($keywordType > 100) {
                    return $keywordType;
                }
            }

            return self::T_STRING;
        }

        // Recognize symbols
        static $symbolTypes = [
            '.' => self::T_DOT,
            ';' => self::T_SEMICOLON,
            ',' => self::T_COMMA,
            '(' => self::T_OPEN_PARENTHESIS,
            ')' => self::T_CLOSE_PARENTHESIS,
            '=' => self::T_EQUALS,
            '>' => self::T_GREATER_THAN,
            '<' => self::T_LOWER_THAN,
            '+' => self::T_PLUS,
            '-' => self::T_MINUS,
            '*' => self::T_MULTIPLY,
            '/' => self::T_DIVIDE,
            '!' => self::T_NEGATE,
            '{' => self::T_OPEN_CURLY_BRACE,
            '}' => self::T_CLOSE_CURLY_BRACE,
        ];

        // Anything that is not a known symbol stays unclassified.
        return $symbolTypes[$value] ?? self::T_NONE;
    }
}
278