<?php

declare(strict_types=1);

/*
 * This file is part of the TYPO3 CMS project.
 *
 * It is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, either version 2
 * of the License, or any later version.
 *
 * For the full copyright and license information, please read the
 * LICENSE.txt file that was distributed with this source code.
 *
 * The TYPO3 project - inspiring people to share!
 */

namespace TYPO3\CMS\Core\Database\Schema\Parser;

use Doctrine\Common\Lexer\AbstractLexer;

/**
 * Scans a MySQL CREATE TABLE statement for tokens.
 *
 * Token types are plain integers grouped into numeric ranges (see the
 * constant sections below). Keyword and data type tokens are resolved in
 * getType() by looking up a class constant named "T_" followed by the
 * upper-cased token text, so adding a constant here is all that is needed
 * to make the lexer recognise a new keyword.
 */
class Lexer extends AbstractLexer
{
    // All tokens that are not valid identifiers must be < 100
    public const T_NONE = 1;
    public const T_STRING = 2;
    public const T_INPUT_PARAMETER = 3;
    public const T_CLOSE_PARENTHESIS = 4;
    public const T_OPEN_PARENTHESIS = 5;
    public const T_COMMA = 6;
    public const T_DIVIDE = 7;
    public const T_DOT = 8;
    public const T_EQUALS = 9;
    public const T_GREATER_THAN = 10;
    public const T_LOWER_THAN = 11;
    public const T_MINUS = 12;
    public const T_MULTIPLY = 13;
    public const T_NEGATE = 14;
    public const T_PLUS = 15;
    public const T_OPEN_CURLY_BRACE = 16;
    public const T_CLOSE_CURLY_BRACE = 17;
    public const T_SEMICOLON = 18;

    // All tokens that are identifiers or keywords that could be considered as identifiers should be >= 100
    public const T_IDENTIFIER = 100;

    // All tokens that could be considered as a data type should be >= 200
    public const T_BIT = 201;
    public const T_TINYINT = 202;
    public const T_SMALLINT = 203;
    public const T_MEDIUMINT = 204;
    public const T_INT = 205;
    public const T_INTEGER = 206;
    public const T_BIGINT = 207;
    public const T_REAL = 208;
    public const T_DOUBLE = 209;
    public const T_FLOAT = 210;
    public const T_DECIMAL = 211;
    public const T_NUMERIC = 212;
    public const T_DATE = 213;
    public const T_TIME = 214;
    public const T_TIMESTAMP = 215;
    public const T_DATETIME = 216;
    public const T_YEAR = 217;
    public const T_CHAR = 218;
    public const T_VARCHAR = 219;
    public const T_BINARY = 220;
    public const T_VARBINARY = 221;
    public const T_TINYBLOB = 222;
    public const T_BLOB = 223;
    public const T_MEDIUMBLOB = 224;
    public const T_LONGBLOB = 225;
    public const T_TINYTEXT = 226;
    public const T_TEXT = 227;
    public const T_MEDIUMTEXT = 228;
    public const T_LONGTEXT = 229;
    public const T_ENUM = 230;
    public const T_SET = 231;
    public const T_JSON = 232;

    // All keyword tokens should be >= 300
    public const T_CREATE = 300;
    public const T_TEMPORARY = 301;
    public const T_TABLE = 302;
    public const T_IF = 303;
    public const T_NOT = 304;
    public const T_EXISTS = 305;
    public const T_CONSTRAINT = 306;
    public const T_INDEX = 307;
    public const T_KEY = 308;
    public const T_FULLTEXT = 309;
    public const T_SPATIAL = 310;
    public const T_PRIMARY = 311;
    public const T_UNIQUE = 312;
    public const T_CHECK = 313;
    public const T_DEFAULT = 314;
    public const T_AUTO_INCREMENT = 315;
    public const T_COMMENT = 316;
    public const T_COLUMN_FORMAT = 317;
    public const T_STORAGE = 318;
    public const T_REFERENCES = 319;
    public const T_NULL = 320;
    public const T_FIXED = 321;
    public const T_DYNAMIC = 322;
    public const T_MEMORY = 323;
    public const T_DISK = 324;
    public const T_UNSIGNED = 325;
    public const T_ZEROFILL = 326;
    public const T_CURRENT_TIMESTAMP = 327;
    public const T_CHARACTER = 328;
    public const T_COLLATE = 329;
    public const T_ASC = 330;
    public const T_DESC = 331;
    public const T_MATCH = 332;
    public const T_FULL = 333;
    public const T_PARTIAL = 334;
    public const T_SIMPLE = 335;
    public const T_ON = 336;
    public const T_UPDATE = 337;
    public const T_DELETE = 338;
    public const T_RESTRICT = 339;
    public const T_CASCADE = 340;
    public const T_NO = 341;
    public const T_ACTION = 342;
    public const T_USING = 343;
    public const T_BTREE = 344;
    public const T_HASH = 345;
    public const T_KEY_BLOCK_SIZE = 346;
    public const T_WITH = 347;
    public const T_PARSER = 348;
    public const T_FOREIGN = 349;
    public const T_ENGINE = 350;
    public const T_AVG_ROW_LENGTH = 351;
    public const T_CHECKSUM = 352;
    public const T_COMPRESSION = 353;
    public const T_CONNECTION = 354;
    public const T_DATA = 355;
    public const T_DIRECTORY = 356;
    public const T_DELAY_KEY_WRITE = 357;
    public const T_ENCRYPTION = 358;
    public const T_INSERT_METHOD = 359;
    public const T_MAX_ROWS = 360;
    public const T_MIN_ROWS = 361;
    public const T_PACK_KEYS = 362;
    public const T_PASSWORD = 363;
    public const T_ROW_FORMAT = 364;
    public const T_STATS_AUTO_RECALC = 365;
    public const T_STATS_PERSISTENT = 366;
    public const T_STATS_SAMPLE_PAGES = 367;
    public const T_TABLESPACE = 368;
    public const T_UNION = 369;
    public const T_PRECISION = 370;

    /**
     * Creates a new statement scanner object.
     *
     * @param string $input A statement string.
     */
    public function __construct($input)
    {
        $this->setInput($input);
    }

    /**
     * Lexical catchable patterns.
     *
     * Each entry is a regular expression fragment (without delimiters)
     * describing one kind of token the lexer should capture.
     *
     * @return string[]
     */
    protected function getCatchablePatterns(): array
    {
        return [
            '(?:-?[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?', // numbers
            '`(?:[^`]|``)*`', // quoted identifiers
            "'(?:[^']|'')*'", // quoted strings
            '\)', // closing parenthesis
            '[a-z0-9$_][\w$]*', // unquoted identifiers
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * Only runs of whitespace are listed here.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns(): array
    {
        return ['\s+'];
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Classification order:
     *  1. numeric literals: T_FLOAT when the text contains "." or an
     *     exponent marker, T_INTEGER otherwise
     *  2. single-quoted text: T_STRING
     *  3. back-tick quoted text: T_IDENTIFIER
     *  4. words starting with a letter: the matching T_* constant when one
     *     exists with a value above 100, T_STRING otherwise
     *  5. single symbol characters: their dedicated token type
     *  6. anything else: T_NONE
     *
     * @param string $value Token text. Passed by reference: for quoted
     *                      tokens the surrounding quotes are removed and
     *                      doubled quote characters are collapsed in place.
     * @return int One of the T_* constants of this class.
     */
    protected function getType(&$value): int
    {
        // Indexing an empty string below would raise a warning; an empty
        // token carries no type information anyway.
        if ($value === '') {
            return self::T_NONE;
        }

        // Recognize numeric values
        if (is_numeric($value)) {
            // stripos() because the exponent marker may be "e" or "E"
            if (str_contains($value, '.') || stripos($value, 'e') !== false) {
                return self::T_FLOAT;
            }

            return self::T_INTEGER;
        }

        // Recognize quoted strings: strip the quotes and unescape ''
        if ($value[0] === "'") {
            $value = str_replace("''", "'", substr($value, 1, -1));

            return self::T_STRING;
        }

        // Recognize quoted identifiers: strip the back-ticks and unescape ``
        if ($value[0] === '`') {
            $value = str_replace('``', '`', substr($value, 1, -1));

            return self::T_IDENTIFIER;
        }

        // Recognize identifiers, aliased or qualified names
        if (ctype_alpha($value[0])) {
            // Keywords and data types are looked up by name among the T_*
            // constants of this class.
            $constantName = self::class . '::T_' . strtoupper($value);

            if (defined($constantName)) {
                $type = constant($constantName);

                // Values up to and including 100 belong to the symbol and
                // generic token range; a word that happens to be named
                // like one of them (e.g. "comma") is not a keyword.
                if ($type > 100) {
                    return $type;
                }
            }

            return self::T_STRING;
        }

        // Recognize symbols; everything unknown is reported as T_NONE
        return match ($value) {
            '.' => self::T_DOT,
            ';' => self::T_SEMICOLON,
            ',' => self::T_COMMA,
            '(' => self::T_OPEN_PARENTHESIS,
            ')' => self::T_CLOSE_PARENTHESIS,
            '=' => self::T_EQUALS,
            '>' => self::T_GREATER_THAN,
            '<' => self::T_LOWER_THAN,
            '+' => self::T_PLUS,
            '-' => self::T_MINUS,
            '*' => self::T_MULTIPLY,
            '/' => self::T_DIVIDE,
            '!' => self::T_NEGATE,
            '{' => self::T_OPEN_CURLY_BRACE,
            '}' => self::T_CLOSE_CURLY_BRACE,
            default => self::T_NONE,
        };
    }
}